Source code for egc.model.graph_clustering.disjoint.vgae_kmeans

"""
vgae_kmeans
"""
import scipy.sparse as sp
import torch

from ....utils import sk_clustering
from ...node_embedding.vgae import DGL_VGAE
from ...node_embedding.vgae import VGAE
from ..base import Base


[docs]class DGL_VGAEKmeans(Base): """VGAE Kmeans implement using dgl Args: epochs (int, optional): number of embedding training epochs. Defaults to 200. n_clusters (int): cluster num. fead_dim (int): dim of features n_nodes (int): number of nodes hidden_dim1 (int): hidden units size of gcn_1. Defaults to 32. hidden_dim2 (int): hidden units size of gcn_2. Defaults to 16. dropout (int, optional): Dropout rate (1 - keep probability). lr (float, optional): learning rate.. Defaults to 0.001. early_stop (int, optional): early stopping threshold. Defaults to 10. activation (str, optional): activation of gcn layer_1. Defaults to 'relu'. """ def __init__( self, epochs: int, n_clusters: int, fead_dim: int, n_nodes: int, hidden_dim1: int = 32, hidden_dim2: int = 16, dropout: float = 0.0, lr: float = 0.01, early_stop: int = 10, activation: str = "relu", ) -> None: super().__init__() self.n_clusters = n_clusters # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') self.model = DGL_VGAE( epochs, n_clusters, fead_dim, n_nodes, hidden_dim1, hidden_dim2, dropout, lr, early_stop, activation, )
[docs] def fit(self, adj_csr, features): """Fit for Specific Graph Args: adj_csr (sp.lil_matrix): 2D sparse features. features (torch.Tensor): node's features """ self.model.fit(adj_csr, features)
[docs] def get_embedding(self): return self.model.get_embedding()
[docs] def get_memberships(self): return sk_clustering(self.get_embedding().cpu(), self.n_clusters, name="kmeans")
[docs]class VGAEKmeans(Base): """VGAE Kmeans Args: in_features (int): input feature dimension. hidden_units_1 (int, optional): gcn_1 hidden units. Defaults to 32. hidden_units_2 (int, optional): gcn_2 hidden units. Defaults to 16. n_epochs (int, optional): node embedding epochs. Defaults to 400. early_stopping_epoch (int, optional): early stopping epoch number. Defaults to 20. lr (float, optional): learning rate. Defaults to 0.001. l2_coef (float, optional): l2 weight decay. Defaults to 0.0. activation (str, optional): activation of gcn layer. Defaults to 'relu'. model_filename: str = 'vgae_kmeans', """ def __init__( self, in_features: int, hidden_units_1: int = 32, hidden_units_2: int = 16, n_epochs: int = 400, early_stopping_epoch: int = 20, lr: float = 0.001, l2_coef: float = 0.0, activation: str = "relu", model_filename: str = "vgae_kmeans", ) -> None: super().__init__() self.n_clusters = None self.model = VGAE( in_features, hidden_units_1, hidden_units_2, n_epochs, early_stopping_epoch, lr, l2_coef, activation, model_filename, )
[docs] def fit(self, features_lil: sp.lil_matrix, adj_csr: sp.csr_matrix, n_clusters: int): """Fit for Specific Graph Args: features (sp.lil_matrix): 2D sparse features. adj_orig (sp.csr_matrix): 2D sparse adj. n_clusters (int): cluster num. neg_list_num (int, optional): negative sample times. Defaults to 5. """ self.n_clusters = n_clusters self.model.fit(features_lil, adj_csr)
[docs] def get_embedding(self): return self.model.get_embedding()
[docs] def get_memberships(self): return sk_clustering(torch.squeeze(self.get_embedding(), 0).cpu(), self.n_clusters, name="kmeans")