Source code for egc.module.pretrain.communitygan.kmeans_pretrain

"""Embedding pretraining using K-means.
"""
import numpy as np
import scipy.sparse as sp
import torch

from ....model.graph_clustering.disjoint.vgae_kmeans import VGAEKmeans
from ....utils.evaluation import evaluation


def kmeans_pretrain(
    features_lil: torch.Tensor,
    adj_csr: torch.Tensor,
    n_clusters: int,
    label: np.ndarray,
) -> np.matrix:
    """Build a one-hot cluster embedding from a fitted ``VGAEKmeans`` model.

    A ``VGAEKmeans`` model is fitted on the given features and adjacency,
    its cluster memberships are one-hot encoded, and the clustering quality
    against ``label`` is printed to stdout.

    Args:
        features_lil (torch.Tensor): node features; ``shape[1]`` is used as
            the model's input dimension.
        adj_csr (torch.Tensor): adjacency, passed unchanged to
            ``VGAEKmeans.fit``.
        n_clusters (int): number of clusters, i.e. the number of columns of
            the returned embedding.
        label (np.ndarray): ground-truth labels, used only for the printed
            evaluation scores.

    Returns:
        np.matrix: dense ``int32`` matrix with one row per entry of the
        membership vector and ``n_clusters`` columns. Entry ``[v, c]`` is 1
        when node ``v`` was assigned to cluster ``c`` and 0 otherwise.
    """
    # NOTE(review): the hyper-parameters below are fixed for pretraining;
    # confirm with the caller before exposing them as arguments.
    _model = VGAEKmeans(
        in_features=features_lil.shape[1],
        hidden_units_1=128,
        hidden_units_2=64,
        lr=0.01,
        early_stopping_epoch=20,
        n_epochs=400,
    )
    _model.fit(features_lil, adj_csr, n_clusters)
    _label = _model.get_memberships()

    # One row per node, taken from the membership vector itself. Deriving the
    # row count from the largest assigned node index would fail on an empty
    # assignment and truncate the matrix when trailing nodes are unassigned.
    # Assumes get_memberships() yields one label per node -- TODO confirm.
    memberships = np.asarray(_label).reshape(-1)
    one_hot = memberships[:, None] == np.arange(n_clusters)
    # Callers have always received a dense np.matrix, so keep that type.
    emb = np.asmatrix(one_hot.astype(np.int32))

    (
        ari_score,
        nmi_score,
        ami_score,
        acc_score,
        micro_f1_score,
        macro_f1_score,
        purity,
    ) = evaluation(label, _label)
    print("\n"
          f"ARI:{ari_score}\n"
          f"NMI:{nmi_score}\n"
          f"AMI:{ami_score}\n"
          f"ACC:{acc_score}\n"
          f"Micro F1:{micro_f1_score}\n"
          f"Macro F1:{macro_f1_score}\n"
          f"purity: {purity}\n")

    return emb