"""
Author: Zhou Sheng
Evaluation Metric for Graph Clustering
ACC, NMI, ARI, F1 Score
"""
from typing import Tuple, Union
import numpy as np
import torch
from scipy.optimize import linear_sum_assignment as linear_assignment
from sklearn.metrics import accuracy_score as ACC
from sklearn.metrics import adjusted_mutual_info_score as AMI
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.metrics import f1_score as F1
from sklearn.metrics import normalized_mutual_info_score as NMI
from sklearn.metrics.cluster import contingency_matrix as ctg


def purity(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute clustering purity: the fraction of samples that fall in
    the majority ground-truth class of their assigned cluster."""
    # compute contingency matrix (also called confusion matrix)
    contingency_matrix = ctg(y_true, y_pred)
    return np.sum(np.amax(contingency_matrix,
                          axis=0)) / np.sum(contingency_matrix)
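

# A minimal sanity check for ``purity`` (toy labels chosen for illustration,
# not shipped with the module). Purity is invariant to relabeling clusters,
# so a clustering that is correct up to a permutation still scores 1.0:
#
#   purity(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0]))  # -> 1.0
#   purity(np.array([0, 0, 1, 1]), np.array([0, 0, 0, 0]))  # -> 0.5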


def best_mapping(
    labels_true: Union[list, np.ndarray],
    labels_pred: Union[list, np.ndarray],
) -> Tuple[np.ndarray, np.ndarray]:
    """Find the best one-to-one mapping between labels_pred and labels_true.

    Uses the Hungarian algorithm (``scipy.optimize.linear_sum_assignment``)
    to maximize the number of agreements between the two labelings.

    Args:
        labels_true (list or np.ndarray): ground-truth labels.
        labels_pred (list or np.ndarray): predicted labels.

    Raises:
        ValueError: Labels must be in numpy format!

    Returns:
        Tuple[np.ndarray, np.ndarray]: (labels_true, remapped labels_pred).
    """
    if torch.is_tensor(labels_true) or torch.is_tensor(labels_pred):
        raise ValueError("Labels must be in numpy format!")
    labels_true = np.asarray(labels_true)
    labels_pred = np.asarray(labels_pred)
    D = max(labels_true.max(), labels_pred.max()) + 1
    # w[i, j] counts samples predicted as cluster i with ground-truth label j
    w = np.zeros((D, D), dtype=np.int64)
    for pred, true in zip(labels_pred, labels_true):
        w[pred, true] += 1
    # the Hungarian algorithm minimizes cost, so negate the agreement counts
    old_pred, new_pred = linear_assignment(w.max() - w)
    label_map = dict(zip(old_pred, new_pred))
    labels_pred = np.asarray([label_map[x] for x in labels_pred])
    return labels_true, labels_pred
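

# A hedged example of ``best_mapping`` (illustrative labels, not from the
# original module): predictions [1, 1, 0, 0] against ground truth
# [0, 0, 1, 1] are a pure relabeling, so the Hungarian matching maps
# 0 -> 1 and 1 -> 0 and the remapped prediction matches the ground truth:
#
#   t, p = best_mapping(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0]))
#   # p -> array([0, 0, 1, 1])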


def evaluation(
    labels_true: Union[torch.Tensor, np.ndarray],
    labels_pred: Union[torch.Tensor, np.ndarray],
) -> Tuple[float, ...]:
    """Clustering evaluation.

    Args:
        labels_true (torch.Tensor or np.ndarray): ground-truth community.
        labels_pred (torch.Tensor or np.ndarray): predicted community.

    Returns:
        Tuple[float, ...]: (ARI, NMI, AMI, ACC, Micro-F1, Macro-F1, purity)
    """
    if torch.is_tensor(labels_true):
        labels_true = labels_true.cpu().numpy().reshape(-1)
    if torch.is_tensor(labels_pred):
        labels_pred = labels_pred.cpu().numpy().reshape(-1)
    # align predicted cluster ids with ground-truth labels before scoring
    labels_true, labels_pred = best_mapping(labels_true, labels_pred)
    ARI_score = ARI(labels_true, labels_pred)
    NMI_score = NMI(labels_true, labels_pred)
    AMI_score = AMI(labels_true, labels_pred)
    ACC_score = ACC(labels_true, labels_pred)
    Micro_F1_score = F1(labels_true, labels_pred, average="micro")
    Macro_F1_score = F1(labels_true, labels_pred, average="macro")
    purity_score = purity(labels_true, labels_pred)
    return (
        ARI_score,
        NMI_score,
        AMI_score,
        ACC_score,
        Micro_F1_score,
        Macro_F1_score,
        purity_score,
    )
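

# Hedged sanity check (toy labels for illustration): a prediction that is
# correct up to a permutation of cluster ids scores 1.0 on every metric,
# since ``best_mapping`` aligns the ids before scoring:
#
#   evaluation(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0]))
#   # -> (1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0)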


if __name__ == "__main__":
    # smoke test on random labels: all scores should sit near chance level
    A = torch.randint(5, (1, 100)).numpy()[0]
    B = torch.randint(5, (1, 100)).numpy()[0]
    print(evaluation(A, B))