Source code for egc.model.node_embedding.gae

"""
GAE embedding
"""
import copy

import dgl
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GraphConv
from torch import nn
from torch import optim

from ...module.layers import InnerProductDecoder


def bce_loss(preds, labels, norm, pos_weight):
    """Weighted binary cross-entropy between reconstructed and true adjacency."""
    cost = norm * F.binary_cross_entropy_with_logits(
        preds,
        labels,
        pos_weight=pos_weight,
    )
    return cost
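
# A minimal sketch of `bce_loss` on toy tensors (hypothetical values, not part
# of the module). In `DGL_GAE.fit` below, `norm` and `pos_weight` are derived
# from the adjacency matrix as N^2 / (2 * (N^2 - E)) and (N^2 - E) / E:
#
#     preds = torch.zeros(4)                       # logits for a flattened 2x2 adjacency
#     labels = torch.tensor([1.0, 0.0, 0.0, 1.0])  # ground-truth edges
#     pos_weight = torch.tensor([1.0])             # (4 - 2) / 2
#     loss = bce_loss(preds, labels, norm=1.0, pos_weight=pos_weight)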
class DGL_GAE(nn.Module):
    """An implementation of GAE (Graph Auto-Encoder).

    Args:
        epochs (int): Number of embedding training epochs.
        n_clusters (int): Number of clusters.
        fead_dim (int): Dimension of the node features.
        n_nodes (int): Number of nodes.
        hidden_dim1 (int, optional): Hidden units of the GCN layer. Defaults to 32.
        dropout (float, optional): Dropout rate (1 - keep probability). Defaults to 0.0.
        lr (float, optional): Learning rate. Defaults to 0.01.
        early_stop (int, optional): Early stopping threshold. Defaults to 10.
        activation (str, optional): Activation of the GCN layer. Defaults to "relu".
    """

    def __init__(
        self,
        epochs: int,
        n_clusters: int,
        fead_dim: int,
        n_nodes: int,
        hidden_dim1: int = 32,
        dropout: float = 0.0,
        lr: float = 0.01,
        early_stop: int = 10,
        activation: str = "relu",
    ):
        super().__init__()
        # ---------------Parameters---------------
        self.epochs = epochs
        self.n_clusters = n_clusters
        self.n_nodes = n_nodes
        self.lr = lr
        self.estop_steps = early_stop
        if activation == "prelu":
            self.activation = nn.PReLU()
        elif activation == "relu":
            self.activation = nn.ReLU()
        else:
            self.activation = activation
        self.best_model = None
        self.features = None
        self.adj_orig_graph = None
        self.norm = None
        self.pos_weight = None
        self.device = None
        # ----------------Layers---------------
        self.gconv1 = GraphConv(fead_dim, hidden_dim1)
        self.dc = InnerProductDecoder(dropout)
        # now = time.strftime("%Y%m%d_%H%M%S", time.localtime())
        # model_name = f'gae_{now}'
        # print(model_name)
        # self.writer = SummaryWriter(f'logs/{model_name}')
    def Encode(self, graph, features):
        """Encoder for GAE.

        Args:
            graph (dgl.DGLGraph): Graph in DGL format.
            features (torch.Tensor): Node features.

        Returns:
            h1 (torch.Tensor): Latent embedding of GAE.
        """
        h1 = self.gconv1(graph, features)
        return h1
    def Decode(self, z):
        """Decoder for GAE.

        Args:
            z (torch.Tensor): Latent embedding of GAE.

        Returns:
            (torch.Tensor): Reconstructed adjacency logits.
        """
        return self.dc(z)
    def forward(self):
        """Forward propagation.

        Returns:
            Graph_Reconstruction (torch.Tensor): Reconstructed adjacency matrix.
            Latent_Representation (torch.Tensor): Latent embedding of GAE.
        """
        Latent_Representation = self.Encode(self.adj_orig_graph, self.features)
        Graph_Reconstruction = self.Decode(Latent_Representation)
        return Graph_Reconstruction, Latent_Representation
    # pylint: disable=too-many-locals
    def fit(
        self,
        adj_csr: sp.csr_matrix,
        features: torch.Tensor,
        device: torch.device = torch.device("cpu"),
    ) -> None:
        """Fit the GAE model.

        Args:
            adj_csr (sp.csr_matrix): Sparse adjacency matrix.
            features (torch.Tensor): Node features.
            device (torch.device, optional): Torch device.
                Defaults to torch.device("cpu").
        """
        self.device = device
        # ------------------Data--------------
        self.features = features
        # remove diagonal entries, then add self-loops
        adj_orig = adj_csr - sp.dia_matrix(
            (adj_csr.diagonal()[np.newaxis, :], [0]), shape=adj_csr.shape)
        adj_orig.eliminate_zeros()
        adj_orig = adj_orig + sp.eye(adj_orig.shape[0])
        self.adj_orig_graph = dgl.from_scipy(adj_orig)
        # pos_weight = (N^2 - E) / E up-weights the sparse positive entries;
        # norm = N^2 / (2 * (N^2 - E)) rescales the mean loss
        self.pos_weight = float(adj_csr.shape[0] * adj_csr.shape[0] -
                                adj_csr.sum()) / adj_csr.sum()
        self.norm = (adj_csr.shape[0] * adj_csr.shape[0] / float(
            (adj_csr.shape[0] * adj_csr.shape[0] - adj_csr.sum()) * 2))

        best_loss = 1e9
        cnt = 0
        best_epoch = 0
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        self.to(self.device)
        self.adj_orig_graph = self.adj_orig_graph.to(self.device)
        self.features = self.features.to(self.device)
        self.pos_weight = torch.FloatTensor([self.pos_weight]).to(self.device)
        # cast the dense target to float32 so it matches the predictions' dtype
        target = torch.from_numpy(
            adj_orig.toarray()).view(-1).float().to(self.device)

        for epoch in range(self.epochs):
            self.train()
            optimizer.zero_grad()
            Graph_Reconstruction, _ = self.forward()
            pred = Graph_Reconstruction.view(-1)
            loss = bce_loss(
                pred,
                target,
                self.norm,
                self.pos_weight,
            )
            loss.backward()
            cur_loss = loss.item()
            optimizer.step()
            print(f"EPOCH_{epoch}, Loss {cur_loss}")
            if cur_loss < best_loss:
                cnt = 0
                best_epoch = epoch
                best_loss = cur_loss
                del self.best_model
                self.best_model = copy.deepcopy(self)
            else:
                cnt += 1
                print(f"loss increase count: {cnt}")
                if cnt >= self.estop_steps:
                    print(f"early stopping, best epoch: {best_epoch}")
                    break
        print("Optimization Finished!")
    def get_embedding(self) -> torch.Tensor:
        """Get the node embeddings from the best saved model.

        Returns:
            (torch.Tensor): Detached node embeddings.
        """
        self.eval()
        _, z_mu = self.best_model()
        return z_mu.detach()
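
# Example usage — a minimal sketch on a random graph; the sizes and data below
# are hypothetical and not part of this module:
#
#     import numpy as np
#     import scipy.sparse as sp
#     import torch
#
#     n, d = 100, 16
#     adj = sp.csr_matrix(
#         np.random.binomial(1, 0.05, size=(n, n)).astype(np.float32))
#     feats = torch.randn(n, d)
#
#     model = DGL_GAE(epochs=50, n_clusters=4, fead_dim=d, n_nodes=n)
#     model.fit(adj, feats)
#     z = model.get_embedding()  # (n, 32) latent node embeddings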