Structural Equivalence and Blockmodels

Denodgrams and blocks

Isha Mahajan, Ankit Kumar (UMass Amherst)
2022-05-09

Structural Equivilence and Dendograms

For this week, we worked on Structural Equivalence models on our data to observe groups of clubs that might have similar attributes. Given the large size of our edgelist, it is not prudent to cluster in a large dataset with so many nodes. In this blog, we split the data with two parameters. First, differentiating by the year variable which is essentially the season of the transfer, and the weight of the edges i.e the amount of the transfer. These are split into three further categories - small , medium and expensive transfers. We chose this strategy because we fee that this would align with understanding the roles of brokers and clubs with higher degrees in our network. We run two kinds of equivalence clusters - first using the average method and the hamming distance, and the other using the lead eigenvector.

The cluster_lead_eigen function finds densely connected subgraphs in the network by calculating the leading non-negative eigenvector of the modularity matrix of the graph. (igraph package)

Transfers 2019

Expensive Transfers

#convert data into matrix format
data.mat.exp <- as.matrix(expensive_transfers_2019)
#create igraph object from data
ig.exp.19 <- graph_from_data_frame(data.mat.exp, directed = TRUE)
# add edge attribute weight i.e transfer fee
ig.exp.19 <- set_edge_attr(ig.exp.19, "weight", value = na.omit(expensive_transfers_2019$fee))
ig.exp.19 <- delete_edge_attr(ig.exp.19, "fee")
#check summary of the igraph object
summary(ig.exp.19)
IGRAPH bf64324 DNW- 185 157 -- 
+ attr: name (v/c), name (e/c), weight (e/n)
network.exp.19 <- intergraph::asNetwork(ig.exp.19) 
network.exp.19
 Network attributes:
  vertices = 185 
  directed = TRUE 
  hyper = FALSE 
  loops = FALSE 
  multiple = TRUE 
  bipartite = FALSE 
  total edges= 157 
    missing edges= 0 
    non-missing edges= 157 

 Vertex attribute names: 
    vertex.names 

 Edge attribute names: 
    name weight 
matrix.exp.19 <- as.matrix(as_adjacency_matrix(ig.exp.19, attr = "weight"))
transfer.se.exp<-equiv.clust(matrix.exp.19, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
plot(transfer.se.exp,labels=transfer.se.exp$glabels, cex = 0.2, main="Cluster Dendrogram Expensive 2019 Transfers-  Hamming (Average)")
rect.hclust(transfer.se.exp$cluster, k = 3) 

plot_dendrogram(cluster_leading_eigen(as.undirected(ig.exp.19)), mode = igraph_opt("dend.plot.type"))

Medium Transfers

#convert data into matrix format
data.mat.med <- as.matrix(medium_transfers_2019)
#create igraph object from data
ig.med.19 <- graph_from_data_frame(data.mat.med, directed = TRUE)
# add edge attribute weight i.e transfer fee
ig.med.19<- set_edge_attr(ig.med.19, "weight", value = na.omit(medium_transfers_2019$fee))
ig.med.19<- delete_edge_attr(ig.med.19, "fee")
#check summary of the igraph object
summary(ig.med.19)
IGRAPH 3ffce03 DNW- 461 718 -- 
+ attr: name (v/c), name (e/c), weight (e/n)
network.med.19 <- intergraph::asNetwork(ig.med.19) 
network.med.19
 Network attributes:
  vertices = 461 
  directed = TRUE 
  hyper = FALSE 
  loops = FALSE 
  multiple = TRUE 
  bipartite = FALSE 
  total edges= 718 
    missing edges= 0 
    non-missing edges= 718 

 Vertex attribute names: 
    vertex.names 

 Edge attribute names: 
    name weight 
matrix.med.19 <- as.matrix(as_adjacency_matrix(ig.med.19, attr = "weight"))
transfer.se.med<-equiv.clust(matrix.med.19, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
plot(transfer.se.med,labels=transfer.se.med$glabels, cex = 0.2, main="Cluster Dendrogram Medium 2019 Transfers - Hamming (Average)")
rect.hclust(transfer.se.med$cluster, k = 3) 

plot_dendrogram(cluster_leading_eigen(as.undirected(ig.med.19)), mode = igraph_opt("dend.plot.type"))

Small Transfers

#convert data into matrix format
data.mat.sm <- as.matrix(small_transfers_2019)
#create igraph object from data
ig.sm.19 <- graph_from_data_frame(data.mat.sm, directed = TRUE)
# add edge attribute weight i.e transfer fee
ig.sm.19<- set_edge_attr(ig.sm.19, "weight", value = na.omit(small_transfers_2019$fee))
ig.sm.19<- delete_edge_attr(ig.sm.19, "fee")
#check summary of the igraph object
summary(ig.sm.19)
IGRAPH ba13c7e DNW- 283 312 -- 
+ attr: name (v/c), name (e/c), weight (e/n)
network.sm.19 <- intergraph::asNetwork(ig.sm.19) 
network.sm.19
 Network attributes:
  vertices = 283 
  directed = TRUE 
  hyper = FALSE 
  loops = FALSE 
  multiple = TRUE 
  bipartite = FALSE 
  total edges= 312 
    missing edges= 0 
    non-missing edges= 312 

 Vertex attribute names: 
    vertex.names 

 Edge attribute names: 
    name weight 
matrix.sm.19 <- as.matrix(as_adjacency_matrix(ig.sm.19, attr = "weight"))
transfer.se.sm<-equiv.clust(matrix.sm.19, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
plot(transfer.se.sm,labels=transfer.se.sm$glabels,cex = 0.2, main="Cluster Dendrogram Small 2019 Transfers - Hamming (Average)")
rect.hclust(transfer.se.sm$cluster, k = 3) 

#dd <- plot_dendrogram(cluster_leading_eigen(as.undirected(ig.sm.19)), mode = igraph_opt("dend.plot.type"))
#dd

Block Modeling

g_mat <- as.matrix(get.adjacency(simplify(ig.exp.19)))
g_eq <- equiv.clust(g_mat, mode = "graph")
plot(g_eq)
g_block <- blockmodel(g_mat, g_eq, h= 5, mode="graph")
plot(g_block, cex=0.1)
gplot(g_mat, vertex.col = g_block$block.membership, gmode="graph")
gplot(g_block$block.model, gmode="graph", label=rownames(g_block$block.model), edge.lwd = g_block$block.model*10)
plot(density(g_block$block.membership))
# for this package to work, we will need to return to our original artist-artist adjacency matrix