Denodgrams and blocks
For this week, we worked on Structural Equivalence models on our data to observe groups of clubs that might have similar attributes. Given the large size of our edgelist, it is not prudent to cluster in a large dataset with so many nodes. In this blog, we split the data with two parameters. First, differentiating by the year
variable which is essentially the season of the transfer, and the weight of the edges i.e the amount of the transfer. These are split into three further categories - small , medium and expensive transfers. We chose this strategy because we fee that this would align with understanding the roles of brokers and clubs with higher degrees in our network. We run two kinds of equivalence clusters - first using the average method and the hamming distance, and the other using the lead eigenvector.
The cluster_lead_eigen
function finds densely connected subgraphs in the network by calculating the leading non-negative eigenvector of the modularity matrix of the graph. (igraph package)
#convert data into matrix format
data.mat.exp <- as.matrix(expensive_transfers_2019)
#create igraph object from data
ig.exp.19 <- graph_from_data_frame(data.mat.exp, directed = TRUE)
# add edge attribute weight i.e transfer fee
ig.exp.19 <- set_edge_attr(ig.exp.19, "weight", value = na.omit(expensive_transfers_2019$fee))
ig.exp.19 <- delete_edge_attr(ig.exp.19, "fee")
#check summary of the igraph object
summary(ig.exp.19)
IGRAPH bf64324 DNW- 185 157 --
+ attr: name (v/c), name (e/c), weight (e/n)
network.exp.19 <- intergraph::asNetwork(ig.exp.19)
network.exp.19
Network attributes:
vertices = 185
directed = TRUE
hyper = FALSE
loops = FALSE
multiple = TRUE
bipartite = FALSE
total edges= 157
missing edges= 0
non-missing edges= 157
Vertex attribute names:
vertex.names
Edge attribute names:
name weight
matrix.exp.19 <- as.matrix(as_adjacency_matrix(ig.exp.19, attr = "weight"))
transfer.se.exp<-equiv.clust(matrix.exp.19, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
plot(transfer.se.exp,labels=transfer.se.exp$glabels, cex = 0.2, main="Cluster Dendrogram Expensive 2019 Transfers- Hamming (Average)")
rect.hclust(transfer.se.exp$cluster, k = 3)
plot_dendrogram(cluster_leading_eigen(as.undirected(ig.exp.19)), mode = igraph_opt("dend.plot.type"))
#convert data into matrix format
data.mat.med <- as.matrix(medium_transfers_2019)
#create igraph object from data
ig.med.19 <- graph_from_data_frame(data.mat.med, directed = TRUE)
# add edge attribute weight i.e transfer fee
ig.med.19<- set_edge_attr(ig.med.19, "weight", value = na.omit(medium_transfers_2019$fee))
ig.med.19<- delete_edge_attr(ig.med.19, "fee")
#check summary of the igraph object
summary(ig.med.19)
IGRAPH 3ffce03 DNW- 461 718 --
+ attr: name (v/c), name (e/c), weight (e/n)
network.med.19 <- intergraph::asNetwork(ig.med.19)
network.med.19
Network attributes:
vertices = 461
directed = TRUE
hyper = FALSE
loops = FALSE
multiple = TRUE
bipartite = FALSE
total edges= 718
missing edges= 0
non-missing edges= 718
Vertex attribute names:
vertex.names
Edge attribute names:
name weight
matrix.med.19 <- as.matrix(as_adjacency_matrix(ig.med.19, attr = "weight"))
transfer.se.med<-equiv.clust(matrix.med.19, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
plot(transfer.se.med,labels=transfer.se.med$glabels, cex = 0.2, main="Cluster Dendrogram Medium 2019 Transfers - Hamming (Average)")
rect.hclust(transfer.se.med$cluster, k = 3)
plot_dendrogram(cluster_leading_eigen(as.undirected(ig.med.19)), mode = igraph_opt("dend.plot.type"))
#convert data into matrix format
data.mat.sm <- as.matrix(small_transfers_2019)
#create igraph object from data
ig.sm.19 <- graph_from_data_frame(data.mat.sm, directed = TRUE)
# add edge attribute weight i.e transfer fee
ig.sm.19<- set_edge_attr(ig.sm.19, "weight", value = na.omit(small_transfers_2019$fee))
ig.sm.19<- delete_edge_attr(ig.sm.19, "fee")
#check summary of the igraph object
summary(ig.sm.19)
IGRAPH ba13c7e DNW- 283 312 --
+ attr: name (v/c), name (e/c), weight (e/n)
network.sm.19 <- intergraph::asNetwork(ig.sm.19)
network.sm.19
Network attributes:
vertices = 283
directed = TRUE
hyper = FALSE
loops = FALSE
multiple = TRUE
bipartite = FALSE
total edges= 312
missing edges= 0
non-missing edges= 312
Vertex attribute names:
vertex.names
Edge attribute names:
name weight
matrix.sm.19 <- as.matrix(as_adjacency_matrix(ig.sm.19, attr = "weight"))
transfer.se.sm<-equiv.clust(matrix.sm.19, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
plot(transfer.se.sm,labels=transfer.se.sm$glabels,cex = 0.2, main="Cluster Dendrogram Small 2019 Transfers - Hamming (Average)")
rect.hclust(transfer.se.sm$cluster, k = 3)
#dd <- plot_dendrogram(cluster_leading_eigen(as.undirected(ig.sm.19)), mode = igraph_opt("dend.plot.type"))
#dd
g_mat <- as.matrix(get.adjacency(simplify(ig.exp.19)))
g_eq <- equiv.clust(g_mat, mode = "graph")
plot(g_eq)
g_block <- blockmodel(g_mat, g_eq, h= 5, mode="graph")
plot(g_block, cex=0.1)
gplot(g_mat, vertex.col = g_block$block.membership, gmode="graph")
gplot(g_block$block.model, gmode="graph", label=rownames(g_block$block.model), edge.lwd = g_block$block.model*10)
# for this package to work, we will need to return to our original artist-artist adjacency matrix