I have written a custom function that calculates pagerank from an adjacency matrix (based on the solution in https://en.wikipedia.org/wiki/PageRank#Algebraic ,
where I change the column vector of ones to personalization vector) and compared it to the igraph version page_rank().
- they give identical results when there is no personalization
- if I use personalized and all vertices have at least one outgoing edge, they still give identical results
- However, if I use personalized but some nodes have no outgoing edges, then the results differ.
I wonder the reason why the results from my implementation differs from that of igraph?
Thanks! Here is the code:
library(igraph)
# Create an adjacency matrix for illustration:
n_nodes <- 5
adj_mat <- matrix(nrow = n_nodes, ncol = n_nodes)
adj_mat[2:3,1] <- 1
adj_mat[3:4,2] <- 1
adj_mat[is.na(adj_mat)] <- 0
# Create a vector for "personalization" (called pers_vec)
a <- rbeta(n = n_nodes, 1, 1)
pers_vec <- a/sum(a) # I want "pers_vec" to sum up to 1.
# Create igraph object, calculate personalized pagerank
g <- graph_from_adjacency_matrix(adj_mat, "directed", diag = F)
pr1 <- page_rank(g, "prpack", directed = T, personalized = pers_vec)$vector
# Create custom pagerank function based on this solution: https://en.wikipedia.org/wiki/PageRank#Algebraic
page_rank1 <- function(adjmat, d = 0.85, personalized = rep(1, n_nodes) , n_nodes){
if(n_nodes == 1){
return(1)
} else {
# create "i" identity matrix
i<- diag(n_nodes)
# create "M" column-stochastic matrix where columns sum up to 1
M <- t(adjmat) / matrix( nrow = n_nodes, ncol = n_nodes, rep(rowSums(adjmat, na.rm = T), n_nodes), byrow = T)
M[is.nan(M) | is.na(M) | is.infinite(M)] <- 0
# Use the algebraic formula from wikipedia
pr <- solve(i - (d * M)) %*% ( ((1-d) / n_nodes) * personalized)
# Normalize so they sum up to 1
pr <- pr/sum(pr)
return(as.numeric(pr))
}
}
# use the custom function on the adjacency matrix:
pr2 <- page_rank1(adjmat = adj_mat, personalized = pers_vec, n_nodes = n_nodes)
# They don't give the same results
print(pr1)
print(pr2)
# BUT:
# When we make sure that all nodes haveat least one outgoing edge,
# the functions give identical results
# add outgoing edges to node 1 and node 5
adj_mat[1,2] <- 1
adj_mat[5,3] <- 1
# recalculate
g <- graph_from_adjacency_matrix(adj_mat, "directed", diag = F)
pr1 <- page_rank(g, "prpack", directed = T, personalized = pers_vec)$vector
pr2 <- page_rank1(adjmat = adj_mat, personalized = pers_vec, n_nodes = n_nodes)
print(pr1)
print(pr2)
# Same is true when pageranks are not personalized, for both versions of the graph
# (even when not all nodes have outgoing edges)
# e.g. delete the edges of node 1 and node 5
adj_mat[1,2] <- 0
adj_mat[5,3] <- 0
# recalculate
g <- graph_from_adjacency_matrix(adj_mat, "directed", diag = F)
pr1 <- page_rank(g, "prpack", directed = T)$vector
pr2 <- page_rank1(adjmat = adj_mat, n_nodes = n_nodes)
print(pr1)
print(pr2)