/* * Implementation of the community structure algorithm originally published * by Blondel et al in: * * V. D. Blondel, J.-L. Guillaume, R. Lambiotte and E. Lefebvre (2008). * "Fast unfolding of community hierarchies in large networks.". * J. Stat. Mech., P10008 (2008) * /* /* Structure storing a community */ typedef struct { igraph_integer_t size; /* Size of the community */ igraph_real_t weight_inside; /* Sum of edges weight inside community */ igraph_real_t weight_all;/* Sum of edges weight starting/ending in community */ } igraph_i_multilevel_community; /* Global community list structure */ typedef struct { long int communities_no, vertices_no; /* Number of communities, number of vertices */ igraph_real_t weight_sum; /* Sum of edges weight in the whole graph */ igraph_i_multilevel_community *item; /* List of communities */ igraph_vector_t *membership; /* Community IDs */ igraph_vector_t *weights; /* Graph edge weights */ } igraph_i_multilevel_community_list; /* Computes modularity of community partitioning */ igraph_real_t igraph_i_multilevel_community_modularity( const igraph_i_multilevel_community_list *communities) { igraph_real_t result = 0; long int i; igraph_real_t m = communities->weight_sum; for (i = 0; i < communities->vertices_no; i++) { if (communities->item[i].size > 0) { result += (communities->item[i].weight_inside - communities->item[i].weight_all*communities->item[i].weight_all/m)/m; } } return result; } typedef struct { long int from; long int to; long int id; } igraph_link; int cmpLinks(const void *a, const void *b) { long int r = (((igraph_link*)a)->from - ((igraph_link*)b)->from); if (r != 0) return r; return (((igraph_link*)a)->to - ((igraph_link*)b)->to); } // removes multiple edges and returns new edge id's for each edge in |E|log|E| int igraph_simplify_multiple(igraph_t *graph, igraph_vector_t *eids) { long int i, ecount = igraph_ecount(graph); igraph_integer_t from, to; IGRAPH_CHECK(igraph_vector_init(eids, ecount)); igraph_link *links = (igraph_link*)calloc(ecount, sizeof(igraph_link)); IGRAPH_FINALLY(free, links); for (i = 0; i < ecount; i++) { igraph_edge(graph, i, &from, &to); links[i].from = from; links[i].to = to; links[i].id = i; } qsort((void*)links, ecount, sizeof(igraph_link), cmpLinks); igraph_vector_t edges; IGRAPH_VECTOR_INIT_FINALLY(&edges, 0); long int last_from = -1, last_to = -1, l = -1; for (i = 0; i < ecount; i++) { if (links[i].from != last_from || links[i].to != last_to) { last_from = links[i].from; last_to = links[i].to; igraph_vector_push_back(&edges, last_from); igraph_vector_push_back(&edges, last_to); l++; } VECTOR(*eids)[links[i].id] = l; } long int vcount = igraph_vcount(graph); igraph_bool_t directed = igraph_is_directed(graph); igraph_destroy(graph); IGRAPH_CHECK(igraph_create(graph, &edges, vcount, directed)); free(links); igraph_vector_destroy(&edges); IGRAPH_FINALLY_CLEAN(2); return 0; } typedef struct { long int community; igraph_real_t weight; } igraph_community_link; int compareLinks(const void *a, const void *b) { return (((igraph_community_link*)a)->community - ((igraph_community_link*)b)->community); } /* Returns links and their weights to communities from the specified vertex and weight of all, inside community * and loop vertex links */ int igraph_i_multilevel_community_links(const igraph_t *graph, const igraph_i_multilevel_community_list *communities, igraph_integer_t vertex, igraph_vector_t *edges, igraph_real_t *weight_all, igraph_real_t *weight_inside, igraph_real_t *weight_loop, igraph_vector_t *links_community, igraph_vector_t *links_weight) { long int i; igraph_real_t weight = 1; igraph_integer_t ffrom, fto; long int from, to, to_community; long int community = (long int) VECTOR(*(communities->membership))[(long int)vertex]; *(weight_all) = 0; *(weight_inside) = 0; *(weight_loop) = 0; igraph_vector_clear(links_community); igraph_vector_clear(links_weight); igraph_vector_clear(edges); /* Go through vertex adjacent edges */ //igraph_vector_init(&edges, 0); igraph_adjacent(graph, edges, vertex, IGRAPH_ALL); long int n = igraph_vector_size(edges); igraph_community_link *links = (igraph_community_link*)calloc(n, sizeof(igraph_community_link)); for (i = 0; i < n; i++) { long int eidx = VECTOR(*edges)[i]; weight = VECTOR(*communities->weights)[eidx]; igraph_edge(graph, eidx, &ffrom, &fto); /* Ensure from is the vertex, otherwise swap */ if (ffrom != vertex) { from = (long int) fto; to = (long int) ffrom; } else { from = (long int) ffrom; to = (long int) fto; } *(weight_all) += weight; if (to == vertex) { *(weight_loop) += weight; links[i].community = community; links[i].weight = 0; continue; } to_community = (long int)VECTOR(*(communities->membership))[to]; if (community == to_community) *(weight_inside) += weight; debug("Link %ld (C: %ld) <-> %ld (C: %ld)\n", from, (long int)VECTOR(*(communities->membership))[from], to, community); links[i].community = to_community; links[i].weight = weight; } //igraph_vector_destroy(&edges); // Sort links by community ID and merge the same qsort((void*)links, n, sizeof(igraph_community_link), compareLinks); long int last = -1, c = -1; for (i = 0; i < n; i++) { to_community = links[i].community; weight = links[i].weight; if (to_community != last) { igraph_vector_push_back(links_community, to_community); igraph_vector_push_back(links_weight, weight); last = to_community; c++; } else { VECTOR(*links_weight)[c] += weight; } } free(links); return 0; } /* Returns gain of modularity if vertex is added into community */ inline igraph_real_t igraph_i_multilevel_community_modularity_gain( const igraph_i_multilevel_community_list *communities, igraph_integer_t community, igraph_integer_t vertex, igraph_real_t weight_all, igraph_real_t weight_inside) { return weight_inside - communities->item[(long int)community].weight_all*weight_all/communities->weight_sum; } /* One level of mutli-level community finging algorithm */ int igraph_community_multilevel_level(igraph_t *graph, igraph_vector_t *membership, igraph_vector_t *weights, igraph_real_t *modularity) { long int i, j, e; long int vcount = igraph_vcount(graph); long int ecount = igraph_ecount(graph); igraph_integer_t ffrom, fto; igraph_real_t q, pass_q; int pass; int changed = 0; igraph_vector_t links_community; igraph_vector_t links_weight; igraph_vector_t edges; if (igraph_is_directed(graph)) { IGRAPH_ERROR("multi-level community detection works for undirected graphs only", IGRAPH_UNIMPLEMENTED); } if (igraph_vector_size(weights) < igraph_ecount(graph)) IGRAPH_ERROR("multi-level community detection: weight vector too short", IGRAPH_EINVAL); if (igraph_vector_any_smaller(weights, 0)) IGRAPH_ERROR("weights must be positive", IGRAPH_EINVAL); IGRAPH_VECTOR_INIT_FINALLY(&links_community, 0); IGRAPH_VECTOR_INIT_FINALLY(&links_weight, 0); IGRAPH_VECTOR_INIT_FINALLY(&edges, 0); igraph_i_multilevel_community_list communities; communities.item = NULL; /* Initialize list of communities from graph vertices */ communities.vertices_no = vcount; communities.communities_no = vcount; communities.weights = weights; communities.weight_sum = 2 * igraph_vector_sum(weights); communities.item = (igraph_i_multilevel_community*)calloc(vcount, sizeof(igraph_i_multilevel_community)); if (communities.item == 0) { IGRAPH_ERROR("can't run multi-level community detection", IGRAPH_ENOMEM); } IGRAPH_FINALLY(free, communities.item); igraph_vector_init(membership, vcount); communities.membership = membership; for (i=0; i < vcount; i++) { igraph_vector_set(communities.membership, i, i); communities.item[i].size = 1; communities.item[i].weight_inside = 0; communities.item[i].weight_all = 0; } igraph_real_t weight = 1; /* Initialize community link weights */ for (e = 0; e < ecount; e++) { igraph_edge(graph, e, &ffrom, &fto); weight = VECTOR(*weights)[e]; communities.item[(long int) ffrom].weight_all += weight; communities.item[(long int) fto].weight_all += weight; if (ffrom == fto) communities.item[(long int) ffrom].weight_inside += 2*weight; } q = igraph_i_multilevel_community_modularity(&communities); pass = 1; do { /* Pass begin */ pass_q = q; changed = 0; // Save membership, it will be restored in case of worse result igraph_vector_t temp_membership; IGRAPH_CHECK(igraph_vector_copy(&temp_membership, communities.membership)); long int temp_communities_no = communities.communities_no; for (i = 0; i < vcount; i++) { /* Exclude vertex from its current community */ igraph_real_t weight_all = 0; igraph_real_t weight_inside = 0; igraph_real_t weight_loop = 0; igraph_i_multilevel_community_links(graph, &communities, i, &edges, &weight_all, &weight_inside, &weight_loop, &links_community, &links_weight); long int old_id = (long int)VECTOR(*(communities.membership))[i]; long int new_id = old_id; /* Update old community */ igraph_vector_set(communities.membership, i, -1); communities.item[old_id].size--; if (communities.item[old_id].size == 0) {communities.communities_no--;} communities.item[old_id].weight_all -= weight_all; communities.item[old_id].weight_inside -= 2*weight_inside + weight_loop; debug("Remove %ld all: %lf Inside: %lf\n", i, -weight_all, -2*weight_inside + weight_loop); /* Find new community to join with the best modification gain */ igraph_real_t max_q_gain = 0; igraph_integer_t max_weight = weight_inside; long int n = igraph_vector_size(&links_community); for (j = 0; j < n; j++) { long int c = (long int) VECTOR(links_community)[j]; igraph_real_t w = VECTOR(links_weight)[j]; igraph_real_t q_gain = igraph_i_multilevel_community_modularity_gain(&communities, c, i, weight_all, w); debug("Link %ld -> %ld weight: %lf gain: %lf\n", i, c, (double) w, (double) q_gain); if (q_gain > max_q_gain) { new_id = c; max_q_gain = q_gain; max_weight = w; } } debug("Added vertex %ld to community %ld (gain %lf).\n", i, new_id, (double) max_q_gain); /* Add vertex to "new" community and update it */ igraph_vector_set(communities.membership, i, new_id); if (communities.item[new_id].size == 0) {communities.communities_no++;} communities.item[new_id].size++; communities.item[new_id].weight_all += weight_all; communities.item[new_id].weight_inside += 2*max_weight + weight_loop; if (new_id != old_id) { changed++; } } q = igraph_i_multilevel_community_modularity(&communities); if (changed && (q > pass_q)) { debug("Pass %d (changed: %d) Communities: %ld Modularity from %lf to %lf\n", pass, changed, communities.communities_no, (double) pass_q, (double) q); pass++; } else { // Restore last membership IGRAPH_CHECK(igraph_vector_copy(communities.membership, &temp_membership)); communities.communities_no = temp_communities_no; igraph_vector_destroy(&temp_membership); break; } IGRAPH_ALLOW_INTERRUPTION(); } while (changed && (q > pass_q)); /* Pass end */ if (modularity) { *modularity = q; } debug("Result Communities: %ld Modularity: %lf\n", communities.communities_no, (double) q); IGRAPH_CHECK(igraph_membership_reindex(membership, NULL)); // shrink graph and remove multiple edges according to new membership igraph_vector_t temp_membership; IGRAPH_CHECK(igraph_vector_copy(&temp_membership, membership)); IGRAPH_FINALLY(igraph_vector_destroy, &temp_membership); IGRAPH_CHECK(igraph_shrink(graph, &temp_membership)); igraph_vector_destroy(&temp_membership); IGRAPH_FINALLY_CLEAN(1); // update weights after shrink and simplification igraph_vector_t eids; IGRAPH_CHECK(igraph_simplify_multiple(graph, &eids)); IGRAPH_FINALLY(igraph_vector_destroy, &eids); igraph_vector_t w; IGRAPH_CHECK(igraph_vector_copy(&w, weights)); IGRAPH_CHECK(igraph_vector_resize(weights, igraph_ecount(graph))); igraph_vector_fill(weights, 0); for (i = 0; i < ecount; i++) { VECTOR(*weights)[(long int)VECTOR(eids)[i]] += VECTOR(w)[i]; } free(communities.item); igraph_vector_destroy(&links_community); igraph_vector_destroy(&links_weight); igraph_vector_destroy(&edges); igraph_vector_destroy(&eids); IGRAPH_FINALLY_CLEAN(5); return 0; } /** * \function igraph_community_multilevel * \brief Finding community structure by multi-level optimization of modularity * * This function implements the multi-level modularity optimization * algorithm for finding community structure, see * VD Blondel, J-L Guillaume, R Lambiotte and E Lefebvre: Fast unfolding of * community hierarchies in large networks, http://arxiv.org/abs/arXiv:0803.0476 * for the details. * * It is based on modularity measure and hierarchial approach. * In each level it searches for the local maximum of modularity gain for each * vertex, while modularity increases or any change is done. Then it shrinks the * graph according to found membership id's and updates the edge weights. It * continues to the next level while modularity grows. * * \param graph The input graph. It must be an undirected graph. * \param weights Numeric vector containing edge * weights. If NULL, every edge has weight one. * The weights are expected to be non-negative. * \param membership The membership vector, the result is returned here. * For each vertex it gives the ID of its community. * \param memberships Numeric matrix will contain graph membership * after each level, if not NULL. * \param modularity Numeric vector will contain graph modularity * after each level, if not NULL. * \return Error code. * * * Time complexity: in average near linear on sparse graphs. */ int igraph_community_multilevel(const igraph_t *graph, const igraph_vector_t *weights, igraph_vector_t *membership, igraph_matrix_t *memberships, igraph_vector_t *modularity) { igraph_t g; igraph_vector_t w, m, level_membership; igraph_real_t prev_q, q = -1; int i, level = 1; long int vcount = igraph_vcount(graph); IGRAPH_FINALLY(igraph_destroy, &g); IGRAPH_CHECK(igraph_copy(&g, graph)); IGRAPH_FINALLY(igraph_vector_destroy, &w); if (weights) { IGRAPH_CHECK(igraph_vector_copy(&w, weights)); } else { IGRAPH_CHECK(igraph_vector_init(&w, igraph_ecount(&g))); igraph_vector_fill(&w, 1); } IGRAPH_VECTOR_INIT_FINALLY(&level_membership, vcount); if (memberships || membership) { for (i = 0; i < vcount; i++) { VECTOR(level_membership)[i] = i; } } if (memberships) { IGRAPH_CHECK(igraph_matrix_resize(memberships, 1, vcount)); } if (modularity) { IGRAPH_CHECK(igraph_vector_resize(modularity, 0)); } do { prev_q = q; IGRAPH_CHECK(igraph_community_multilevel_level(&g, &m, &w, &q)); IGRAPH_FINALLY(igraph_vector_destroy, &m); if (q > prev_q) { if (memberships || membership) { for (i = 0; i < vcount; i++) { VECTOR(level_membership)[i] = VECTOR(m)[(long int) VECTOR(level_membership)[i]]; } } if (modularity) { IGRAPH_CHECK(igraph_vector_push_back(modularity, q)); } if (memberships) { IGRAPH_CHECK(igraph_matrix_add_rows(memberships, 1)); IGRAPH_CHECK(igraph_matrix_set_row(memberships, &level_membership, level - 1)); } debug("Level: %d Communities: %ld Modularity: %f\n", level, (long int) igraph_vcount(&g), (double) q); level++; } igraph_vector_destroy(&m); IGRAPH_FINALLY_CLEAN(1); } while (q > prev_q); if (membership) { IGRAPH_CHECK(igraph_vector_resize(membership, vcount)); for (i = 0; i < vcount; i++) { VECTOR(*membership)[i] = VECTOR(level_membership)[i]; } } igraph_destroy(&g); igraph_vector_destroy(&w); igraph_vector_destroy(&level_membership); IGRAPH_FINALLY_CLEAN(3); return 0; }