Skip to content

Louvain clustering implementation #104

@scottyler89

Description

@scottyler89

Hi all, I'm new to pygraphblas & was just trying to re-implement the Louvain modularity tutorial, but the code there gives an error, traceable to k.reduce() returning a traditional float, but I'm guessing it was supposed to by another pygraphblas object.

import random
from random import choice
from time import time

from pygraphblas import *
from pygraphblas import lib
from pygraphblas.gviz import draw

from collections import defaultdict
from itertools import groupby
from operator import itemgetter

def group_labels(T):
    d = defaultdict(list)
    for k,v in groupby(T, itemgetter(1)):
        d[k].append(list(v)[0][0])
    return d

def compare_groups(left, right):
    left = {k: set(v) for k, v in left.items()}
    right = {k: set(v) for k, v in right.items()}
    return sorted(left.values()) == sorted(right.values())

def get_louvain_cluster_assignments(cluster_matrix):
    return cluster_matrix.cast(INT64).apply(INT64.POSITIONJ).reduce_vector()

######################################################
## make a dummy adj matrix
I = [0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5, 5,
    6, 6, 6,
    7, 7, 7, 7]

J = [0, 2, 3, 6,
    1, 2, 3, 7,
    0, 2, 4, 6,
    0, 1, 3, 5,
    0, 2, 4, 6,
    1, 3, 5, 6, 7,
    0, 4, 6,
    1, 3, 5, 7]

M = Matrix.from_lists(I, J, [1.0] * len(I), typ=FP64)
#####################################################

def louvain_cluster_easy(A, max_iters=10):
    S = Matrix.identity(BOOL, A.nrows)
    empty = Vector.sparse(BOOL, A.nrows)
    i = 0
    changed = True
    start = time()
    G = A.T + A
    k = A.reduce_vector()
    m = k.reduce_int() / 2.0
    k = (-k) / m
    while changed and i < max_iters:
        changed = False
        for j in set(k.indexes):
            Sj = S[j]
            S[j] = empty
            v = G[j] + k[j]
            q = v @ S
            t = q.select('max')
            if t:
                r = choice(list(t.indexes))
                S[j, r] = True
                if Sj.get(r) is None:
                    changed = True
        i += 1
    return S, time() - start



def louvain_cluster(A, max_iters=10):
    An = A.nrows
    S = Matrix.identity(BOOL, An)
    empty = Vector.sparse(BOOL, An)
    Sj = Vector.sparse(BOOL, An)
    v = Vector.sparse(FP64, An)
    q = Vector.sparse(FP64, An)
    t = Vector.sparse(FP64, An)
    i = 0
    changed = True
    start = time()
    G = A.T + A
    k = A.reduce()
    m = k.reduce_int() / 2.0
    k = (-k) / m
    kI = set(k.I)
    while changed and i < max_iters:
        changed = False
        for j in kI:
            S.extract_row(j, out=Sj)                  # Sj = S[j]
            S.assign_row(j, empty)                    # S[j] = empty
            G.extract_row(j, out=v)                   # v = G[j] + nkm[j]
            v.apply_second(FP64.PLUS, k[j], out=v)
            v.vxm(S, out=q)                           # q = v @ S
            if not q:
                continue
            q.select('max', out=t)                    # t = q.select('max')
            if t:
                r = choice(list(t.indexes))
                S[j, r] = True
                if Sj.get(r) is None:
                    changed = True
        i += 1
    return S, time() - start



ans, took = louvain_cluster(M, 5)

Gives the following error:

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 14, in louvain_cluster
AttributeError: 'float' object has no attribute 'reduce_int'

If anyone has a working implementation of Louvain modularity, or can help troubleshoot, I would be super appreciative!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions