2017-02-14 101 views
2

我参考PageRank - Wikipedia并用下面的公式代数计算PageRank,但我得到nx.pagerank_numpy的不同结果。不正确的PageRank计算结果

例如(维基百科图片),

我,

# 'A', 'B', 'C', 'D', 'E', 'F' 
[[ 0.028] 
[ 0.324] 
[ 0.289] 
[ 0.033] 
[ 0.068] 
[ 0.033]] 

为何结果不同?


这里是源代码。

import networkx as nx 
import numpy as np 

# Step 1: Build up a graph 
G = build_graph_wikipedia_pagerank_example() 

# Step 2: PageRank calculation 

# Part 1: \mathbf {1} is the column vector of length N containing only ones. 
N = len(G.nodes())  # N = 11 
column_vector = np.ones((N, 1), dtype=np.int) 
#print(column_vector) 

# Part 2: Matrix M 
# Adjacency matrix A 
nodelist = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'] # sorted(G.nodes()) 
A = nx.to_numpy_matrix(G, nodelist) 

# K is the diagonal matrix with the outdegrees in the diagonal. 
list_outdegree = map(operator.itemgetter(1), sorted(G.out_degree().items())) 
K = np.diag(list_outdegree) 

K_inv = np.linalg.pinv(K) 

# Matrix M 
M = (K_inv * A).transpose() 

# Part 3: PageRank calculation 
d = 0.85 
I = np.identity(N) 
R = np.linalg.pinv(I - d*M) * (1-d)/N * column_vector 

为了构建图,我用,

def build_graph_wikipedia_pagerank_example(): 
    """ 
    Build a graph for https://en.wikipedia.org/wiki/File:PageRanks-Example.svg 
    """ 

    G = nx.DiGraph() 

    # A 

    # B --> 
    G.add_path(['B', 'C']) 

    # C --> 
    G.add_path(['C', 'B']) 

    # D --> 
    G.add_path(['D', 'A']) 
    G.add_path(['D', 'B']) 

    # E --> 
    G.add_path(['E', 'B']) 
    G.add_path(['E', 'D']) 
    G.add_path(['E', 'F']) 

    # F --> 
    G.add_path(['F', 'B']) 
    G.add_path(['F', 'E']) 

    # G --> 
    G.add_path(['G', 'B']) 
    G.add_path(['G', 'E']) 

    # H --> 
    G.add_path(['H', 'B']) 
    G.add_path(['H', 'E']) 

    # I --> 
    G.add_path(['I', 'B']) 
    G.add_path(['I', 'E']) 

    # J --> 
    G.add_path(['J', 'E']) 

    # J --> 
    G.add_path(['K', 'E']) 

    return G 

回答

3

你只需要与正常化矩阵方程获得的网页排名,因为网页排名的总和应为1

R = R/sum(R) 
print R 
#[[ 0.03278149] 
# [ 0.38440095] 
# [ 0.34291029] 
# [ 0.03908709] 
# [ 0.08088569] 
# [ 0.03908709] 
# [ 0.01616948] 
# [ 0.01616948] 
# [ 0.01616948] 
# [ 0.01616948] 
# [ 0.01616948]] 
print nx.pagerank_numpy(G, alpha=d) 
#{'A': 0.032781493159344234, 'C': 0.34291028550837976, 'B': 0.3844009488135542, 'E': 0.08088569323449775, 'D': 0.03908709209996617, 'G': 0.016169479016858397, 'F': 0.03908709209996617, 'I': 0.016169479016858397, 'H': 0.016169479016858397, 'K': 0.016169479016858397, 'J': 0.016169479016858397}