public class PageRank { #region Private Fields ArrayList _incomingLinks, _leafNodes; Vector _numLinks; double _alpha, _convergence; int _checkSteps; #endregion #region Constructor public PageRank(ArrayList linkMatrix, double alpha = 0.85, double convergence = 0.0001, int checkSteps = 10) { Tuple, ArrayList> tuple = TransposeLinkMatrix(linkMatrix); _incomingLinks = tuple.Item1; _numLinks = tuple.Item2; _leafNodes = tuple.Item3; _alpha = alpha; _convergence = convergence; _checkSteps = checkSteps; } #endregion #region Methods /// /// Convenience wrap for the link matrix transpose and the generator. /// See PageRankGenerator method for parameter descriptions /// public double[] ComputePageRank() { Vector final = null; foreach (Vector generator in PageRankGenerator(_incomingLinks, _numLinks, _leafNodes, _alpha, _convergence, _checkSteps)) final = generator; return final.ToArray(); } /// /// Transposes the link matrix which contains the links from each page. /// Returns a Tuple of: /// 1) pages pointing to a given page, /// 2) how many links each page contains, and /// 3) which pages contain no links at all. /// We want to know is which pages /// /// outGoingLinks[i] contains the indices of the pages pointed to by page i /// A tuple of (incomingLinks, numOutGoingLinks, leafNodes) protected Tuple, ArrayList> TransposeLinkMatrix(ArrayList outGoingLinks) { int nPages = outGoingLinks.Count; // incomingLinks[i] will contain the indices jj of the pages // linking to page i ArrayList incomingLinks = new ArrayList(nPages); for (int i = 0; i < nPages; i++) incomingLinks.Add(new List()); // the number of links in each page Vector numLinks = new DenseVector(nPages); // the indices of the leaf nodes ArrayList leafNodes = new ArrayList(); for (int i = 0; i < nPages; i++) { List values = outGoingLinks[i] as List; if (values.Count == 0) leafNodes.Add(i); else { numLinks[i] = values.Count; // transpose the link matrix foreach (int j in values) { List list = (List)incomingLinks[j]; list.Add(i); incomingLinks[j] = list; } } } return new Tuple, ArrayList>(incomingLinks, numLinks, leafNodes); } /// /// Computes an approximate page rank vector of N pages to within some convergence factor. /// /// At a sparse square matrix with N rows. At[i] contains the indices of pages jj linking to i /// contains the indices of pages without links /// iNumLinks[i] is the number of links going out from i. /// a value between 0 and 1. Determines the relative importance of "stochastic" links. /// a relative convergence criterion. Smaller means better, but more expensive. /// check for convergence after so many steps protected IEnumerable> PageRankGenerator(ArrayList at, Vector numLinks, ArrayList leafNodes, double alpha, double convergence, int checkSteps) { int N = at.Count; int M = leafNodes.Count; Vector iNew = Ones(N) / N; Vector iOld = Ones(N) / N; bool done = false; while (!done) { // normalize every now and then for numerical stability iNew /= Sum(iNew); for (int i = 0; i < checkSteps; i++) { // swap arrays Vector temp = iOld; iOld = iNew; iNew = temp; // an element in the 1 x I vector. // all elements are identical. double oneIv = (1 - alpha) * Sum(iOld) / N; // an element of the A x I vector. // all elements are identical. double oneAv = 0.0; if (M > 0) oneAv = alpha * Sum(Take(iOld, leafNodes)) / N; // the elements of the H x I multiplication for(int j = 0;j < N;j++) { List page = (List)at[j]; double h = 0; if (page.Count > 0) h = alpha * Take(iOld, page).DotProduct(1.0 / Take(numLinks, page)); iNew[j] = h + oneAv + oneIv; } } Vector diff = iNew - iOld; done = diff.SumMagnitudes() < convergence; yield return iNew; } } private Vector Ones(int n) { Vector result = new DenseVector(n); for (int i = 0; i < result.Count; i++) result[i] = 1.0; return result; } private double Sum(Vector vector) { double sum = 0; foreach (double val in vector) sum += val; return sum; } /// /// Simplified (numPy) take method: 1) axis is always 0, 2) first argument is always a vector /// /// List of values /// List of indices /// Vector containing elements from vector 1 at the indicies in vector 2 private Vector Take(Vector vector1, IList vector2) { Vector result = new DenseVector(vector2.Count); for (int i = 0; i < vector2.Count; i++) result[i] = vector1[Convert.ToInt32(vector2[i])]; return result; } #endregion }