Package net.relatedwork.server.datamining

Source Code of net.relatedwork.server.datamining.CalculatePageRank

package net.relatedwork.server.datamining;

import java.util.HashMap;
import java.util.Map.Entry;

import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.index.Index;
import org.neo4j.index.lucene.ValueContext;
import org.neo4j.kernel.EmbeddedGraphDatabase;

import net.relatedwork.server.utils.IOHelper;

/**
* this is a small hack which helps to calculate page rank values for rather
* small graphs iteratively. I am sure a modern computer should be able to
* handle graphs with about 1 Mio. nodes
*
* for the sake of simpleness this is not the original algoirthm but the results
* will be close ther is no support for calculating pagerank only on certain
* relationship types. This might be added soon
*
* @author rpickhardt
*
*/

public class CalculatePageRank {
  private EmbeddedGraphDatabase db;

  public EmbeddedGraphDatabase getGraphDb() {
    return db;
  }

  public void setGraphDb(EmbeddedGraphDatabase db) {
    this.db = db;
  }

  private Double d;

  public CalculatePageRank(EmbeddedGraphDatabase db2) {
    db = db2;
    d = 0.85;
  }

  CalculatePageRank(String path) {
    db = new EmbeddedGraphDatabase(path);
    d = 0.85;
  }

  public void dcalculatePageRank(Double _d, int iterationen) {
    d = _d;
    HashMap<Long, Double> nodeIndex = getNodesToIndex();
    HashMap<Long, Double> newPageRankValues = new HashMap<Long, Double>(
        nodeIndex);

    for (int i = 0; i < iterationen; i++) {
      IOHelper.strongLog("Calculateing page rank. Iteration number: \t" + i);
      for (Long key:newPageRankValues.keySet()){
        newPageRankValues.put(key,newPageRankValues.get(key)*0.15);
      }
     
      int nodeCnt = 0;
      for (Entry<Long, Double> e : nodeIndex.entrySet()) {
        Node n = db.getNodeById(e.getKey());

        if (nodeCnt++%5000==0){
          IOHelper.log((nodeCnt-1) + " nodes have been assigned new PR values in iteration number: " + i );
        }
       
        int degree = 1;
        for (Relationship rel : n.getRelationships(Direction.OUTGOING)) {
          degree++;
        }

        Double currentPageRank = e.getValue();
        // this amout of PR is given to all nodes the current node links
        // to.
        Double delta = d * currentPageRank.doubleValue() / degree;

        for (Relationship rel : n.getRelationships(Direction.OUTGOING)) {
          Long otherNodeId = rel.getEndNode().getId();
          if (otherNodeId == null)
            continue;
          Double currentPRValueOfOtherNode = newPageRankValues
              .get(otherNodeId);
          newPageRankValues.put(otherNodeId,
              currentPRValueOfOtherNode + delta);
        }
      }
      nodeIndex = newPageRankValues;
    }

    updatePageRankValues(nodeIndex);

  }

  private void updatePageRankValues(HashMap<Long, Double> nodeIndex) {
    IOHelper.strongLog("start to hit the disk");
    int cnt = 0;
    Transaction tx = db.beginTx();
    try {
      for (Entry<Long, Double> e : nodeIndex.entrySet()) {
        cnt++;
        if (cnt > 50000){
          tx.success();
          tx.finish();
          cnt=0;
          tx = db.beginTx();
          IOHelper.log("updated another 50k nodes with PR properties");
        }
        Node n = db.getNodeById(e.getKey());
        n.setProperty("pageRankValue", e.getValue());
      }
      tx.success();
    } finally {
      tx.finish();
    }
  }

  private HashMap<Long, Double> getNodesToIndex() {
    HashMap<Long, Double> nodeIndex = new HashMap<Long, Double>();
    for (Node n : db.getAllNodes()) {
      nodeIndex.put(n.getId(), 1.0);
    }
    return nodeIndex;
  }
}
TOP

Related Classes of net.relatedwork.server.datamining.CalculatePageRank

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.