Package org.archive.access.nutch.jobs.graph

Examples of org.archive.access.nutch.jobs.graph.GraphManager


   * @throws IOException
   */
  public int buildGraph(Path inputSegments[], Path outputPath) throws IOException {       
 
    final SequenceFile.Writer writer=SequenceFile.createWriter(fs, conf, new Path(outputPath,GRAPH_FILE), ArquivoWebKeyValueWritable.class, NullWritable.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec());   
    final GraphManager graph = new GraphManager()
   
    readLinks(inputSegments, new ReadLinksProcessor() {     
      public void run(String fromUrl, String toUrl) throws IOException {               
        writer.append(new ArquivoWebKeyValueWritable(graph.getId(fromUrl),graph.getId(toUrl)),NullWritable.get());       
      }
    });

    writer.close();
    return graph.numNodes();   
  }
View Full Code Here


   * Write file with pagerank scores  
   */
  public void writeFileScores(Path inputSegments[], Path outputFile, final double scores[]) throws IOException {
   
    final SequenceFile.Writer writer=SequenceFile.createWriter(fs, conf, outputFile, Text.class, FloatWritable.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    final GraphManager graph = new GraphManager();

    readLinks(inputSegments, new ReadLinksProcessor() {     
      public void run(String fromUrl, String toUrl) throws IOException // read urls in the same order when it created the web graph, to write scores
       
        int id;
        if (!graph.hasId(fromUrl)) {
          id=graph.getId(fromUrl);
          writer.append(new Text(fromUrl), new FloatWritable( (float)scores[id] ));               
        }
        if (!graph.hasId(toUrl)) {
          id=graph.getId(toUrl);
          writer.append(new Text(toUrl), new FloatWritable( (float)scores[id] ));               
        }
      } 
    });
     
View Full Code Here

   */
  public void writeFileScores2debug(Path inputSegments[], Path outputFile, final double scores[]) throws IOException {
   
    FSDataOutputStream out = fs.create(outputFile);
    final PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out)));    
    final GraphManager graph = new GraphManager();
   
    readLinks(inputSegments, new ReadLinksProcessor() {   
      public void run(String fromUrl, String toUrl) throws IOException // read urls in the same order when it created the web graph, to write scores
       
        int id;
        if (!graph.hasId(fromUrl)) {
          id=graph.getId(fromUrl);
          writer.println(fromUrl+" "+scores[id]);               
        }
        if (!graph.hasId(toUrl)) {
          id=graph.getId(toUrl);
          writer.println(toUrl+" "+scores[id]);               
        }
      }
    });
     
View Full Code Here

TOP

Related Classes of org.archive.access.nutch.jobs.graph.GraphManager

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.