Examples of ParseData


Examples of org.apache.nutch.parse.ParseData

            Metadata metadata = new Metadata();
            metadata.set(Response.CONTENT_LENGTH, Long.toString(entry.getSize()));
            metadata.set(Response.CONTENT_TYPE, contentType);
            Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
            Parse parse = new ParseUtil(this.conf).parse(content);
            ParseData theParseData = parse.getData();
            Outlink[] theOutlinks = theParseData.getOutlinks();
           
            for(int count = 0; count < theOutlinks.length; count++) {
              outLinksList.add(new Outlink(theOutlinks[count].getToUrl(), theOutlinks[count].getAnchor(), this.conf));
            }
           
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

    metadataCollector.notifyProperty("TPE1-Text", tag.getArtist());
    metadataCollector.notifyProperty("COMM-Text", tag.getComment());
    metadataCollector.notifyProperty("TCON-Text", "(" + tag.getGenre() + ")");
    metadataCollector.notifyProperty("TIT2-Text", tag.getTitle());
    metadataCollector.notifyProperty("TYER-Text", tag.getYear());
    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
                                        metadataCollector.getTitle(),
                                        metadataCollector.getOutlinks(),
                                        contentMeta,
                                        metadataCollector.getData());
    return new ParseImpl(metadataCollector.getText(), parseData);
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

            metadataCollector.notifyProperty(name + "-" + bodyName, bodyValue);
          }
        }
      }
    }
    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
                                        metadataCollector.getTitle(),
                                        metadataCollector.getOutlinks(),
                                        contentMeta,
                                        metadataCollector.getData());
    return new ParseImpl(metadataCollector.getText(), parseData);
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

    if (resultTitle == null) {
      resultTitle = "";
    }

    outlinks = (Outlink[]) outLinksList.toArray(new Outlink[0]);
    final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
                                              resultTitle, outlinks,
                                              content.getMetadata());
    parseData.setConf(this.conf);

    if (LOG.isTraceEnabled()) { LOG.trace("Zip file parsed sucessfully !!"); }
    return new ParseImpl(resultText, parseData);
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

    String class1 = "NonExistingFilter";
    String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
    conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);

    IndexingFilters filters = new IndexingFilters(conf);
    filters.filter(new Document(), new ParseImpl("text", new ParseData(
        new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
        "http://www.example.com/"), new CrawlDatum(), new Inlinks());
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

        }
        // if (LOG.isInfoEnabled()) {
        //   LOG.info("Outlinks: "+outlinks);
        // }

        ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
                contentTitle.toString(), outlinks, content.getMetadata());
        parseData.setConf(this.conf);
        return new ParseImpl(indexText.toString(), parseData);
    }
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

      title = "";

    // collect outlink
    Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, getConf());

    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title,
                                        outlinks, content.getMetadata());
    parseData.setConf(this.conf);
    return new ParseImpl(text, parseData);
  }
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

    }

    String text = delegate.getText();

    return new ParseImpl(text,
                         new ParseData(ParseStatus.STATUS_SUCCESS,
                                       title,
                                       OutlinkExtractor
        .                              getOutlinks(text, this.conf),
                                       content.getMetadata(),
                                       metadata));
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

            //check that there are 3 outlinks:
            //http://test.channel.com
            //http://www-scf.usc.edu/~mattmann/
            //http://www.nutch.org

            ParseData theParseData = parse.getData();

            Outlink[] theOutlinks = theParseData.getOutlinks();

            assertTrue("There aren't 3 outlinks read!", theOutlinks.length == 3);

            //now check to make sure that those are the two outlinks
            boolean hasLink1 = false, hasLink2 = false, hasLink3 = false;
View Full Code Here

Examples of org.apache.nutch.parse.ParseData

  public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException {
    CrawlDatum lastG = null;
    CrawlDatum lastF = null;
    CrawlDatum lastSig = null;
    Content lastC = null;
    ParseData lastPD = null;
    ParseText lastPT = null;
    String lastGname = null;
    String lastFname = null;
    String lastSigname = null;
    String lastCname = null;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.