Package opennlp.tools.util

Examples of opennlp.tools.util.PlainTextByLineStream


    trainingData.append("c\n");
    trainingData.append("\n");
    trainingData.append("d\n");
   
    ObjectStream<String> untokenizedLineStream =
      new PlainTextByLineStream(new StringReader(trainingData.toString()));
   
    ObjectStream<NameSample> trainingStream = new NameSampleDataStream(untokenizedLineStream);
   
    assertFalse(trainingStream.read().isClearAdaptiveDataSet());
    assertFalse(trainingStream.read().isClearAdaptiveDataSet());
View Full Code Here


 
  @Test
  public void testIncludeFeats() throws IOException {
    // add one sentence with includeFeats = true
    ADPOSSampleStream stream = new ADPOSSampleStream(
        new PlainTextByLineStream(
            ADParagraphStreamTest.class
                .getResourceAsStream("/opennlp/tools/formats/ad.sample"),
            "UTF-8"), false, true);

    POSSample sample = stream.read();
View Full Code Here

  public void testHtmlNameSampleParsing() throws IOException {
    InputStream in = getClass().getClassLoader().getResourceAsStream(
        "opennlp/tools/namefind/html1.train");
   
    NameSampleDataStream ds = new NameSampleDataStream(
        new PlainTextByLineStream(new InputStreamReader(in, "UTF-8")));

    NameSample ns = ds.read();
   
    assertEquals(1, ns.getSentence().length);
    assertEquals("<html>", ns.getSentence()[0]);
View Full Code Here

  public void setup() throws IOException {
    InputStream in = ADSentenceSampleStreamTest.class
        .getResourceAsStream("/opennlp/tools/formats/ad.sample");

    ADSentenceSampleStream stream = new ADSentenceSampleStream(
        new PlainTextByLineStream(in, "UTF-8"), true);

    SentenceSample sample = stream.read();

    while (sample != null) {
      System.out.println(sample.getDocument());
View Full Code Here

      SentenceModel model = new SentenceModelLoader().load(new File(args[0]));

      SentenceDetectorME sdetector = new SentenceDetectorME(model);

      ObjectStream<String> paraStream =
        new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(System.in)));

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
      perfMon.start();

      try {
View Full Code Here

  }
 
  private static ADSentenceStream openData() throws IOException {
    InputStream in = ADParagraphStreamTest.class.getResourceAsStream("/opennlp/tools/formats/ad.sample");
   
    return new ADSentenceStream(new PlainTextByLineStream(in, "UTF-8"));
  }
View Full Code Here

  public void setup() throws IOException {
    InputStream in = ADParagraphStreamTest.class
  .getResourceAsStream("/opennlp/tools/formats/ad.sample");

    ADChunkSampleStream stream = new ADChunkSampleStream(
      new PlainTextByLineStream(in, "UTF-8"));

    ChunkSample sample = stream.read();

    while (sample != null) {
      samples.add(sample);
View Full Code Here

   
      Detokenizer detokenizer = new DictionaryDetokenizer(
          new DetokenizationDictionaryLoader().load(new File(args[0])));

      ObjectStream<String> tokenizedLineStream =
        new PlainTextByLineStream(new InputStreamReader(System.in));

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
      perfMon.start();

      try {
        String tokenizedLine;
        while ((tokenizedLine = tokenizedLineStream.read()) != null) {

          // white space tokenize line
          String tokens[] = WhitespaceTokenizer.INSTANCE.tokenize(tokenizedLine);

          System.out.println(detokenizer.detokenize(tokens, null));
View Full Code Here

      DoccatModel model = new DoccatModelLoader().load(new File(args[0]));

      DocumentCategorizerME doccat = new DocumentCategorizerME(model);

      ObjectStream<String> documentStream = new ParagraphStream(
          new PlainTextByLineStream(new InputStreamReader(System.in)));

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
      perfMon.start();

      try {
View Full Code Here

      throws IOException {
    InputStream in = ChunkerFactoryTest.class.getClassLoader()
        .getResourceAsStream("opennlp/tools/chunker/test.txt");
    Reader sentences = new InputStreamReader(in);

    ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(
        sentences));
    return stream;
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.util.PlainTextByLineStream

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.