Package opennlp.tools.postag

Examples of opennlp.tools.postag.POSSample


    ADPOSSampleStream stream = new ADPOSSampleStream(
        new PlainTextByLineStream(new ResourceAsStreamFactory(
            ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
            "UTF-8"), false, false);

    POSSample sample = stream.read();

    assertEquals(23, sample.getSentence().length);

    assertEquals("Inicia", sample.getSentence()[0]);
    assertEquals("v-fin", sample.getTags()[0]);

    assertEquals("em", sample.getSentence()[1]);
    assertEquals("prp", sample.getTags()[1]);

    assertEquals("o", sample.getSentence()[2]);
    assertEquals("art", sample.getTags()[2]);

    assertEquals("Porto_Poesia", sample.getSentence()[9]);
    assertEquals("prop", sample.getTags()[9]);
  }
View Full Code Here


    ADPOSSampleStream stream = new ADPOSSampleStream(
        new PlainTextByLineStream(new ResourceAsStreamFactory(
            ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
            "UTF-8"), true, false);

    POSSample sample = stream.read();

    assertEquals(27, sample.getSentence().length);

    assertEquals("Inicia", sample.getSentence()[0]);
    assertEquals("v-fin", sample.getTags()[0]);

    assertEquals("em", sample.getSentence()[1]);
    assertEquals("prp", sample.getTags()[1]);

    assertEquals("o", sample.getSentence()[2]);
    assertEquals("art", sample.getTags()[2]);

    assertEquals("Porto", sample.getSentence()[9]);
    assertEquals("B-prop", sample.getTags()[9]);

    assertEquals("Poesia", sample.getSentence()[10]);
    assertEquals("I-prop", sample.getTags()[10]);
  }
View Full Code Here

    ADPOSSampleStream stream = new ADPOSSampleStream(
        new PlainTextByLineStream(new ResourceAsStreamFactory(
            ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"),
            "UTF-8"), false, true);

    POSSample sample = stream.read();

    assertEquals(23, sample.getSentence().length);

    assertEquals("Inicia", sample.getSentence()[0]);
    assertEquals("v-fin=PR=3S=IND=VFIN", sample.getTags()[0]);

    assertEquals("em", sample.getSentence()[1]);
    assertEquals("prp", sample.getTags()[1]);

    assertEquals("o", sample.getSentence()[2]);
    assertEquals("art=DET=M=S", sample.getTags()[2]);

    assertEquals("Porto_Poesia", sample.getSentence()[9]);
    assertEquals("prop=M=S", sample.getTags()[9]);
  }
View Full Code Here

        Parse tok = nodes[ti];
        toks[ti] = tok.getCoveredText();
        preds[ti] = tok.getType();
      }

      return new POSSample(toks, preds);
    }
    else {
      return null;
    }
  }
View Full Code Here

        while ((line = lineStream.read()) != null) {

          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
          String[] tags = tagger.tag(whitespaceTokenizerLine);

          POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
        }
      } catch (IOException e) {
        CmdLineUtil.handleStdinIoError(e);
View Full Code Here

        lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
        perfMon = new PerformanceMonitor(System.err, "sent");
        String line;
        while ((line = lineStream.read()) != null) {

          POSSample posSample;
          try {
            posSample = POSSample.parse(line);
          } catch (InvalidFormatException e) {
            System.err.println("Invalid format:");
            System.err.println(line);
            continue;
          }

          String[] chunks = chunker.chunk(posSample.getSentence(),
                  posSample.getTags());

          System.out.println(new ChunkSample(posSample.getSentence(),
                  posSample.getTags(), chunks).nicePrint());

          perfMon.incrementCounter();
        }
      } catch (IOException e) {
        CmdLineUtil.handleStdinIoError(e);
View Full Code Here

      Node root = paragraph.getRoot();
      List<String> sentence = new ArrayList<String>();
      List<String> tags = new ArrayList<String>();
      process(root, sentence, tags);

      return new POSSample(sentence, tags);
    }
    return null;
  }
View Full Code Here

    this.detokenizer = detokenizer;
  }

  public TokenSample read() throws IOException {

    POSSample posSample = samples.read();

    TokenSample tokenSample = null;

    if (posSample != null ) {
      tokenSample = new TokenSample(detokenizer, posSample.getSentence());
    }

    return tokenSample;
  }
View Full Code Here

      for(Parse tagNode : parse.getTagNodes()) {
        sentence.add(tagNode.getCoveredText());
        tags.add(tagNode.getType());
      }

      return new POSSample(sentence, tags);
    }
    else {
      return null;
    }
  }
View Full Code Here

    // One paragraph contains a whole sentence and, the token
    // and tag will be read from the FORM and POSTAG field.

   String paragraph = samples.read();

   POSSample sample = null;

   if (paragraph != null) {

     // paragraph get lines
     BufferedReader reader = new BufferedReader(new StringReader(paragraph));

     List<String> tokens = new ArrayList<String>(100);
     List<String> tags = new ArrayList<String>(100);

     String line;
     while ((line = reader.readLine())  != null) {

       final int minNumberOfFields = 5;

       String parts[] = line.split("\t");

       if (parts.length >= minNumberOfFields) {
         tokens.add(parts[1]);
         tags.add(parts[4]);
       }
       else {
         throw new InvalidFormatException("Every non-empty line must have at least " +
             minNumberOfFields + " fields: '" + line + "'!");
       }
     }

     // just skip empty samples and read next sample
     if (tokens.size() == 0)
       sample = read();

     sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()]));
   }

   return sample;
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.postag.POSSample

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.