Package opennlp.tools.postag

Examples of opennlp.tools.postag.POSSample


    this.detokenizer = detokenizer;
  }
 
  public TokenSample read() throws IOException {
   
    POSSample posSample = samples.read();
   
    TokenSample tokenSample = null;
   
    if (posSample != null ) {
      tokenSample = new TokenSample(detokenizer, posSample.getSentence());
    }
   
    return tokenSample;
  }
View Full Code Here


      for(Parse tagNode : parse.getTagNodes()) {
        sentence.add(tagNode.getCoveredText());
        tags.add(tagNode.getType());
      }
     
      return new POSSample(sentence, tags);
    }
    else {
      return null;
    }
  }
View Full Code Here

        while ((line = lineStream.read()) != null) {

          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
          String[] tags = tagger.tag(whitespaceTokenizerLine);

          POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
        }
      }
      catch (IOException e) {
View Full Code Here

        new PlainTextByLineStream(
            ADParagraphStreamTest.class
                .getResourceAsStream("/opennlp/tools/formats/ad.sample"),
            "UTF-8"), false, false);

    POSSample sample = stream.read();
   
    assertEquals(23, sample.getSentence().length);
   
    assertEquals("Inicia", sample.getSentence()[0]);
    assertEquals("v-fin", sample.getTags()[0]);
   
    assertEquals("em", sample.getSentence()[1]);
    assertEquals("prp", sample.getTags()[1]);
   
    assertEquals("o", sample.getSentence()[2]);
    assertEquals("art", sample.getTags()[2]);
   
    assertEquals("Porto_Poesia", sample.getSentence()[9]);
    assertEquals("prop", sample.getTags()[9]);
  }
View Full Code Here

        new PlainTextByLineStream(
            ADParagraphStreamTest.class
                .getResourceAsStream("/opennlp/tools/formats/ad.sample"),
            "UTF-8"), true, false);

    POSSample sample = stream.read();
   
    assertEquals(27, sample.getSentence().length);
   
    assertEquals("Inicia", sample.getSentence()[0]);
    assertEquals("v-fin", sample.getTags()[0]);
   
    assertEquals("em", sample.getSentence()[1]);
    assertEquals("prp", sample.getTags()[1]);
   
    assertEquals("o", sample.getSentence()[2]);
    assertEquals("art", sample.getTags()[2]);
   
    assertEquals("Porto", sample.getSentence()[9]);
    assertEquals("B-prop", sample.getTags()[9]);
   
    assertEquals("Poesia", sample.getSentence()[10]);
    assertEquals("I-prop", sample.getTags()[10]);
  }
View Full Code Here

        new PlainTextByLineStream(
            ADParagraphStreamTest.class
                .getResourceAsStream("/opennlp/tools/formats/ad.sample"),
            "UTF-8"), false, true);

    POSSample sample = stream.read();
   
    assertEquals(23, sample.getSentence().length);
   
    assertEquals("Inicia", sample.getSentence()[0]);
    assertEquals("v-fin=PR=3S=IND=VFIN", sample.getTags()[0]);
   
    assertEquals("em", sample.getSentence()[1]);
    assertEquals("prp", sample.getTags()[1]);
   
    assertEquals("o", sample.getSentence()[2]);
    assertEquals("art=DET=M=S", sample.getTags()[2]);
   
    assertEquals("Porto_Poesia", sample.getSentence()[9]);
    assertEquals("prop=M=S", sample.getTags()[9]);
  }
View Full Code Here

      try {
        String line;
        while ((line = lineStream.read()) != null) {

          POSSample posSample;
          try {
            posSample = POSSample.parse(line);
          } catch (InvalidFormatException e) {
            System.err.println("Invalid format:");
            System.err.println(line);
            continue;
          }

          String[] chunks = chunker.chunk(posSample.getSentence(),
              posSample.getTags());

          System.out.println(new ChunkSample(posSample.getSentence(),
              posSample.getTags(), chunks).nicePrint());

          perfMon.incrementCounter();
        }
      }
      catch (IOException e) {
View Full Code Here

      Node root = paragraph.getRoot();
      List<String> sentence = new ArrayList<String>();
      List<String> tags = new ArrayList<String>();
      process(root, sentence, tags);

      return new POSSample(sentence, tags);
    }
    return null;
  }
View Full Code Here

    // One paragraph contains a whole sentence and, the token
    // and tag will be read from the FORM and POSTAG field.
   
   String paragraph = samples.read();
  
   POSSample sample = null;
  
   if (paragraph != null) {
    
     // paragraph get lines
     BufferedReader reader = new BufferedReader(new StringReader(paragraph));
    
     List<String> tokens = new ArrayList<String>(100);
     List<String> tags = new ArrayList<String>(100);
    
     String line;
     while ((line = reader.readLine())  != null) {
    
       final int minNumberOfFields = 5;
      
       String parts[] = line.split("\t");
      
       if (parts.length >= minNumberOfFields) {
         tokens.add(parts[1]);
         tags.add(parts[4]);
       }
       else {
         throw new InvalidFormatException("Every non-empty line must have at least " +
             minNumberOfFields + " fields: '" + line + "'!");
       }
     }
    
     // just skip empty samples and read next sample
     if (tokens.size() == 0)
       sample = read();
      
     sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()]));
   }
  
   return sample;
  }
View Full Code Here

    InputStreamFactory in = new ResourceAsStreamFactory(ConllXPOSSampleStreamTest.class,
        "/opennlp/tools/formats/conllx.sample");

    ObjectStream<POSSample> sampleStream = new ConllXPOSSampleStream(in,Charset.forName("UTF-8"));

    POSSample a = sampleStream.read();

    String aSentence[] = a.getSentence();
    String aTags[] = a.getTags();

    assertEquals(22, aSentence.length);
    assertEquals(22, aTags.length);

    assertEquals("To", aSentence[0]);
    assertEquals("AC", aTags[0]);

    assertEquals("kendte", aSentence[1]);
    assertEquals("AN", aTags[1]);

    assertEquals("russiske", aSentence[2]);
    assertEquals("AN", aTags[2]);

    assertEquals("historikere", aSentence[3]);
    assertEquals("NC", aTags[3]);

    assertEquals("Andronik", aSentence[4]);
    assertEquals("NP", aTags[4]);

    assertEquals("Andronik", aSentence[5]);
    assertEquals("NP", aTags[5]);

    assertEquals("og", aSentence[6]);
    assertEquals("CC", aTags[6]);

    assertEquals("Igor", aSentence[7]);
    assertEquals("NP", aTags[7]);

    assertEquals("Klamkin", aSentence[8]);
    assertEquals("NP", aTags[8]);

    assertEquals("tror", aSentence[9]);
    assertEquals("VA", aTags[9]);

    assertEquals("ikke", aSentence[10]);
    assertEquals("RG", aTags[10]);

    assertEquals(",", aSentence[11]);
    assertEquals("XP", aTags[11]);

    assertEquals("at", aSentence[12]);
    assertEquals("CS", aTags[12]);

    assertEquals("Rusland", aSentence[13]);
    assertEquals("NP", aTags[13]);

    assertEquals("kan", aSentence[14]);
    assertEquals("VA", aTags[14]);

    assertEquals("udvikles", aSentence[15]);
    assertEquals("VA", aTags[15]);

    assertEquals("uden", aSentence[16]);
    assertEquals("SP", aTags[16]);

    assertEquals("en", aSentence[17]);
    assertEquals("PI", aTags[17]);

    assertEquals("\"", aSentence[18]);
    assertEquals("XP", aTags[18]);

    assertEquals("jernnæve", aSentence[19]);
    assertEquals("NC", aTags[19]);

    assertEquals("\"", aSentence[20]);
    assertEquals("XP", aTags[20]);

    assertEquals(".", aSentence[21]);
    assertEquals("XP", aTags[21]);

    POSSample b = sampleStream.read();

    String bSentence[] = b.getSentence();
    String bTags[] = b.getTags();

    assertEquals(12, bSentence.length);
    assertEquals(12, bTags.length);

    assertEquals("De", bSentence[0]);
View Full Code Here

TOP

Related Classes of opennlp.tools.postag.POSSample

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.