Package opennlp.tools.postag

Examples of opennlp.tools.postag.POSTaggerME


     * @throws IOException on any error while reading the model data
     */
    public POSTagger getPartOfSpeechTagger(String language) throws IOException {
        POSModel posModel = getPartOfSpeachModel(language);
        if(posModel != null){
            return new POSTaggerME(posModel);
        } else {
            log.debug("No POS Model for language '{}'",language);
            return null;
        }
    }
View Full Code Here


        return new TokenizerME(new TokenizerModel(
                getResourceAsStream(tokenizerModelFile)));
    }

    public static POSTagger getDefaultPosTagger() throws IOException {
        return new POSTaggerME(new POSModel(
                getResourceAsStream(taggerModelFile)));
    }
View Full Code Here

  private int completeIndex;
  private int incompleteIndex;

  public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel(), 10, 0),
        new ChunkerME(model.getParserChunkerModel(),
            ChunkerME.DEFAULT_BEAM_SIZE,
            new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
            new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
            model.getHeadRules(), beamSize, advancePercentage);
View Full Code Here

        String intext = text.text();
    //System.out.println("\n\nRR- nextractNPNGrams(...) method called! with text: " + intext + "\n\n");
    List<SurfaceFormOccurrence> npNgramSFLst = new ArrayList<SurfaceFormOccurrence>();
    SentenceDetectorME  sentenceDetector = new SentenceDetectorME((SentenceModel)sentenceModel);
    TokenizerME tokenizer = new TokenizerME((TokenizerModel)tokenModel);
    POSTaggerME posTagger = new POSTaggerME((POSModel)posModel);
    ChunkerME chunker = new ChunkerME((ChunkerModel)chunkModel);

    Span[] sentSpans = sentenceDetector.sentPosDetect(intext);
    for (Span sentSpan : sentSpans) {
      String sentence = sentSpan.getCoveredText(intext).toString();
      int start = sentSpan.getStart();
      Span[] tokSpans = tokenizer.tokenizePos(sentence);
      String[] tokens = new String[tokSpans.length];
      // System.out.println("\n\nTokens:");
      for (int i = 0; i < tokens.length; i++) {
        tokens[i] = tokSpans[i].getCoveredText(sentence).toString();
        // System.out.println(tokens[i]);
      }
      String[] tags = posTagger.tag(tokens);
      Span[] chunks = chunker.chunkAsSpans(tokens, tags);
      for (Span chunk : chunks) {
        if ("NP".equals(chunk.getType())) {
          //Note: getStart()/getEnd() methods of Chunk spans only give the start and end token indexes of the chunk.
          //The actual Start/End positions of the chunk in the sentence need to be extracted from POS sentenceSpans.
View Full Code Here

    String cleanString = sentence.replace('’', '\'')// this is the type of apostrophe that OpenNLP expects
    return tokenizer.tokenize(cleanString);
  }

  private String[] posTag(String[] tokens) {
    POSTaggerME posTagger = new POSTaggerME(posModel);
    return posTagger.tag(tokens);
  }
View Full Code Here

  private int[] attachments;

  public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel(),
        ChunkerME.DEFAULT_BEAM_SIZE,
        new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
        new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
        model.getHeadRules(),
View Full Code Here

  private int completeIndex;
  private int incompleteIndex;

  public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel(), 10, 0),
        new ChunkerME(model.getParserChunkerModel(),
            ChunkerME.DEFAULT_BEAM_SIZE,
            new ParserChunkerSequenceValidator(model.getParserChunkerModel()),
            new ChunkContextGenerator(ChunkerME.DEFAULT_BEAM_SIZE)),
            model.getHeadRules(), beamSize, advancePercentage);
View Full Code Here

      System.out.println(getHelp());
    } else {

      POSModel model = new POSModelLoader().load(new File(args[0]));

      POSTaggerME tagger = new POSTaggerME(model);

      ObjectStream<String> lineStream = null;
      PerformanceMonitor perfMon = null;

      try {
        lineStream = new PlainTextByLineStream(new SystemInputStreamFactory(), SystemInputStreamFactory.encoding());
        perfMon = new PerformanceMonitor(System.err, "sent");
        perfMon.start();
        String line;
        while ((line = lineStream.read()) != null) {

          String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
          String[] tags = tagger.tag(whitespaceTokenizerLine);

          POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
          System.out.println(sample.toString());

          perfMon.incrementCounter();
View Full Code Here

        UimaUtil.BEAM_SIZE_PARAMETER);

    if (beamSize == null)
      beamSize = POSTaggerME.DEFAULT_BEAM_SIZE;

    this.posTagger = new POSTaggerME(model, beamSize, 0);
  }
View Full Code Here

                            language, StringUtils.abbreviate(text, 100) });
       
        //first get the models
        Tokenizer tokenizer = initTokenizer(language);
        SentenceDetector sentenceDetector = initSentence(language);
        POSTaggerME posTagger;
        if(sentenceDetector != null){ //sentence detection is requirement
            posTagger = initTagger(language);
        } else {
            posTagger = null;
        }
        ChunkerME chunker;
        if(posTagger != null && useChunker ){ //pos tags requirement
            chunker = initChunker(language);
        } else {
            chunker = null;
        }
        Map<String,Suggestion> suggestionCache = new TreeMap<String,Suggestion>();
        if(sentenceDetector != null){
            //add dots for multiple line breaks
            text = text.replaceAll("\\n\\n", ".\n");
            Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
            for (int i = 0; i < sentenceSpans.length; i++) {
                String sentence = sentenceSpans[i].getCoveredText(text).toString();
                Span[] tokenSpans = tokenizer.tokenizePos(sentence);
                String[] tokens = getTokensForSpans(sentence, tokenSpans);
                String[] pos;
                double[] posProbs;
                if(posTagger != null){
                    pos = posTagger.tag(tokens);
                    posProbs = posTagger.probs();
                } else {
                    pos = null;
                    posProbs = null;
                }
                Span[] chunkSpans;
View Full Code Here

TOP

Related Classes of opennlp.tools.postag.POSTaggerME

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.