Package opennlp.tools.namefind

Examples of opennlp.tools.namefind.NameFinderME$NameFinderSequenceValidator


    /**
     * Initializes the Name Finder, Tokenizer and Sentence Detector.
     *
     * @throws IOException if one of the models cannot be loaded
     */
    public ApacheExtractor() throws IOException {
        nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel)));
        tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel)));
        sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel)));
    }
View Full Code Here


            // Build the model from the stream; the stream is closed whether or
            // not model construction succeeds.
            try {
                model = new TokenNameFinderModel(finderModelInputStream);
            } finally {
                finderModelInputStream.close();
            }
            // Wrap the loaded model in a name finder and collect it.
            NameFinderME finder = new NameFinderME(model);
            finders.add(finder);
        }

        return finders;
    }
View Full Code Here

    File modelDir = getModelDir();
    
    finder = new NameFinderME[modelName.length];
    for (int i=0; i < modelName.length; i++) {
      finder[i] = new NameFinderME(new TokenNameFinderModel(
          new FileInputStream(
              new File(modelDir, "en-ner-" + modelName[i] + ".bin")
              )));
    }
View Full Code Here

              "said Joanne " +
      "Drake, chief of staff for the Reagan Foundation."};
    NameFinderME[] finders = new NameFinderME[3];
    String[] names = {"person", "location", "date"};
    for (int mi = 0; mi < names.length; mi++) {  //<co id="co.opennlp.name.1"/>
      finders[mi] = new NameFinderME(new TokenNameFinderModel(
          new FileInputStream(
              new File(modelDir, "en-ner-" + names[mi] + ".bin")
          )));
    }
View Full Code Here

   
    File outFile = new File(destDir,"multi-custom.bin");
    FileOutputStream outFileStream = new FileOutputStream(outFile);
    model.serialize(outFileStream);
   
    NameFinderME nameFinder = new NameFinderME(model);
   
    String[] tokens =
        (" Britney Spears was reunited with her sons Saturday .")
        .split("\\s+");
    Span[] names = nameFinder.find(tokens);
    displayNames(names, tokens);
    //<end id="ne-namesample-type"/>
   
    assertEquals("person", names[0].getType());
    assertEquals("date", names[1].getType());
View Full Code Here

    //String[] names = {"person"};
    //String[] names = {"date","location","money","organization","percentage","person","time"};
    String[] names = {"person","location","date"};
    NameFinderME[] finders = new NameFinderME[names.length];
    for (int mi = 0; mi < names.length; mi++) {
      finders[mi] = new NameFinderME(new TokenNameFinderModel(
          new FileInputStream(
              new File(modelDir, "en-ner-" + names[mi] + ".bin")
              )));
    }
    memStatus.dumpMemory("after non-pooled model load of " + Arrays.toString(names));
View Full Code Here

    //String[] names = {"date","location","money","organization","percentage","person","time"};
    //<start id="ne-pool"/>
    String[] names = {"person","location","date"};
    NameFinderME[] finders = new NameFinderME[names.length];
    for (int mi = 0; mi < names.length; mi++) { //<co id="co.opennlp.name.init4"/>
      finders[mi] = new NameFinderME(
        new PooledTokenNameFinderModel( //<co id="co.opennlp.name.pool"/>
          new FileInputStream(
              new File(modelDir, "en-ner-"
                      + names[mi] + ".bin"))));
    }
View Full Code Here

   <callout arearefs="co.opennlp.name.persist2"><para>Save the model to a file.</para></callout>
   </calloutlist>*/
    //<end id="ne-features-train"/>
   
    //<start id="ne-features-test"/>
    NameFinderME finder = new NameFinderME(
        new TokenNameFinderModel(
            new FileInputStream(
                new File(destDir, "person-custom2.bin")
                )), featureGenerators, NameFinderME.DEFAULT_BEAM_SIZE);
    //<end id="ne-features-test"/>
View Full Code Here

      "The 86-year-old Reagan will remain overnight for " +
      "observation at a hospital in Santa Monica, California, " +
              "said Joanne " +
      "Drake, chief of staff for the Reagan Foundation."};
   
    NameFinderME finder = new NameFinderME//<co id="co.opennlp.name.initmodel"/>
      new TokenNameFinderModel(new FileInputStream(getPersonModel()))
    );
   
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE; //<co id="co.opennlp.name.inittokenizer2"/>
   
    for (int si = 0; si < sentences.length; si++) {
      String[] tokens = tokenizer.tokenize(sentences[si]); //<co id="co.opennlp.name.tokenize2"/>
      Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames3"/>
      displayNames(names, tokens);
    }
   
    finder.clearAdaptiveData(); //<co id="co.opennlp.name.clear"/>
    /*<calloutlist>
    <callout arearefs="co.opennlp.name.initmodel">
      <para>Initialize a new model for identifying people names based on the
        binary compressed model in the file "en-ner-person.bin".</para>
    </callout>
    <callout arearefs="co.opennlp.name.inittokenizer2">
      <para>Initialize a tokenizer to split the sentence into individual words
        and symbols.</para>
    </callout>
    <callout arearefs="co.opennlp.name.tokenize2">
      <para>Split the sentence into an array of tokens.</para>
    </callout>
    <callout arearefs="co.opennlp.name.findnames3">
      <para>Identify the names in the sentence and return token-based offsets
      to these names.</para>
    </callout>
    <callout arearefs="co.opennlp.name.clear">
      <para>Clear data structures that store which words have been seen
      previously in the document and whether these words were considered part
      of a person's name.</para>
    </callout>   
    </calloutlist>*/
    //<end id="ne-setup"/>

    //<start id="ne-display2"/>
    for (int si = 0; si < sentences.length; si++) { //<co id="co.opennlp.name.eachsent2"/>
      Span[] tokenSpans = tokenizer.tokenizePos(sentences[si]); //<co id="co.opennlp.name.tokenizepos"/>
      String[] tokens = Span.spansToStrings(tokenSpans, sentences[si]); //<co id="co.opennlp.name.convert2strings"/>
      Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames4"/>

      for (int ni = 0; ni < names.length; ni++) {
        Span startSpan = tokenSpans[names[ni].getStart()]; //<co id="co.opennlp.name.computestart"/>
        int nameStart  = startSpan.getStart();
       
        Span endSpan   = tokenSpans[names[ni].getEnd() - 1]; //<co id="co.opennlp.name.computeend"/>
        int nameEnd    = endSpan.getEnd();
       
        String name = sentences[si].substring(nameStart, nameEnd); //<co id="co.opennlp.name.namestring"/>
        System.out.println(name);
      }
    }
    /*<calloutlist>
    <callout arearefs="co.opennlp.name.eachsent2">
      <para>Iterate over each sentence.</para>
    </callout>
    <callout arearefs="co.opennlp.name.tokenizepos">
      <para>Split the sentence into an array of tokens and return the
        character offsets (spans) of those tokens.</para>
    </callout>
    <callout arearefs="co.opennlp.name.findnames4">
      <para>
      Identify the names in the sentence and return token-based offsets to these names.
      </para>
    </callout>
    <callout arearefs="co.opennlp.name.computestart">
      <para>
      Compute the start character index of the name.
      </para>
    </callout>   
    <callout arearefs="co.opennlp.name.computeend">
      <para>
      Compute the end character index (last character +1) of the name.
      </para>
    </callout>
    <callout arearefs="co.opennlp.name.namestring">
      <para>
      Compute the string which represents the name.
      </para>
    </callout>
    </calloutlist>*/
    //<end id="ne-display2"/>
    //<start id="ne-prob"/>
    for (int si = 0; si < sentences.length; si++) {//<co id="co.opennlp.name.eachsent3"/>
      String[] tokens = tokenizer.tokenize(sentences[si]); //<co id="co.opennlp.name.tokenize3"/>
      Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames1"/>
      double[] spanProbs = finder.probs(names); //<co id="co.opennlp.name.probs"/>
    }
    /*<calloutlist>
    <callout arearefs="co.opennlp.name.eachsent3"><para>Iterate over each sentence.</para></callout>
    <callout arearefs="co.opennlp.name.tokenize3"><para>Split the sentence into an array of tokens.</para></callout>
    <callout arearefs="co.opennlp.name.findnames1"><para>Identify the names in the sentence and return token-based offsets to these names.</para></callout>
View Full Code Here

    File modelFile = new File(models, "en-sent.bin");
    InputStream modelStream = new FileInputStream(modelFile);
    SentenceModel model = new SentenceModel(modelStream);
    sentenceDetector = new SentenceDetectorME(model);
    finders = new HashMap<String, NameFinderME>();
    finders.put("Names", new NameFinderME(new TokenNameFinderModel(
            new FileInputStream(getPersonModel()))));
    finders.put("Dates", new NameFinderME(new TokenNameFinderModel(
            new FileInputStream(getDateModel()))));
    finders.put("Locations", new NameFinderME(new TokenNameFinderModel(
            new FileInputStream(getLocationModel()))));

    tokenizer = SimpleTokenizer.INSTANCE;
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.namefind.NameFinderME$NameFinderSequenceValidator

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.