Examples of opennlp.tools.namefind.NameFinderME

opennlp.tools.namefind.NameFinderME
Class for creating a maximum-entropy-based name finder.


        SentenceDetectorME sentenceDetector = new SentenceDetectorME(getSentenceModel("en"));


        Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);


        NameFinderME finder = new NameFinderME(nameFinderModel);
        Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
        Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>();
        for (int i = 0; i < sentenceSpans.length; i++) {
            String sentence = sentenceSpans[i].getCoveredText(text).toString().trim();


            // build a context by concatenating three sentences to be used for
            // similarity ranking / disambiguation + contextual snippet in the
            // extraction structure
            List<String> contextElements = new ArrayList<String>();
            if (i > 0) {
                CharSequence previousSentence = sentenceSpans[i - 1].getCoveredText(text);
                contextElements.add(previousSentence.toString().trim());
            }
            contextElements.add(sentence.toString().trim());
            if (i + 1 < sentenceSpans.length) {
                CharSequence nextSentence = sentenceSpans[i + 1].getCoveredText(text);
                contextElements.add(nextSentence.toString().trim());
            }
            String context = StringUtils.join(contextElements, " ");


            // extract the names in the current sentence and
            // store them together with the current context
            Span[] tokenSpans = tokenizer.tokenizePos(sentence);
            String[] tokens = Span.spansToStrings(tokenSpans, sentence);
            Span[] nameSpans = finder.find(tokens);
            double[] probs = finder.probs();
            String[] names = Span.spansToStrings(nameSpans, tokens);
            //int lastStartPosition = 0;
            for (int j = 0; j < names.length; j++) {
                String name = names[j];
                Double confidence = 1.0;
                for (int k = nameSpans[j].getStart(); k < nameSpans[j].getEnd(); k++) {
                    confidence *= probs[k];
                }
                int start = tokenSpans[nameSpans[j].getStart()].getStart();
                int absoluteStart = sentenceSpans[i].getStart() + start;
                int absoluteEnd = absoluteStart + name.length();
                NameOccurrence occurrence = new NameOccurrence(name, absoluteStart, absoluteEnd, context,
                        confidence);


                List<NameOccurrence> occurrences = nameOccurrences.get(name);
                if (occurrences == null) {
                    occurrences = new ArrayList<NameOccurrence>();
                }
                occurrences.add(occurrence);
                nameOccurrences.put(name, occurrences);
            }
        }
        finder.clearAdaptiveData();
        log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences);
        return nameOccurrences;
    }

View Full Code Here

    }


    TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(CmdLineUtil.getParameter("-model", args)));


    opennlp.tools.namefind.TokenNameFinderEvaluator evaluator = new opennlp.tools.namefind.TokenNameFinderEvaluator(
        new NameFinderME(model));


    final ObjectStream<NameSample> sampleStream = TokenNameFinderTrainerTool.openSampleData("Test",
        testData, encoding);


    final PerformanceMonitor monitor = new PerformanceMonitor("sent");

View Full Code Here

   * Name Finder and Tokenizer.
   * 
   * @throws IOException 
   */
  public ApacheExtractor() throws IOException {
    nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel)));
    tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel)));
        sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel)));
  }

View Full Code Here

0 1 2 3

TOP

Related Classes of opennlp.tools.namefind.NameFinderME

com.bericotech.clavin.extractor.ApacheExtractor

com.tamingtext.frankenstein.Frankenstein

com.tamingtext.opennlp.NameFinderTest

com.tamingtext.texttamer.solr.NameFilterTest

com.tamingtext.util.NameFinderFactory

gate.opennlp.OpenNLPNameFin$NameFinder

io.lumify.opennlpme.OpenNLPMaximumEntropyExtractorGraphPropertyWorker

opennlp.maxent.EventStream

opennlp.maxent.GISModel

opennlp.maxent.io.SuffixSensitiveGISModelWriter

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.