"The 86-year-old Reagan will remain overnight for " +
"observation at a hospital in Santa Monica, California, " +
"said Joanne " +
"Drake, chief of staff for the Reagan Foundation."};
NameFinderME finder = new NameFinderME( //<co id="co.opennlp.name.initmodel"/>
new TokenNameFinderModel(new FileInputStream(getPersonModel()))
);
Tokenizer tokenizer = SimpleTokenizer.INSTANCE; //<co id="co.opennlp.name.inittokenizer2"/>
for (int si = 0; si < sentences.length; si++) {
String[] tokens = tokenizer.tokenize(sentences[si]); //<co id="co.opennlp.name.tokenize2"/>
Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames3"/>
displayNames(names, tokens);
}
finder.clearAdaptiveData(); //<co id="co.opennlp.name.clear"/>
/*<calloutlist>
<callout arearefs="co.opennlp.name.initmodel">
<para>Initialize a new model for identifying people's names based on the
binary compressed model in the file "en-ner-person.bin".</para>
</callout>
<callout arearefs="co.opennlp.name.inittokenizer2">
<para>Initialize a tokenizer to split the sentence into individual words
and symbols.</para>
</callout>
<callout arearefs="co.opennlp.name.tokenize2">
<para>Split the sentence into an array of tokens.</para>
</callout>
<callout arearefs="co.opennlp.name.findnames3">
<para>Identify the names in the sentence and return token-based offsets
to these names.</para>
</callout>
<callout arearefs="co.opennlp.name.clear">
<para>Clear data structures that store which words have been seen
previously in the document and whether these words were considered part
of a person's name.</para>
</callout>
</calloutlist>*/
//<end id="ne-setup"/>
//<start id="ne-display2"/>
for (int si = 0; si < sentences.length; si++) { //<co id="co.opennlp.name.eachsent2"/>
Span[] tokenSpans = tokenizer.tokenizePos(sentences[si]); //<co id="co.opennlp.name.tokenizepos"/>
String[] tokens = Span.spansToStrings(tokenSpans, sentences[si]); //<co id="co.opennlp.name.convert2strings"/>
Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames4"/>
for (int ni = 0; ni < names.length; ni++) {
Span startSpan = tokenSpans[names[ni].getStart()]; //<co id="co.opennlp.name.computestart"/>
int nameStart = startSpan.getStart();
Span endSpan = tokenSpans[names[ni].getEnd() - 1]; //<co id="co.opennlp.name.computeend"/>
int nameEnd = endSpan.getEnd();
String name = sentences[si].substring(nameStart, nameEnd); //<co id="co.opennlp.name.namestring"/>
System.out.println(name);
}
}
/*<calloutlist>
<callout arearefs="co.opennlp.name.eachsent2">
<para>Iterate over each sentence.</para>
</callout>
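<callout arearefs="co.opennlp.name.tokenizepos">
<para>Split the sentence into an array of tokens and return the
character offsets (spans) of those tokens.</para>
</callout>
<callout arearefs="co.opennlp.name.convert2strings">
<para>Convert the token spans into the token strings they cover in the
sentence.</para>
</callout>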
<callout arearefs="co.opennlp.name.findnames4">
<para>
Identify the names in the sentence and return token-based offsets to these names.
</para>
</callout>
<callout arearefs="co.opennlp.name.computestart">
<para>
Compute the start character index of the name.
</para>
</callout>
<callout arearefs="co.opennlp.name.computeend">
<para>
Compute the end character index (last character +1) of the name.
</para>
</callout>
<callout arearefs="co.opennlp.name.namestring">
<para>
Compute the string which represents the name.
</para>
</callout>
</calloutlist>*/
//<end id="ne-display2"/>
//<start id="ne-prob"/>
for (int si = 0; si < sentences.length; si++) {//<co id="co.opennlp.name.eachsent3"/>
String[] tokens = tokenizer.tokenize(sentences[si]); //<co id="co.opennlp.name.tokenize3"/>
Span[] names = finder.find(tokens); //<co id="co.opennlp.name.findnames1"/>
double[] spanProbs = finder.probs(names); //<co id="co.opennlp.name.probs"/>
}
/*<calloutlist>
<callout arearefs="co.opennlp.name.eachsent3"><para>Iterate over each sentence.</para></callout>
<callout arearefs="co.opennlp.name.tokenize3"><para>Split the sentence into an array of tokens.</para></callout>
<callout arearefs="co.opennlp.name.findnames1"><para>Identify the names in the sentence and return token-based offsets to these names.</para></callout>