String intext = text.text();
//System.out.println("\n\nRR- nextractNPNGrams(...) method called! with text: " + intext + "\n\n");
List<SurfaceFormOccurrence> npNgramSFLst = new ArrayList<SurfaceFormOccurrence>();
SentenceDetectorME sentenceDetector = new SentenceDetectorME((SentenceModel)sentenceModel);
TokenizerME tokenizer = new TokenizerME((TokenizerModel)tokenModel);
POSTaggerME posTagger = new POSTaggerME((POSModel)posModel);
ChunkerME chunker = new ChunkerME((ChunkerModel)chunkModel);
Span[] sentSpans = sentenceDetector.sentPosDetect(intext);
for (Span sentSpan : sentSpans) {
String sentence = sentSpan.getCoveredText(intext).toString();
int start = sentSpan.getStart();
Span[] tokSpans = tokenizer.tokenizePos(sentence);
String[] tokens = new String[tokSpans.length];
// System.out.println("\n\nTokens:");
for (int i = 0; i < tokens.length; i++) {
tokens[i] = tokSpans[i].getCoveredText(sentence).toString();
// System.out.println(tokens[i]);
}
String[] tags = posTagger.tag(tokens);
Span[] chunks = chunker.chunkAsSpans(tokens, tags);
for (Span chunk : chunks) {
if ("NP".equals(chunk.getType())) {
//Note: the getStart()/getEnd() methods of a chunk span only give the start and end token indexes of the chunk.
//The actual start/end character positions of the chunk within the sentence need to be derived from the token position spans.