String paragraph = "can you find a plain-text file that is called \u201Cpippo\u201D ?";
int numParses = 5;
// the sentence detector and tokenizer constructors
// take paths to their respective models
SentenceDetectorME sdetector = new SentenceDetectorME(
new SentenceModel(new FileInputStream(
"models/en-sent.bin")));
Tokenizer tokenizer = new TokenizerME(new TokenizerModel(
new FileInputStream("models/en-token.bin")));
// the parser takes the path to the parser models
// directory and a few other options
/*
* boolean useTagDict = true; boolean useCaseInsensitiveTagDict = false;
* int beamSize = opennlp.tools.parser.chunking.Parser.defaultBeamSize;
* double advancePercentage =
* opennlp.tools.parser.chunking.Parser.defaultAdvancePercentage;
* opennlp.tools.parser.Parser parser = TreebankParser.getParser(
* "models/parser", useTagDict, useCaseInsensitiveTagDict, beamSize,
* advancePercentage);
*/Parser parser = ParserFactory.create(new ParserModel(
new FileInputStream("models/en-parser-chunking.bin")),
AbstractBottomUpParser.defaultBeamSize,
AbstractBottomUpParser.defaultAdvancePercentage);
// break a paragraph into sentences
String[] sents = sdetector.sentDetect(paragraph.toString());
// TODO handle paragraph (multiple sentences)
String sent = sents[0];
// tokenize brackets and parentheses by putting a space on either side.