// Tail of a logging/varargs call begun above this chunk: the Object[] packs
// the content-part key, item URI, detected language, and a 100-char preview
// of the text — presumably debug-log parameters; TODO confirm against the
// statement opening outside this view.
new Object [] { contentPart.getKey(),ci.getUri().getUnicodeString(),
language, StringUtils.abbreviate(text, 100) });
//first get the models
// Model initialization forms a dependency chain:
//   tokenizer (always) -> sentence detector -> POS tagger -> chunker.
// Each later stage is only initialized when its prerequisite is available.
Tokenizer tokenizer = initTokenizer(language);
SentenceDetector sentenceDetector = initSentence(language);
POSTaggerME posTagger;
if(sentenceDetector != null){ //sentence detection is requirement
posTagger = initTagger(language);
} else {
posTagger = null;
}
ChunkerME chunker;
// Chunking additionally requires the useChunker flag (configured elsewhere).
if(posTagger != null && useChunker ){ //pos tags requirement
chunker = initChunker(language);
} else {
chunker = null;
}
// Cache of suggestions keyed by string; TreeMap keeps keys sorted
// (NOTE(review): sorted order may or may not be relied upon downstream —
// confirm before replacing with a HashMap).
Map<String,Suggestion> suggestionCache = new TreeMap<String,Suggestion>();
if(sentenceDetector != null){
//add dots for multiple line breaks
// NOTE(review): replaceAll treats "\n\n" literally (regex has no
// quantifier), so only exact double-newline pairs are rewritten, and each
// pair collapses to ".\n" (one newline lost). Three consecutive newlines
// leave a residual "\n". Confirm this matches the intended paragraph-break
// handling before changing.
text = text.replaceAll("\\n\\n", ".\n");
// Detect sentence boundaries as character spans over the (modified) text.
Span[] sentenceSpans = sentenceDetector.sentPosDetect(text);
for (int i = 0; i < sentenceSpans.length; i++) {
// Extract the sentence substring, then tokenize it; token spans are
// relative to the sentence, not the full text.
String sentence = sentenceSpans[i].getCoveredText(text).toString();
Span[] tokenSpans = tokenizer.tokenizePos(sentence);
String[] tokens = getTokensForSpans(sentence, tokenSpans);
// POS tags for the sentence's tokens — assigned below, outside this view.
String[] pos;