Package com.tamingtext.sentences

Source Code of com.tamingtext.sentences.SentenceDetectionTest

/*
* Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
*
*    Licensed under the Apache License, Version 2.0 (the "License");
*    you may not use this file except in compliance with the License.
*    You may obtain a copy of the License at
*
*        http://www.apache.org/licenses/LICENSE-2.0
*
*    Unless required by applicable law or agreed to in writing, software
*    distributed under the License is distributed on an "AS IS" BASIS,
*    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*    See the License for the specific language governing permissions and
*    limitations under the License.
* -------------------
* To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
* http://www.manning.com/ingersoll
*/

package com.tamingtext.sentences;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;

import org.junit.Test;

import com.tamingtext.TamingTextTestJ4;

public class SentenceDetectionTest extends TamingTextTestJ4 {

  @Test
  public void testBreakIterator() {
    //<start id="sentDetect"/>
    BreakIterator sentIterator = BreakIterator.getSentenceInstance(Locale.US);
    String testString = "This is a sentence.  It has fruits, vegetables," +
            " etc. but does not have meat.  Mr. Smith went to Washington.";
    sentIterator.setText(testString);
    int start = sentIterator.first();
    int end = -1;
    List<String> sentences = new ArrayList<String>();
    while ((end = sentIterator.next()) != BreakIterator.DONE) {
      String sentence = testString.substring(start, end);
      start = end;
      sentences.add(sentence);
      System.out.println("Sentence: " + sentence);
    }
    //<end id="sentDetect"/>
  }
  @Test
  public void testOpenNLP() throws Exception {

    File modelDir = getModelDir();

    //<start id="openSentDetect"/>
    //... Setup the models
    File modelFile = new File(modelDir, "en-sent.bin");
    InputStream modelStream = new FileInputStream(modelFile);
    SentenceModel model = new SentenceModel(modelStream);
    SentenceDetector detector = //<co id="openSentDetect.co.detect"/>
      new SentenceDetectorME(model);
    String testString = "This is a sentence. It has fruits, vegetables," +
      " etc. but does not have meat. Mr. Smith went to Washington.";
    String[] result = detector.sentDetect(testString); //<co id="openSentDetect.co.run"/>
    for (int i = 0; i < result.length; i++) {
      System.out.println("Sentence: " + result[i]);
    }
    /*<calloutlist>
        <callout arearefs="openSentDetect.co.detect"><para>Create the <command>SentenceDetector</command> with the en-sent.bin model</para></callout>
        <callout arearefs="openSentDetect.co.run"><para>Invoke the detection process</para></callout>
    </calloutlist>*/
    //<end id="openSentDetect"/>
    assertTrue("result Size: " + result.length + " is not: " + 3, result.length == 3);
  }

}
TOP

Related Classes of com.tamingtext.sentences.SentenceDetectionTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.