Package opennlp.tools.sentdetect

Examples of opennlp.tools.sentdetect.SentenceModel


    logger.info("Training new model from " + inFile.getAbsolutePath());
    logger.info("Using " + numEosc + " end of sentence characters.");


    Charset charset = Charset.forName("UTF-8");
    SentenceModel mod = null;
   
    try(FileInputStream inStream = new FileInputStream(inFile)){
      ObjectStream<String> lineStream = new PlainTextByLineStream(inStream, charset);
      ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);

      // Training Parameters
      TrainingParameters mlParams = new TrainingParameters();
      mlParams.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
      mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iters));
      mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cut));

      // Abbreviations dictionary
      // TODO: Actually import a Dictionary of abbreviations
      Dictionary dict = new Dictionary();

      try {
        mod = SentenceDetectorME.train("en", sampleStream, true, dict, mlParams);
      } finally {
        sampleStream.close();
      }
    }
   
    try(FileOutputStream outStream = new FileOutputStream(outFile)){
      logger.info("Saving the model as: " + outFile.getAbsolutePath());
      mod.serialize(outStream);
    }
  }
View Full Code Here


                getResourceAsStream(chunkerModelFile)));
    }

    public static SentenceDetector getDefaultSentenceDetector()
            throws IOException {
        return new SentenceDetectorME(new SentenceModel(
                getResourceAsStream(sentDetectorModelFile)));
    }
View Full Code Here

      GISModel sentModel = GIS.trainModel(hses, iterations, cutoff);

      manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY,
          hses.calculateHashSum().toString(16));
     
      return new SentenceModel(languageCode, sentModel,
          useTokenEnd, abbreviations, manifestInfoEntries);
    }
View Full Code Here

        if ((lang == null) || (encoding == null)) {
          usage();
        }

       
        SentenceModel model = train(lang, new SentenceSampleStream(new PlainTextByLineStream(
            new InputStreamReader(new FileInputStream(inFile), encoding))), true, null, cutoff, iters);

        // TODO: add support for iterations and cutoff settings

//        if (args.length > ai)
//          mod = train(es, Integer.parseInt(args[ai++]), Integer.parseInt(args[ai++]));
//        else
//          mod = train(es, 100, 5);

        System.out.println("Saving the model as: " + outFile);
        model.serialize(new FileOutputStream(outFile));
      }
      catch (Exception e) {
        e.printStackTrace();
      }
    }
View Full Code Here

    String sdModelPath = (String) context
        .getConfigParameterValue(SD_MODEL_FILE_PARAM);
      InputStream is = FileLocator.getAsStream(sdModelPath);
      logger.info("Sentence detector model file: " + sdModelPath);
      sdmodel = new SentenceModel(is);
      is.close();
      EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
      char[] eosc = eoss.getEndOfSentenceCharacters();
      // SentenceDContextGenerator cg = new SentenceDContextGenerator();
      DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc);
View Full Code Here

                mdl = new POSModel(in);
                LOG.debug("OpenNLP5 POS Model loaded: " + mdl);
                break;
            }
            case SentenceModel: {
                mdl = new SentenceModel(in);
                LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl);
                break;
            }
            case ChunkModel: {
                mdl = new ChunkerModel(in);
View Full Code Here

     * @throws IOException
     */
    public ApacheExtractor() throws IOException {
        nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel)));
        tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel)));
        sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel)));
    }
View Full Code Here

     * @param language
     * @return
     */
    private SentenceModel getSentenceModel(String language) {
        try {
            SentenceModel model = openNLP.getSentenceModel(language);
            if(model != null){
                return model;
            } else { //fallback to english
                log.info("No sentence detection modle for {}. fallback to English");   
                model = openNLP.getSentenceModel("en");
View Full Code Here

     * @param language the language
     * @return the sentence detector or <code>null</code> if none is available or
     * an exception was encountered while loading
     */
    private SentenceDetector getSentenceDetector(String language) {
        SentenceModel model;
        String modelName = languageConfig.getParameter(language, MODEL_NAME_PARAM);
        if(modelName == null){
            try {
                model = openNLP.getSentenceModel(language);
            } catch (Exception e) {
                log.warn("Unable to load default Sentence Detection model for language '"+language+"'!",e);
                return null;
            }
        } else {
            try {
                model = openNLP.getModel(SentenceModel.class, modelName, null);
            } catch (Exception e) {
                log.warn("Unable to load Sentence Detection model for language '"
                        +language+"' from the configured model '"+modelName+"'!",e);
                return null;
            }
        }
        if(model != null) {
            log.debug("Sentence Detection Model {} for lanugage '{}' version: {}",
                new Object[]{model.getClass().getSimpleName(),
                             model.getLanguage(),
                             model.getVersion() != null ? model.getVersion() : "undefined"});
            return new SentenceDetectorME(model);
        }
        log.debug("Sentence Detection Model for Language '{}' not available.", language);
        return null;
    }
View Full Code Here

    if (args.length != 1) {
      System.out.println(getHelp());
    } else {

      SentenceModel model = new SentenceModelLoader().load(new File(args[0]));

      SentenceDetectorME sdetector = new SentenceDetectorME(model);

      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
      perfMon.start();
View Full Code Here

TOP

Related Classes of opennlp.tools.sentdetect.SentenceModel

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.