Package opennlp.tools.sentdetect.lang

Examples of opennlp.tools.sentdetect.lang.Factory


  /**
   * Initializes the current instance.
   *
   * @param model the {@link SentenceModel}
   */
  public SentenceDetectorME(SentenceModel model) {
    // Delegate to the two-argument constructor, supplying a freshly created Factory.
    this(model, new Factory());
  }
View Full Code Here


  public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples,
      boolean useTokenEnd, Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
   
    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
   
    Factory factory = new Factory();
   
    // TODO: Fix the EventStream to throw exceptions when training goes wrong
    EventStream eventStream = new SDEventStream(samples,
        factory.createSentenceContextGenerator(languageCode, getAbbreviations(abbreviations)),
        factory.createEndOfSentenceScanner(languageCode));
   
    AbstractModel sentModel = TrainUtil.train(eventStream, mlParams.getSettings(), manifestInfoEntries);
   
    return new SentenceModel(languageCode, sentModel,
        useTokenEnd, abbreviations, manifestInfoEntries);
View Full Code Here

        new Span(0, 15), new Span(16, 29));
   
    ObjectStream<SentenceSample> sampleStream =
      ObjectStreamUtils.createObjectStream(sample);
   
    Factory factory = new Factory();
   
    EventStream eventStream = new SDEventStream(sampleStream,
        factory.createSentenceContextGenerator("en"),
        factory.createEndOfSentenceScanner("en"));
   
    assertTrue(eventStream.hasNext());
    assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.next().getOutcome());
   
    assertTrue(eventStream.hasNext());
View Full Code Here

        if (prop != null) {
          this.eosCharacters = eosStringToCharArray(prop);
        }
      } else {
        // get from language dependent factory
        Factory f = new Factory();
        this.eosCharacters = f.getEOSCharacters(languageCode);
      }
    }
    return this.eosCharacters;
  }
View Full Code Here

    }
    return this.languageCode;
  }

  public EndOfSentenceScanner getEndOfSentenceScanner() {
    Factory f = new Factory();
    char[] eosChars = getEOSCharacters();
    if (eosChars != null && eosChars.length > 0) {
      return f.createEndOfSentenceScanner(eosChars);
    } else {
      return f.createEndOfSentenceScanner(this.languageCode);
    }
  }
View Full Code Here

      return f.createEndOfSentenceScanner(this.languageCode);
    }
  }

  public SDContextGenerator getSDContextGenerator() {
    Factory f = new Factory();
    char[] eosChars = getEOSCharacters();
    Set<String> abbs = null;
    Dictionary abbDict = getAbbreviationDictionary();
    if (abbDict != null) {
      abbs = abbDict.asStringSet();
    } else {
      abbs = Collections.emptySet();
    }
    if (eosChars != null && eosChars.length > 0) {
      return f.createSentenceContextGenerator(abbs, eosChars);
    } else {
      return f.createSentenceContextGenerator(this.languageCode, abbs);
    }
  }
View Full Code Here

        boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException {

      Map<String, String> manifestInfoEntries = new HashMap<String, String>();
      ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations);
     
      Factory factory = new Factory();

      // TODO: Fix the EventStream to throw exceptions when training goes wrong
      EventStream eventStream = new SDEventStream(samples,
          factory.createSentenceContextGenerator(languageCode),
          factory.createEndOfSentenceScanner(languageCode));
     
      HashSumEventStream hses = new HashSumEventStream(eventStream);
      GISModel sentModel = GIS.trainModel(hses, iterations, cutoff);

      manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY,
View Full Code Here

        new Span(0, 15), new Span(16, 29));

    ObjectStream<SentenceSample> sampleStream =
      ObjectStreamUtils.createObjectStream(sample);

    Factory factory = new Factory();

    ObjectStream<Event> eventStream = new SDEventStream(sampleStream,
        factory.createSentenceContextGenerator("en"),
        factory.createEndOfSentenceScanner("en"));

    assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
    assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
    assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome());
    assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome());
View Full Code Here

        if (prop != null) {
          this.eosCharacters = eosStringToCharArray(prop);
        }
      } else {
        // get from language dependent factory
        Factory f = new Factory();
        this.eosCharacters = f.getEOSCharacters(languageCode);
      }
    }
    return this.eosCharacters;
  }
View Full Code Here

    }
    return this.languageCode;
  }

  public EndOfSentenceScanner getEndOfSentenceScanner() {
    Factory f = new Factory();
    char[] eosChars = getEOSCharacters();
    if (eosChars != null && eosChars.length > 0) {
      return f.createEndOfSentenceScanner(eosChars);
    } else {
      return f.createEndOfSentenceScanner(this.languageCode);
    }
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.sentdetect.lang.Factory

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.