Package opennlp.tools.tokenize

Examples of opennlp.tools.tokenize.TokenizerModel


     *
     * @throws IOException
     */
    public ApacheExtractor() throws IOException {
        nameFinder = new NameFinderME(new TokenNameFinderModel(ApacheExtractor.class.getResourceAsStream(pathToNERModel)));
        tokenizer = new TokenizerME(new TokenizerModel(ApacheExtractor.class.getResourceAsStream(pathToTokenizerModel)));
        sentenceDetector = new SentenceDetectorME(new SentenceModel(ApacheExtractor.class.getResourceAsStream(pathToSentenceDetectorModel)));
    }
View Full Code Here


    }

    protected Tokenizer loadTokenizer(String pathPrefix, FileSystem fs) throws IOException {
        Path tokenizerHdfsPath = new Path(pathPrefix + "/en-token.bin");

        TokenizerModel tokenizerModel = null;
        InputStream tokenizerModelInputStream = fs.open(tokenizerHdfsPath);
        try {
            tokenizerModel = new TokenizerModel(tokenizerModelInputStream);
        } finally {
            tokenizerModelInputStream.close();
        }

        return new TokenizerME(tokenizerModel);
View Full Code Here

    }

    protected Tokenizer loadTokenizer(String pathPrefix, FileSystem fs) throws IOException {
        Path tokenizerHdfsPath = new Path(pathPrefix + "/en-token.bin");

        TokenizerModel tokenizerModel = null;
        InputStream tokenizerModelInputStream = fs.open(tokenizerHdfsPath);
        try {
            tokenizerModel = new TokenizerModel(tokenizerModelInputStream);
        } finally {
            tokenizerModelInputStream.close();
        }

        return new TokenizerME(tokenizerModel);
View Full Code Here

    // if trace file
    // serialize events ...
   
    InputStream additionalTrainingDataIn = null;
    Writer samplesOut = null;
    TokenizerModel tokenModel;
   
    try {
      if (additionalTrainingDataFile != null) {
       
        if (mLogger.isLoggable(Level.INFO)) {
View Full Code Here

    return model;
  }

  @Override
  protected TokenizerModel loadModel(InputStream in) throws IOException {
    return new TokenizerModel(in);
  }
View Full Code Here

  public void initialize(UimaContext context)
      throws ResourceInitializationException {

    super.initialize(context);

    TokenizerModel model;

    try {
      TokenizerModelResource modelResource = (TokenizerModelResource) context
          .getResourceObject(UimaUtil.MODEL_PARAMETER);
View Full Code Here

        trainingDataInFile, params.getEncoding());
   
    if(mlParams == null)
      mlParams = createTrainingParameters(params.getIterations(), params.getCutoff());

    TokenizerModel model;
    try {
      Dictionary dict = loadDict(params.getAbbDict());
      model = opennlp.tools.tokenize.TokenizerME.train(params.getLang(),
          sampleStream, dict, params.getAlphaNumOpt(), mlParams);
    } catch (IOException e) {
View Full Code Here

  }
 
  @Override
  protected TokenizerModel loadModel(InputStream modelIn) throws IOException,
      InvalidFormatException {
    return new TokenizerModel(modelIn);
  }
View Full Code Here

  public void initialize(UimaContext context)
      throws ResourceInitializationException {

    super.initialize(context);

    TokenizerModel model;

    try {
      TokenizerModelResource modelResource = (TokenizerModelResource) context
          .getResourceObject(UimaUtil.MODEL_PARAMETER);
View Full Code Here

    GIS.PRINT_MESSAGES = false;
  
    ObjectStream<TokenSample> samples = ObjectStreamUtils.createObjectStream(tokenSamples);
   
    InputStream additionalTrainingDataIn = null;
    TokenizerModel tokenModel;
   
    try {
      if (additionalTrainingDataFile != null) {
       
        if (mLogger.isLoggable(Level.INFO)) {
View Full Code Here

TOP

Related Classes of opennlp.tools.tokenize.TokenizerModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.