Package opennlp.tools.doccat

Examples of opennlp.tools.doccat.DoccatModel


    // Feature generators handed to the trainer below.
    NameFinderFeatureGenerator nffg //<co id="tm.fg"/>
      = new NameFinderFeatureGenerator();
    BagOfWordsFeatureGenerator bowfg
      = new BagOfWordsFeatureGenerator();

    // Train a document categorizer using both feature generators.
    // ds, cutoff and iterations are defined outside this excerpt.
    DoccatModel model = DocumentCategorizerME.train("en",
        ds, cutoff, iterations, nffg, bowfg); //<co id="tm.train"/>
    // Write the trained model to modelFile.
    // NOTE(review): the FileOutputStream is never closed in the visible lines —
    // confirm whether serialize() closes it or whether the caller must.
    model.serialize(new FileOutputStream(modelFile));
   
/*<calloutlist>
<callout arearefs="tm.tok">Create data stream</callout>
<callout arearefs="tm.fg">Set up feature generators</callout>
<callout arearefs="tm.train">Train categorizer</callout> 
View Full Code Here


    // Bag-of-words feature generator passed to the categorizer below.
    BagOfWordsFeatureGenerator bowfg
      = new BagOfWordsFeatureGenerator();

    // Load the DoccatModel from modelFile.
    // NOTE(review): modelStream is not closed in the visible lines — confirm it
    // is closed later in the enclosing method.
    InputStream modelStream = //<co id="tmx.modelreader"/>
        new FileInputStream(modelFile);
    DoccatModel model = new DoccatModel(modelStream);
    // Build the categorizer from the model and both feature generators;
    // nffg is declared outside this excerpt.
    DocumentCategorizer categorizer //<co id="tmx.categorizer"/>
      = new DocumentCategorizerME(model, nffg, bowfg);
    Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
  
    // Number of categories reported by the categorizer.
    int catCount = categorizer.getNumberOfCategories();
View Full Code Here

        // Chunker backed by chunkerModel (defined outside this excerpt).
        chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/>
        // POS tagger built from en-pos-maxent.bin under modelsDir.
        // NOTE(review): posStream is not closed in the visible lines — confirm.
        InputStream posStream = new FileInputStream(
            new File(modelsDir,"en-pos-maxent.bin"));
        POSModel posModel = new POSModel(posStream);
        tagger =  new POSTaggerME(posModel); //<co id="qqpp.tagger"/>
        // Read en-answer.bin from modelDirectory as a DoccatModel and keep what
        // getChunkerModel() returns.
        // NOTE(review): this uses modelDirectory while the POS model above uses
        // modelsDir — confirm both names are intended.
        model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/>
            new File(modelDirectory,"en-answer.bin")))
            .getChunkerModel();
        // One slot per outcome of the model.
        probs = new double[model.getNumOutcomes()];
        // Context generator pointed at the "dict" entry under wordnetDirectory.
        atcg = new AnswerTypeContextGenerator(
                new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/>
View Full Code Here

    // Context generator built over wordnetDir.
    AnswerTypeContextGenerator actg = new AnswerTypeContextGenerator(new File(wordnetDir));
    //<start id="atc.train"/>
    // Event stream over trainFile using actg and parser
    // (parser is defined outside this excerpt).
    AnswerTypeEventStream es = new AnswerTypeEventStream(trainFile,
            actg, parser);
    // NOTE(review): 100 and 3 are presumably the iteration count and the
    // feature cutoff — confirm against the GIS / TwoPassDataIndexer API.
    GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3));//<co id="atc.train.do"/>
    // Wrap the trained model in a DoccatModel tagged "en" and write it to outFile.
    // NOTE(review): the FileOutputStream is never closed in the visible lines —
    // confirm whether serialize() closes it or whether the caller must.
    new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
    /*
    <calloutlist>
        <callout arearefs="atc.train.do"><para>Using the event stream, which feeds us training examples, do the actual training using OpenNLP's Maxent classifier.</para></callout>
    </calloutlist>
    */
 
View Full Code Here

  public void collectionProcessComplete(ProcessTrace trace)
      throws ResourceProcessException, IOException {
   
    GIS.PRINT_MESSAGES = false;

    DoccatModel categoryModel = DocumentCategorizerME.train(language, ObjectStreamUtils.createObjectStream(documentSamples));
   
    File modelFile = new File(getUimaContextAdmin().getResourceManager()
        .getDataPath() + File.separatorChar + mModelName);

    OpennlpUtil.serialize(categoryModel, modelFile);
View Full Code Here

    if (mLogger.isLoggable(Level.INFO)) {
      mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
    }

    DoccatModel model;

    try {
      DoccatModelResource modelResource = (DoccatModelResource) context
          .getResourceObject(UimaUtil.MODEL_PARAMETER);
View Full Code Here

    return model;
  }

  @Override
  protected DoccatModel loadModel(InputStream in) throws IOException {
    return new DoccatModel(in);
  }
View Full Code Here

  public void collectionProcessComplete(ProcessTrace trace)
      throws ResourceProcessException, IOException {
   
    GIS.PRINT_MESSAGES = false;

    DoccatModel categoryModel = DocumentCategorizerME.train(language, ObjectStreamUtils.createObjectStream(documentSamples));
   
    File modelFile = new File(getUimaContextAdmin().getResourceManager()
        .getDataPath() + File.separatorChar + mModelName);

    OpennlpUtil.serialize(categoryModel, modelFile);
View Full Code Here

    if (mLogger.isLoggable(Level.INFO)) {
      mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
    }

    DoccatModel model;

    try {
      DoccatModelResource modelResource = (DoccatModelResource) context
          .getResourceObject(UimaUtil.MODEL_PARAMETER);
View Full Code Here

    return model;
  }

  @Override
  protected DoccatModel loadModel(InputStream in) throws IOException {
    return new DoccatModel(in);
  }
View Full Code Here

TOP

Related Classes of opennlp.tools.doccat.DoccatModel

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.