Package opennlp.tools.dictionary

Examples of opennlp.tools.dictionary.Dictionary


  @Override
  protected ParserModel trainAndUpdate(ParserModel originalModel,
      ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters)
      throws IOException {
   
      Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), parameters.getCutoff());
     
      parseSamples.reset();
     
      // TODO: Maybe that should be part of the ChunkingParser ...
      // Training build
View Full Code Here


      mlParams.put(TrainingParameters.CUTOFF_PARAM,
          Integer.toString(params.getCutoff()));
    }

    try {
      Dictionary abbreviations = SentenceDetectorTrainerTool.loadDict(params
          .getAbbDict());
      validator = new SDCrossValidator(params.getLang(), mlParams,
          abbreviations, errorListener);
     
      validator.evaluate(sampleStream, params.getFolds());
View Full Code Here

    return new SentenceSampleStream(lineStream);
  }
 
  static Dictionary loadDict(File f) throws IOException {
    Dictionary dict = null;
    if (f != null) {
      CmdLineUtil.checkInputFile("abb dict", f);
      dict = new Dictionary(new FileInputStream(f));
    }
    return dict;
  }
View Full Code Here

    ObjectStream<SentenceSample> sampleStream =
        openSampleData("Training", trainingDataInFile, params.getEncoding());

    SentenceModel model;
    try {
      Dictionary dict = loadDict(params.getAbbDict());
      if (mlParams == null) {
        model = SentenceDetectorME.train(params.getLang(), sampleStream, true, dict,
            params.getCutoff(), params.getIterations());
      }
      else {
View Full Code Here

    CmdLineUtil.checkOutputFile("pos tagger model", modelOutFile);
    ObjectStream<POSSample> sampleStream = openSampleData("Training", trainingDataInFile,
        params.getEncoding());
   
   
    Dictionary ngramDict = null;
   
    Integer ngramCutoff = params.getNgram();
   
    if (ngramCutoff != null) {
      System.err.print("Building ngram dictionary ... ");
View Full Code Here

  @Override
  protected ParserModel trainAndUpdate(ParserModel originalModel,
      ObjectStream<Parse> parseSamples, ModelUpdaterParams parameters)
      throws IOException {
   
      Dictionary mdict = ParserTrainerTool.buildDictionary(parseSamples, originalModel.getHeadRules(), parameters.getCutoff());
     
      parseSamples.reset();
     
      // TODO: training individual models should be in the chunking parser, not here
      // Training build
View Full Code Here

  }
 
  static Dictionary buildDictionary(ObjectStream<Parse> parseSamples, HeadRules headRules, int cutoff) {
    System.err.print("Building dictionary ...");
   
    Dictionary mdict;
    try {
      mdict = Parser.
          buildDictionary(parseSamples, headRules, cutoff);
    } catch (IOException e) {
      System.err.println("Error while building dictionary: " + e.getMessage());
View Full Code Here

          if (dict == null) {
            ss = new POSSampleSequenceStream(new WordTagSampleStream(
                new InputStreamReader(new FileInputStream(inFile))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            ss = new POSSampleSequenceStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile)))),
                cg);
          }
        }
        else {
          if (dict == null) {

            ss = new POSSampleSequenceStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            ss = new POSSampleSequenceStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))), cg);
          }
        }
        mod = new SimplePerceptronSequenceTrainer().trainModel(iterations, ss, cutoff, true);
        System.out.println("Saving the model as: " + outFile);
        new SuffixSensitivePerceptronModelWriter(mod, outFile).persist();
      }
      else {
        POSSampleEventStream es;
        if (encoding == null) {
          if (dict == null) {
            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile)))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile)))),
                cg);
          }
        }
        else {
          if (dict == null) {

            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))));
          }
          else {
            POSContextGenerator cg = new DefaultPOSContextGenerator(new Dictionary(new FileInputStream(dict)));

            es = new POSSampleEventStream(new WordTagSampleStream((
                new InputStreamReader(new FileInputStream(inFile), encoding))), cg);
          }
        }
View Full Code Here

    }

    System.out.println("Saving the dictionary");

    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
    Dictionary dictionary = ngramModel.toDictionary(true);

    dictionary.serialize(new FileOutputStream(dict));
  }
View Full Code Here

        System.exit(1);
      }
      ai++;
    }
    HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(args[ai++]);
    Dictionary dict = null;
    if (ai < args.length) {
      dict = new Dictionary(new FileInputStream(args[ai++]),true);
    }
    if (fun) {
      Parse.useFunctionTags(true);
    }
    opennlp.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
View Full Code Here

TOP

Related Classes of opennlp.tools.dictionary.Dictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.