Package opennlp.tools.dictionary

Examples of opennlp.tools.dictionary.Dictionary$StringListWrapper


        System.exit(1);
      }
      ai++;
    }
    HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(args[ai++]);
    Dictionary dict = null;
    if (ai < args.length) {
      dict = new Dictionary(new FileInputStream(args[ai++]),true);
    }
    if (fun) {
      Parse.useFunctionTags(true);
    }
    opennlp.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
View Full Code Here


      System.exit(1);
    }
    ParserEventTypeEnum etype = null;
    boolean fun = false;
    int ai = 0;
    Dictionary dict = null;
    AbstractModel model = null;

    while (ai < args.length && args[ai].startsWith("-")) {
      if (args[ai].equals("-build")) {
        etype = ParserEventTypeEnum.BUILD;
      }
      else if (args[ai].equals("-attach")) {
        etype = ParserEventTypeEnum.ATTACH;
      }
      else if (args[ai].equals("-chunk")) {
        etype = ParserEventTypeEnum.CHUNK;
      }
      else if (args[ai].equals("-check")) {
        etype = ParserEventTypeEnum.CHECK;
      }
      else if (args[ai].equals("-tag")) {
        etype = ParserEventTypeEnum.TAG;
      }
      else if (args[ai].equals("-fun")) {
        fun = true;
      }
      else if (args[ai].equals("-dict")) {
        ai++;
        dict = new Dictionary(new FileInputStream(args[ai]));
      }
      else if (args[ai].equals("-model")) {
        ai++;
        model = (new SuffixSensitiveGISModelReader(new File(args[ai]))).getModel();
      }
View Full Code Here

      System.exit(1);
    }

    int ai = 0;

    Dictionary abbreviations = null;
    if ("-abbreviationsDictionary".equals(args[ai])) {
      ai++;
      abbreviations = new Dictionary(new FileInputStream(args[ai++]));
    }

    boolean useTokenEnd = false;
    if ("-useTokenEnd".equals(args[ai])) {
      useTokenEnd = true;
View Full Code Here

  public SDContextGenerator getSDContextGenerator() {
    Factory f = new Factory();
    char[] eosChars = getEOSCharacters();
    Set<String> abbs = null;
    Dictionary abbDict = getAbbreviationDictionary();
    if (abbDict != null) {
      abbs = abbDict.asStringSet();
    } else {
      abbs = Collections.emptySet();
    }
    if (eosChars != null && eosChars.length > 0) {
      return f.createSentenceContextGenerator(abbs, eosChars);
View Full Code Here

    return new BinaryGISModelReader(new DataInputStream(new GZIPInputStream(
        new FileInputStream(fileName)))).getModel();
  }

  private static Dictionary readNames(String nameFile) throws IOException {
    Dictionary names = new Dictionary();

    BufferedReader nameReader = new BufferedReader(new FileReader(nameFile));
    for (String line = nameReader.readLine(); line != null; line = nameReader.readLine()) {
      names.put(new StringList(line));
    }

    return names;
  }
View Full Code Here

      mlParams.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(iters));
      mlParams.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(cut));

      // Abbreviations dictionary
      // TODO: Actually import a Dictionary of abbreviations
      Dictionary dict = new Dictionary();

      try {
        mod = SentenceDetectorME.train("en", sampleStream, true, dict, mlParams);
      } finally {
        sampleStream.close();
View Full Code Here

  public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples, HeadRules rules, int iterations, int cut)
      throws IOException {
   
    System.err.println("Building dictionary");
    Dictionary mdict = buildDictionary(parseSamples, rules, cut);
   
    parseSamples.reset();
   
    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
    ModelUtil.addCutoffAndIterations(manifestInfoEntries, cut, iterations);
View Full Code Here

    }
   
    if (dict || all) {
      System.err.println("Building dictionary");
      ObjectStream<Parse> data = new ParseSampleStream(new PlainTextByLineStream(new FileReader(inFile)));
      Dictionary mdict = buildDictionary(data, rules, cutoff);
      System.out.println("Saving the dictionary");
      mdict.serialize(new FileOutputStream(dictFile));
    }
   
    if (tag || all) {
      System.err.println("Training tagger");
      ObjectStream<POSSample> tes = new PosSampleStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))));
      POSModel posModel = POSTaggerME.train("en", tes, ModelType.MAXENT, null, null, cutoff, 100);
      System.out.println("Saving the tagger model as: " + tagFile);
      OutputStream posOutputStream = new FileOutputStream(tagFile);
      posModel.serialize(posOutputStream);
      posOutputStream.close();
    }

    if (chunk || all) {
      System.err.println("Training chunker");
      ObjectStream<ChunkSample> ces = new ChunkSampleStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))));
      ChunkerModel chunkModel = ChunkerME.train("en", ces, cutoff, iterations,
          new ChunkContextGenerator());
      System.out.println("Saving the chunker model as: " + chunkFile);
      OutputStream chunkOutputStream = new FileOutputStream(chunkFile);
      chunkModel.serialize(chunkOutputStream);
      chunkOutputStream.close();
    }

    if (build || all) {
      System.err.println("Loading Dictionary");
      Dictionary tridict = new Dictionary(new FileInputStream(dictFile.toString()),true);
      System.err.println("Training builder");
      opennlp.model.EventStream bes = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.FileReader(inFile))), rules, ParserEventTypeEnum.BUILD,tridict);
      AbstractModel buildModel = train(bes, iterations, cutoff);
      System.out.println("Saving the build model as: " + buildFile);
      new opennlp.maxent.io.SuffixSensitiveGISModelWriter(buildModel, buildFile).persist();
View Full Code Here

        System.exit(1);
      }
      ai++;
    }
    HeadRules rules = new opennlp.tools.parser.lang.en.HeadRules(args[ai++]);
    Dictionary dict = null;
    if (ai < args.length) {
      dict = new Dictionary(new FileInputStream(args[ai++]),true);
    }
    if (fun) {
      Parse.useFunctionTags(true);
    }
    opennlp.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
View Full Code Here

    public DummyDictionary(Dictionary dict) {
      this.indict = dict;
    }

    public DummyDictionary(InputStream in) throws IOException {
      this.indict = new Dictionary(in);
    }
View Full Code Here

TOP

Related Classes of opennlp.tools.dictionary.Dictionary$StringListWrapper

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.