Package syntaxLearner.corpus

Examples of syntaxLearner.corpus.Vocabulary


   *
   */
  public void recordCorpusData (Corpus c, Learner l){
    if (!isRecording) return;
    this.c=c;
    Vocabulary v = c.getVocabulary();
    Set<Map.Entry<String, Integer>> entrySet = v.getWordIndicesEntrySet();
    Set<Map.Entry<Integer, Word>> wordSet = v.getWordEntrySet();
    StringBuilder s = new StringBuilder(v.getNumOfWords()*30);
    corpusFile = new File(mainPath, "corpus_"+name+".js");

    s.append(String.format("corpus_%1$s = \n{\n\tname:\"%1$s\",\n", name));
    s.append(String.format("\ttokenCount: %1$s,\n", c.tokenCount));
    s.append(String.format("\ttypeCount: %1$s,\n", v.getNumOfWords()));
    s.append(String.format("\tcommonTypes: %1$s,\n", (v.getNumOfWords()-v.countWordsBelowThreshold(l.RARE_WORD_THRESHOLD))));
    s.append(String.format("\tclusterCount: %1$s,\n", l.NUMBER_OF_CLUSTERS));
    //TODO add proper number formatters
    s.append(String.format("\tidentityEps: %1$s,\n", l.IDENTITY_EPSILON));
    s.append(String.format("\titerationCount: %1$s,\n", iterationCounter));
    s.append("\ttypeToId: \n\t{\n\t");
    for (Map.Entry<String, Integer> e: entrySet){
      if (v.getWord(e.getValue()).frequency >= l.RARE_WORD_THRESHOLD){ //TODO see if necessary
        s.append(String.format("\t\t\"%1$s\": %2$s,\n", e.getKey(), e.getValue()));
      }
    }
    s.append("},\n");
    s.append("\tidToType: {");
View Full Code Here

TOP

Related Classes of syntaxLearner.corpus.Vocabulary

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.