Examples of edu.stanford.nlp.io.PrintFile

edu.stanford.nlp.io.PrintFile
Shorthand class for opening an output file for human-readable output. com:bruceeckel:tools:PrintFile.java

  private void test(int format, String saveRoot, String tagSeparator, String encoding) throws IOException {
    if ((format == 1)) {
      test(saveRoot, tagSeparator, encoding); //the data is tagged
    } else {
      BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding));
      PrintFile pf = null;
      PrintFile pf1 = null;
      if (writeWords) pf = new PrintFile(saveRoot + ".words");
      if (writeUnknDict) pf1 = new PrintFile(saveRoot + ".un.dict");


      for (String s; (s = in.readLine()) != null; ) {
        Sentence<Word> sent = Sentence.toSentence(Arrays.asList(s.split("\\s+")));
        ts.tagSentence(sent);
        if (pf != null) {
          pf.println(ts.getTaggedNice());
        }
      }


      in.close();
      if (pf != null) pf.close();
      if (pf1 != null) pf1.close();
    }
  }

View Full Code Here

   */
  private void test(TaggerConfig config, String saveRoot) throws IOException {
    numSentences = 0;
    String eosTag = "EOS";
    String eosWord = "EOS";
    PrintFile pf = null;
    PrintFile pf1 = null;
    PrintFile pf3 = null;


    if(writeWords) pf = new PrintFile(saveRoot + ".words");
    if(writeUnknDict) pf1 = new PrintFile(saveRoot + ".un.dict");
    if(writeTopWords) pf3 = new PrintFile(saveRoot + ".words.top");
    TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
    DiskTreebank treebank = new DiskTreebank(trf,config.getEncoding());
    TreeTransformer transformer = config.getTreeTransformer();
    TreeNormalizer normalizer = config.getTreeNormalizer();


    if (config.getTreeRange() != null) {
      treebank.loadPath(filename, new NumberRangesFileFilter(config.getTreeRange(), true));
    } else {
      treebank.loadPath(filename);
    }
    for (Tree t : treebank) {
      if (normalizer != null) {
        t = normalizer.normalizeWholeTree(t, t.treeFactory());
      }
      if (transformer != null) {
        t = t.transform(transformer);
      }


      List<String> sentence = new ArrayList<String>();
      List<String> tagsArr = new ArrayList<String>();


      for (TaggedWord cur : t.taggedYield()) {
        tagsArr.add(cur.tag());
        sentence.add(cur.word());
      }
      //the sentence is read already, add eos
      sentence.add(eosWord);
      tagsArr.add(eosTag);
      numSentences++;


      int len = sentence.size();
      String[] testSent = new String[len];
      String[] correctTags = new String[len];
      for (int i = 0; i < len; i++) {
        testSent[i] = sentence.get(i);
        correctTags[i] = tagsArr.get(i);
      }


      TestSentence testS = new TestSentence(GlobalHolder.getLambdaSolve(), testSent, correctTags, pf, wrongWords);
      if (writeUnknDict) testS.printUnknown(numSentences, pf1);
      if (writeTopWords) testS.printTop(pf3);


      numWrong = numWrong + testS.numWrong;
      numRight = numRight + testS.numRight;
      unknownWords = unknownWords + testS.numUnknown;
      numWrongUnknown = numWrongUnknown + testS.numWrongUnknown;
      if (testS.numWrong == 0) {
        numCorrectSentences++;
      }
      System.out.println("Sentence number: " + numSentences + "; length " + (len-1) + "; correct: " + testS.numRight + "; wrong: " + testS.numWrong + "; unknown wrong: " + testS.numWrongUnknown);
      System.out.println("  Total tags correct: " + numRight + "; wrong: " + numWrong + "; unknown wrong: " + numWrongUnknown);
    }


    if(pf != null) pf.close();
    if(pf1 != null) pf1.close();
    if(pf3 != null) pf3.close();
  }

View Full Code Here

   */
  private void test(String saveRoot, String tagSeparator, String encoding) throws IOException {
    numSentences = 0;
    String eosTag = "EOS";
    String eosWord = "EOS";
    PrintFile pf = null;
    PrintFile pf1 = null;
    PrintFile pf3 = null;


    BufferedReader rf = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding));
    if (writeWords) pf = new PrintFile(saveRoot + ".words");
    if (writeUnknDict) pf1 = new PrintFile(saveRoot + ".un.dict");
    if (writeTopWords) pf3 = new PrintFile(saveRoot + ".words.top");


    for (String s; (s = rf.readLine()) != null; ) {
      List<String> sentence = new ArrayList<String>();
      List<String> tagsArr = new ArrayList<String>();
      StringTokenizer st = new StringTokenizer(s);
      while (st.hasMoreTokens()) { // find the sentence there


        String token = st.nextToken();
        int index = token.lastIndexOf(tagSeparator);


        if (index == -1) {
          throw new RuntimeException("I was unable to find the delimiter '" + tagSeparator + "' in the token '" + token + "'. Consider using -delimiter.");
        }


        String w1 = token.substring(0, index);
        sentence.add(w1);
        String t1 = token.substring(index + 1);
        tagsArr.add(t1);
      }


      //the sentence is read already, add eos
      sentence.add(eosWord);
      tagsArr.add(eosTag);
      numSentences++;


      int len = sentence.size();
      String[] testSent = new String[len];
      String[] correctTags = new String[len];
      for (int i = 0; i < len; i++) {
        testSent[i] = sentence.get(i);
        correctTags[i] = tagsArr.get(i);
      }


      TestSentence testS = new TestSentence(GlobalHolder.getLambdaSolve(), testSent, correctTags, pf, wrongWords);
      if(writeUnknDict) testS.printUnknown(numSentences, pf1);
      if(writeTopWords) testS.printTop(pf3);


      numWrong = numWrong + testS.numWrong;
      numRight = numRight + testS.numRight;
      unknownWords = unknownWords + testS.numUnknown;
      numWrongUnknown = numWrongUnknown + testS.numWrongUnknown;
      if (testS.numWrong == 0) {
        numCorrectSentences++;
      }
      System.out.println("Sentence number: " + numSentences + "; length " + (len-1) + "; correct: " + testS.numRight + "; wrong: " + testS.numWrong + "; unknown wrong: " + testS.numWrongUnknown);
      System.out.println("  Total tags correct: " + numRight + "; wrong: " + numWrong + "; unknown wrong: " + numWrongUnknown);
    }


    rf.close();
    if (pf != null) pf.close();
    if (pf1 != null) pf1.close();
    if (pf3 != null) pf3.close();
  }

View Full Code Here

0 1

TOP

Related Classes of edu.stanford.nlp.io.PrintFile

edu.stanford.nlp.maxent.iis.LambdaSolve

edu.stanford.nlp.maxent.Problem

edu.stanford.nlp.tagger.maxent.GlobalHolder

edu.stanford.nlp.tagger.maxent.MaxentTagger

edu.stanford.nlp.tagger.maxent.TestClassifier

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.