Package edu.stanford.nlp.process

Examples of edu.stanford.nlp.process.WordSegmenter


      boolean segmentWords = ctpp.segment;
      boolean parse = lp != null;
      assert (parse || segmentWords);
      //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
      //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
      // When segmenting, the lexicon itself is expected to implement WordSegmenter.
      WordSegmenter seg = null;
      if (segmentWords) {
        seg = (WordSegmenter) lex;
      }
      String[] testArgs = argMap.get("-test");
      MemoryTreebank testTreebank = op.tlpParams.memoryTreebank();
      FileFilter testFilt = new NumberRangesFileFilter(testArgs[1], false);
      testTreebank.loadPath(new File(testArgs[0]), testFilt);
      TreeTransformer subcategoryStripper = op.tlpParams.subcategoryStripper();
      TreeTransformer collinizer = ctpp.collinizer();

      // Score output by comparing (word, category) brackets under an equivalence
      // relation, both on the raw trees ("basic") and after collinization.
      WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
      WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
      EquivalenceClassEval basicEval = new EquivalenceClassEval(eqclass, eqcheck, "basic");
      EquivalenceClassEval collinsEval = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
      List<String> evalTypes = new ArrayList<String>(3);
      boolean goodPOS = false;
      if (segmentWords) {
        evalTypes.add(WordCatConstituent.wordType);
        if (ctpp.segmentMarkov && !parse) {
          evalTypes.add(WordCatConstituent.tagType);
          goodPOS = true;
        }
      }
      if (parse) {
        evalTypes.add(WordCatConstituent.tagType);
        evalTypes.add(WordCatConstituent.catType);
        if (combo) {
          evalTypes.add(WordCatConstituent.wordType);
          goodPOS = true;
        }
      }
      TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);

      System.err.println("Testing...");
      for (Tree goldTop : testTreebank) {
        Tree gold = goldTop.firstChild();
        List<HasWord> goldSentence = gold.yieldHasWord();
        if (goldSentence.size() > maxLength) {
          System.err.println("Skipping sentence; too long: " + goldSentence.size());
          continue;
        } else {
          System.err.println("Processing sentence; length: " + goldSentence.size());
        }
        List<HasWord> s;
        if (segmentWords) {
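          // Re-join the gold words into one unsegmented character string, then
          // let the WordSegmenter propose its own word boundaries.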
          StringBuilder goldCharBuf = new StringBuilder();
          for (Iterator<HasWord> wordIter = goldSentence.iterator(); wordIter.hasNext();) {
            StringLabel word = (StringLabel) wordIter.next();
            goldCharBuf.append(word.value());
          }
          String goldChars = goldCharBuf.toString();
          s = seg.segment(goldChars);
        } else {
          s = goldSentence;
        }
        Tree tree;
        if (parse) {
          // ... (remainder of the example is truncated in the original listing)
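
The only WordSegmenter call the example depends on is segment(String), which takes a raw, unsegmented string and returns a List<HasWord>. Below is a minimal standalone sketch of that pattern; SegmentSketch and printSegmentation are hypothetical names, and it assumes you already hold a trained WordSegmenter instance (for example, a segmenting lexicon cast to WordSegmenter as in the listing above).

    import java.util.List;

    import edu.stanford.nlp.ling.HasWord;
    import edu.stanford.nlp.process.WordSegmenter;

    // Hypothetical helper class; not part of the Stanford NLP distribution.
    public class SegmentSketch {

      // Feeds an already-trained WordSegmenter a raw character string and
      // prints each word it proposes on its own line.
      public static void printSegmentation(WordSegmenter seg, String rawChars) {
        List<HasWord> segmented = seg.segment(rawChars);
        for (HasWord w : segmented) {
          System.out.println(w.word());
        }
      }
    }

In the listing above the same call appears as seg.segment(goldChars), after the gold words have been concatenated back into a single unsegmented string.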
