Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.DiskTreebank$DiskTreebankIterator


    }

    Properties options = StringUtils.argsToProperties(args, argDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    TreebankLangParserParams tlpp = Languages.getLanguageParams(language);
    DiskTreebank tb = null;
    String encoding = options.getProperty("l", "UTF-8");
    boolean removeBracket = PropertiesUtils.getBool(options, "b", false);
   
    tlpp.setInputEncoding(encoding);
    tlpp.setOutputEncoding(encoding);
    tb = tlpp.diskTreebank();

    String[] files = options.getProperty("", "").split("\\s+");
    if (files.length != 0) {
      for (String filename : files) {
        tb.loadPath(filename);
      }
    } else {
      System.err.println(usage());
      System.exit(-1);
    }
View Full Code Here


  public ATBArabicDataset() {
    super();

    //Read the raw file as UTF-8 irrespective of output encoding
    treebank = new DiskTreebank(new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory(true), "UTF-8");
  }
View Full Code Here

      System.out.println(usage.toString());
      System.exit(-1);
    }

    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    String encoding = "UTF-8";
    TregexPattern rootMatch = null;
   
    for(int i = 0; i < args.length; i++) {
      if(args[i].startsWith("-")) {
        switch (args[i]) {
          case "-l":
            Language lang = Language.valueOf(args[++i].trim());
            tlpp = Languages.getLanguageParams(lang);

            break;
          case "-e":
            encoding = args[++i];

            break;
          default:
            System.out.println(usage.toString());
            System.exit(-1);
        }

      } else {
        rootMatch = TregexPattern.compile("@" + args[i++]);

        if(tb == null) {
          if(tlpp == null) {
            System.out.println(usage.toString());
            System.exit(-1);
          } else {
            tlpp.setInputEncoding(encoding);
            tlpp.setOutputEncoding(encoding);
            tb = tlpp.diskTreebank();
          }
        }
        tb.loadPath(args[i++]);
      }
    }

    Counter<String> rhsCounter = new ClassicCounter<String>();
    for(Tree t : tb) {
View Full Code Here

  }

  /* Returns a DiskTreebank with a NegraPennTokenizer and a
   * NegraPennTreeNormalizer */
  public DiskTreebank diskTreebank() {
    return new DiskTreebank(treeReaderFactory(), inputEncoding);
  }
View Full Code Here

   */
  public void addFileFolder(final EnumMap<FilterType, String> filters, final File[] files) {
    List<FileTreeNode> newFiles = new ArrayList<FileTreeNode>();
    findLoadableFiles(filters, files, newFiles, FileTreeModel.this.getRoot());//findLoadableFiles updates newFiles
    for(FileTreeNode fileNode : newFiles) {
      Treebank treebank = new DiskTreebank(trf, curEncoding);
      treebank.loadPath(fileNode.getFile(), null, true);
      TreeTransformer transformer = TregexGUI.getInstance().transformer;
      if (transformer != null) {
        treebank = new TransformingTreebank(treebank, transformer);
      }
      fileNode.setTreebank(treebank);
View Full Code Here

    if(useSplit) {
      List<ObservedCorpusStats> allSplitStats = new ArrayList<ObservedCorpusStats>();
      makeVocab = true;
      for(Map.Entry<Split, Set<String>> split : splitFileLists.entrySet()) {
        DiskTreebank tb = tlpp.diskTreebank();
        FileFilter splitFilter = new SplitFilter(split.getValue());
        for(String path : pathNames)
          tb.loadPath(path, splitFilter);
        ObservedCorpusStats splitStats = gatherStats(tb,languageName.toString() + "." + split.getKey().toString());
        allSplitStats.add(splitStats);
        makeVocab = false;
      }

      display(aggregateStats(allSplitStats), displayWords, displayOOV);
      for(ObservedCorpusStats ocs : allSplitStats)
        display(ocs, displayWords, displayOOV);

    } else if(pathsAreFiles) {
      makeVocab = true;
      for(String path : pathNames) {
        DiskTreebank tb = tlpp.diskTreebank();
        tb.loadPath(path, pathname -> true);

        ObservedCorpusStats stats = gatherStats(tb, languageName.toString() + "  " + path.toString());
        display(stats, displayWords, displayOOV);
        makeVocab = false;
      }

    } else {
      trainVocab = Generics.newHashSet();
      DiskTreebank tb = tlpp.diskTreebank();
      for(String path : pathNames)
        tb.loadPath(path, pathname -> !pathname.isDirectory());

      ObservedCorpusStats allStats = gatherStats(tb, languageName.toString());
      display(allStats, displayWords, displayOOV);
    }
  }
View Full Code Here

    return new TreeCollinizer(treebankLanguagePack(),collinizerRetainsPunctuation,false);
  }

  @Override
  public DiskTreebank diskTreebank() {
    return new DiskTreebank(treeReaderFactory(), inputEncoding);
  }
View Full Code Here

      System.err.println("usage: Relation treebank numberRanges");
      return;
    }
    FileFilter testFilt = new NumberRangesFileFilter(args[1], true);
    TreeReaderFactory trf = new PennTreeReaderFactory(new NPTmpRetainingTreeNormalizer());
    DiskTreebank testTreebank = new DiskTreebank(trf);
    testTreebank.loadPath(new File(args[0]), testFilt);
    HeadFinder hf = new ModCollinsHeadFinder();
    List<Relation> relations = new ArrayList<Relation>();
    relations.addAll(Arrays.asList(SIMPLE_RELATIONS));
    relations.add(new HasIthChild(2));
    relations.add(new HasIthChild(-1));
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.DiskTreebank$DiskTreebankIterator

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.