Package edu.stanford.nlp.trees

Examples of edu.stanford.nlp.trees.TreebankLanguagePack


        return outData;
    }

    private void parseRelations(Tree parsedTree, ParsedData outData) {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parsedTree);
        List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
        String reln;
        int governerIndex;
        int dependentIndex;
View Full Code Here


   @param length The length of the sentence (just to avoid recomputation)
   */
  private boolean addSentenceFinalPunctIfNeeded(List<HasWord> sentence, int length) {
    int start = length - 3;
    if (start < 0) start = 0;
    TreebankLanguagePack tlp = op.tlpParams.treebankLanguagePack();
    for (int i = length - 1; i >= start; i--) {
      HasWord item = sentence.get(i);
      // An object (e.g., CoreLabel) can implement HasTag but not actually store
      // a tag so we need to check that there is something there for this case.
      // If there is, use only it, since word tokens can be ambiguous.
      String tag = null;
      if (item instanceof HasTag) {
        tag = ((HasTag) item).tag();
      }
      if (tag != null && ! tag.isEmpty()) {
        if (tlp.isSentenceFinalPunctuationTag(tag)) {
          return false;
        }
      } else {
        String str = item.word();
        if (tlp.isPunctuationWord(str)) {
          return false;
        }
      }
    }
    // none found so add one.
    if (op.testOptions.verbose) {
      System.err.println("Adding missing final punctuation to sentence.");
    }
    String[] sfpWords = tlp.sentenceFinalPunctuationWords();
    if (sfpWords.length > 0) {
      sentence.add(new Word(sfpWords[0]));
    }
    return true;
  }
View Full Code Here

    this.parser = parser;
    this.maxSentenceLength = maxSent;
    this.treeMap = treeMap;
    this.maxParseTime = 0;
    if (this.BUILD_GRAPHS) {
      TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack();
      this.gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder());
    } else {
      this.gsf = null;
    }
    this.nThreads = 1;
    this.saveBinaryTrees = false;
View Full Code Here

    } else {
      this.BUILD_GRAPHS = PropertiesUtils.getBool(props, buildGraphsProperty, true);
    }

    if (this.BUILD_GRAPHS) {
      TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack();
      // TODO: expose keeping punctuation as an option to the user?
      this.gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder());
    } else {
      this.gsf = null;
    }

    this.nThreads = PropertiesUtils.getInt(props, annotatorName + ".nthreads", PropertiesUtils.getInt(props, "nthreads", 1));
View Full Code Here

    }
  }

  public static void main(String[] args) {
    TreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
    TreebankLanguagePack ctlp = tlpParams.treebankLanguagePack();
    Options op = new Options(tlpParams);
    TreeAnnotator ta = new TreeAnnotator(tlpParams.headFinder(), tlpParams, op);

    System.err.println("Reading Trees...");
    FileFilter trainFilter = new NumberRangesFileFilter(args[1], true);
View Full Code Here

    if(args.length != 1) {
      System.err.printf("Usage: java %s tree_file%n", SplitMaker.class.getName());
      System.exit(-1);
    }

    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    String inputFile = args[0];
    File treeFile = new File(inputFile);
    try {
      TreeReaderFactory trf = new HebrewTreeReaderFactory();
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
      TreeReader tr = trf.newTreeReader(br);

      PrintWriter pwDev = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.dev"),false,tlp.getEncoding()));
      PrintWriter pwTrain = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.train"),false,tlp.getEncoding()));
      PrintWriter pwTest = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.test"),false,tlp.getEncoding()));

      int numTrees = 0;
      for(Tree t; ((t = tr.readTree()) != null); numTrees++) {
        if(numTrees < 483)
          pwDev.println(t.toString());
View Full Code Here

  }

  /** Prints a few aspects of the TreebankLanguagePack, just for debugging.
   */
  public static void main(String[] args) {
    TreebankLanguagePack tlp = new TueBaDZLanguagePack();
    System.out.println("Start symbol: " + tlp.startSymbol());
    String start = tlp.startSymbol();
    System.out.println("Should be true: " + (tlp.isStartSymbol(start)));
    String[] strs = new String[]{"-", "-LLB-", "NP-2", "NP=3", "NP-LGS", "NP-TMP=3", "CARD-HD"};
    for (String str : strs) {
      System.out.println("String: " + str + " basic: " + tlp.basicCategory(str) + " basicAndFunc: " + tlp.categoryAndFunction(str));
    }
  }
View Full Code Here

    if(args.length != 1) {
      System.err.printf("Usage: java %s tree_file > trees%n", HebrewTreeReaderFactory.class.getName());
      System.exit(-1);
    }

    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    File treeFile = new File(args[0]);
    try {
      TreeReaderFactory trf = new HebrewTreeReaderFactory();
      BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
      TreeReader tr = trf.newTreeReader(br);

      int numTrees = 0;
      for(Tree t; ((t = tr.readTree()) != null); numTrees++)
        System.out.println(t.toString());
View Full Code Here

   *          e.g. training a parser.
   */
  public SpanishXMLTreeReader(String filename, Reader in, boolean simplifiedTagset,
                              boolean aggressiveNormalization,
                              boolean retainNER, boolean detailedAnnotations) {
    TreebankLanguagePack tlp = new SpanishTreebankLanguagePack();

    this.simplifiedTagset = simplifiedTagset;
    this.detailedAnnotations = detailedAnnotations;

    stream = new ReaderInputStream(in, tlp.getEncoding());
    treeFactory = new LabeledScoredTreeFactory();
    treeNormalizer =
      new SpanishTreeNormalizer(simplifiedTagset,
                                aggressiveNormalization,
                                retainNER);
View Full Code Here

    if(args.length < 1) {
      System.out.printf("Usage: java %s tree_file%n", NegraPennTreeReaderFactory.class.getName());
      return;
    }

    TreebankLanguagePack tlp = new NegraPennLanguagePack();
    TreeReaderFactory trf = new NegraPennTreeReaderFactory(2,false,false,tlp);

    try {
      TreeReader tr = trf.newTreeReader(IOUtils.readerFromString(args[0], tlp.getEncoding()));

      for (Tree t; (t = tr.readTree()) != null; ) {
        t.pennPrint();
      }
View Full Code Here

TOP

Related Classes of edu.stanford.nlp.trees.TreebankLanguagePack

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.