Package opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser

Examples of opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Node


      if (start > -1 && index < start) {
        index++;
        // skip this one
      } else {
        Node root = paragraph.getRoot();
        List<String> sentence = new ArrayList<String>();
        List<String> tags = new ArrayList<String>();
        List<String> target = new ArrayList<String>();

        processRoot(root, sentence, tags, target);
View Full Code Here


     */
    public Paragraph parse(String paragraphString) {
      BufferedReader reader = new BufferedReader(new StringReader(
          paragraphString));
      Paragraph sentence = new Paragraph();
      Node root = new Node();
      try {
        // first line is <s ...>
        String line = reader.readLine();
        if (line.startsWith("<s")) {
          // should finde the source source
          while (!line.startsWith("SOURCE")) {
            line = reader.readLine();
            if (line == null) {
              return new Paragraph();
            }
          }
        }
        line = reader.readLine();
        // we should have the plain sentence
        // we remove the first token
        int start = line.indexOf(" ");
        sentence.setText(line.substring(start + 1));
        // now we look for the root node
        line = reader.readLine();

        while (!rootPattern.matcher(line).matches()) {
          line = reader.readLine();
          if (line == null) {
            return sentence;
          }
        }
        // got the root. Add it to the stack
        Stack<Node> nodeStack = new Stack<Node>();
        // we get the complete line

        root.setSyntacticTag("ROOT");
        root.setLevel(0);
        nodeStack.add(root);
        // now we have to take care of the lastLevel. Every time it raises, we
        // will add the
        // leaf to the node at the top. If it decreases, we remove the top.
        //line = reader.readLine();
        while (line.length() != 0 && line.startsWith("</s>") == false) {
          TreeElement element = this.getElement(line);
         
          if(element != null) {
            // remove elements at same level or higher
            while (!nodeStack.isEmpty()
                && element.getLevel() > 0 && element.getLevel() <= nodeStack.peek().getLevel()) {
              nodeStack.pop();
            }
            if( element.isLeaf() ) {
              if (nodeStack.isEmpty()) {
                root.addElement(element);
              } else {
                // look for the node with the correct level
                Node peek = nodeStack.peek();
                if (element.level == 0) { // add to the root
                  nodeStack.firstElement().addElement(element);
                } else {
                  Node parent = null;
                  int index = nodeStack.size() - 1;
                  while(parent == null) {
                    if(peek.getLevel() < element.getLevel()) {
                      parent = peek;
                    } else {
                      index--;
                      if(index > -1) {
                        peek = nodeStack.get(index);
                      } else {
                        parent = nodeStack.firstElement();
                      }
                    }
                  }
                  parent.addElement(element);
                }
              }
            } else {
              if (!nodeStack.isEmpty()) {
                nodeStack.peek().addElement(element);
View Full Code Here

      Matcher nodeMatcher = nodePattern.matcher(line);
      if (nodeMatcher.matches()) {
        int level = nodeMatcher.group(1).length();
        String syntacticTag = nodeMatcher.group(2);
        String morphologicalTag = nodeMatcher.group(3);
        Node node = new Node();
        node.setLevel(level);
        node.setSyntacticTag(syntacticTag);
        node.setMorphologicalTag(morphologicalTag);
        return node;
      }

      Matcher leafMatcher = leafPattern.matcher(line);
      if (leafMatcher.matches()) {
View Full Code Here

  public NameSample read() throws IOException {

    Paragraph paragraph;
    while ((paragraph = this.adSentenceStream.read()) != null) {
      Node root = paragraph.getRoot();
      List<String> sentence = new ArrayList<String>();
      List<Span> names = new ArrayList<Span>();
      process(root, sentence, names);

      return new NameSample(sentence.toArray(new String[sentence.size()]),
View Full Code Here

TOP

Related Classes of opennlp.tools.formats.ad.ADParagraphStream.ParagraphParser.Node

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.