Package org.maltparserx.core.syntaxgraph

Examples of org.maltparserx.core.syntaxgraph.PhraseStructure


  public boolean readSentence(TokenStructure syntaxGraph) throws MaltChainedException  {
    if (syntaxGraph == null || !(syntaxGraph instanceof PhraseStructure)) {
      return false;
    }
    syntaxGraph.clear();
    final PhraseStructure phraseStructure = (PhraseStructure)syntaxGraph;
    PhraseStructureNode parent = null;
    PhraseStructureNode child = null;
    currentHeaderTable = NegraTables.UNDEF;
    String line = null;
    syntaxGraph.clear();
    nonterminals.clear();
    try {
      while (true) {
        line = reader.readLine();
        if (line == null) {
          if (syntaxGraph.hasTokens()) {
            sentenceCount++;
            if (syntaxGraph instanceof MappablePhraseStructureGraph) {
              ((MappablePhraseStructureGraph)syntaxGraph).getMapping().updateDependenyGraph(((MappablePhraseStructureGraph)syntaxGraph), ((PhraseStructure)syntaxGraph).getPhraseStructureRoot());
            }
          }
          if (cIterations < nIterations) {
            cIterations++;
            reopen();
            return true;
          }
          return false;
        } else if (line.startsWith("#EOS")) {
          currentTerminalSize = 0;
          currentNonTerminalSize = 0;
          currentHeaderTable = NegraTables.UNDEF;
          if (syntaxGraph instanceof MappablePhraseStructureGraph) {
            ((MappablePhraseStructureGraph)syntaxGraph).getMapping().updateDependenyGraph(((MappablePhraseStructureGraph)syntaxGraph), ((PhraseStructure)syntaxGraph).getPhraseStructureRoot());
          }
          return true;
        } else if (line.startsWith("#BOS")) {
          currentHeaderTable = NegraTables.SENTENCE;
          int s = -1, e = -1;
          for (int i = 5, n = line.length(); i < n; i++) {
            if (Character.isDigit(line.charAt(i)) && s == -1) {
              s = i;
            }
            if (line.charAt(i) == ' ') {
              e = i;
              break;
            }
          }
          if (s != e && s != -1 && e != -1) {
            phraseStructure.setSentenceID(Integer.parseInt(line.substring(s,e)));
          }
          sentenceCount++;
        } else if (currentHeaderTable == NegraTables.SENTENCE) {
          if (line.length() >= 2 && line.charAt(0) == '#' && Character.isDigit(line.charAt(1))) { // Non-terminal
            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
            ColumnDescription column = null;
            currentNonTerminalSize++;
            char[] lineChars = line.toCharArray();
            int start = 0;
            int secedgecounter = 0;
            for (int i = 0, n = lineChars.length; i < n; i++) {
              if (lineChars[i] == '\t' && start == i) {
                start++;
              } else if (lineChars[i] == '\t' || i == n - 1) {
                if (columns.hasNext()) {
                  column = columns.next();
                }
                if (column.getPosition() == 0) {
                  int index = Integer.parseInt((i == n - 1)?line.substring(start+1):line.substring(start+1, i));
                  child = nonterminals.get(index);
                  if (child == null) {
                    if (index != 0) {
                      child = ((PhraseStructure)syntaxGraph).addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
                    }
                    nonterminals.put(index,child);
                  }
                } else if (column.getPosition() == 2 && child != null) {
                  syntaxGraph.addLabel(child, "CAT", (i == n - 1)?line.substring(start):line.substring(start, i));
                } else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) {
                  edgelabelSymbol.setLength(0);
                  edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
                  edgelabelTableName.setLength(0);
                  edgelabelTableName.append(column.getName());
                } else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && child != null) {
                  int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
                  parent = nonterminals.get(index);
                  if (parent == null) {
                    if (index == 0) {
                      parent = phraseStructure.getPhraseStructureRoot()
                    } else {
                      parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
                    }
                    nonterminals.put(index,parent);
                  }
                  Edge e = phraseStructure.addPhraseStructureEdge(parent, child);
                  syntaxGraph.addLabel(e, edgelabelTableName.toString(), edgelabelSymbol.toString());
                } else if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL && child != null) {
                  if (secedgecounter % 2 == 0) {
                    edgelabelSymbol.setLength(0);
                    edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
                    secedgecounter++;
                  } else {
                    int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
                    if (index == 0) {
                      parent = phraseStructure.getPhraseStructureRoot();
                    } else if (index < START_ID_OF_NONTERMINALS) {
                      parent = phraseStructure.getTokenNode(index);
                    } else {
                      parent = nonterminals.get(index);
                      if (parent == null) {
                        parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
                        nonterminals.put(index,parent);
                      }
                    }
                    Edge e = phraseStructure.addSecondaryEdge(parent, child);
                    e.addLabel(column.getSymbolTable(), edgelabelSymbol.toString());
                    secedgecounter++;
                  }
                }
                start = i + 1;
              }
            }
          } else { // Terminal
            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
            ColumnDescription column = null;
           
            currentTerminalSize++;
            child = syntaxGraph.addTokenNode(currentTerminalSize);
            char[] lineChars = line.toCharArray();
            int start = 0;
            int secedgecounter = 0;
            for (int i = 0, n = lineChars.length; i < n; i++) {
              if (lineChars[i] == '\t' && start == i) {
                start++;
              } else if (lineChars[i] == '\t' || i == n - 1) {
                if (columns.hasNext()) {
                  column = columns.next();
                }
                if (column.getCategory() == ColumnDescription.INPUT && child != null) {
                  syntaxGraph.addLabel(child, column.getName(), (i == n - 1)?line.substring(start):line.substring(start, i));
                } else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL && child != null) { // && column.getName().equals("EDGELABEL")) {
                  edgelabelSymbol.setLength(0);
                  edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
                  edgelabelTableName.setLength(0);
                  edgelabelTableName.append(column.getName());
                } else if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL && child != null) {
                  int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
                  parent = nonterminals.get(index);
                  if (parent == null) {
                    if (index == 0) {
                      parent = phraseStructure.getPhraseStructureRoot()
                    } else {
                      parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
                    }
                    nonterminals.put(index,parent);
                  }

                  Edge e = phraseStructure.addPhraseStructureEdge(parent, child);
                  syntaxGraph.addLabel(e, edgelabelTableName.toString(), edgelabelSymbol.toString());
                } else if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL && child != null) {
                  if (secedgecounter % 2 == 0) {
                    edgelabelSymbol.setLength(0);
                    edgelabelSymbol.append((i == n - 1)?line.substring(start):line.substring(start, i));
                    secedgecounter++;
                  } else {
                    int index = Integer.parseInt((i == n - 1)?line.substring(start):line.substring(start, i));
                    if (index == 0) {
                      parent = phraseStructure.getPhraseStructureRoot();
                    } else if (index < START_ID_OF_NONTERMINALS) {
                      parent = phraseStructure.getTokenNode(index);
                    } else {
                      parent = nonterminals.get(index);
                      if (parent == null) {
                        parent = phraseStructure.addNonTerminalNode(index-START_ID_OF_NONTERMINALS+1);
                        nonterminals.put(index,parent);
                      }
                    }
                    Edge e = phraseStructure.addSecondaryEdge(parent, child);
                    e.addLabel(column.getSymbolTable(), edgelabelSymbol.toString());
                    secedgecounter++;
                  }
                }
                start = i + 1;
View Full Code Here


  public boolean readSentence(TokenStructure syntaxGraph) throws MaltChainedException  {
    if (syntaxGraph == null || !(syntaxGraph instanceof PhraseStructure)) {
      return false;
    }
    syntaxGraph.clear();
    final PhraseStructure phraseStructure = (PhraseStructure)syntaxGraph;
    PhraseStructureNode parent = null;
    PhraseStructureNode child = null;
//    if (header == null) {
//      header = new TigerXMLHeader(syntaxGraph.getSymbolTables());
//    }

    try {
      while (true) {
        int event = reader.next();
        if (event == XMLStreamConstants.START_ELEMENT) {
          if (reader.getLocalName().length() == 0) {
            continue;
          }
          if (reader.getLocalName().charAt(0) == 'e') {
            // e -> edge, edgelabel
            if (reader.getLocalName().length() == 4) { //edge
              int childid = -1;
              int indexSep = reader.getAttributeValue(null, "idref").indexOf('_');
             
              try {
                if (indexSep != -1) {
                  childid = Integer.parseInt(reader.getAttributeValue(null, "idref").substring(indexSep+1));
                } else {
                  childid = Integer.parseInt(reader.getAttributeValue(null, "idref"));
                }
                if (childid == -1) {
                  throw new SyntaxGraphException("The tiger reader couldn't recognize the idref attribute '"+reader.getAttributeValue(null, "idref")+"' of the edge element. ");
                }
              } catch (NumberFormatException e) {
                throw new SyntaxGraphException("The tiger reader couldn't recognize the idref attribute '"+reader.getAttributeValue(null, "idref")+"' of the edge element. ");
              }

              if (childid < START_ID_OF_NONTERMINALS) {
                child = phraseStructure.getTokenNode(childid);
              } else {

                child = phraseStructure.getNonTerminalNode(childid-START_ID_OF_NONTERMINALS+1);
              }

              Edge e = phraseStructure.addPhraseStructureEdge(parent, child);
              SortedMap<String, SymbolTable> inputTables = dataFormatInstance.getPhraseStructureEdgeLabelSymbolTables();
              for (String name : inputTables.keySet()) {
                e.addLabel(inputTables.get(name), reader.getAttributeValue(null, name.toLowerCase()));
              }
            } else if (reader.getLocalName().equals("edgelabel")) { // edgelabel
//              domain = Domain.EL;
            }
          } else if (reader.getLocalName().charAt(0) == 'n') {
            // n -> nt, nonterminals, name
            if (reader.getLocalName().length() == 2) { // nt
              final String id = reader.getAttributeValue(null, "id");
              if (graphRootID.length() == id.length() && graphRootID.toString().equals(id)) {
                parent = phraseStructure.getPhraseStructureRoot();
              } else {
                int index = id.indexOf('_');
                if (index != -1) {
                  parent = phraseStructure.addNonTerminalNode(Integer.parseInt(id.substring(index+1))-START_ID_OF_NONTERMINALS+1);
                }
              }
              SortedMap<String, SymbolTable> inputTables = dataFormatInstance.getPhraseStructureNodeLabelSymbolTables();
              for (String name : inputTables.keySet()) {
                parent.addLabel(inputTables.get(name), reader.getAttributeValue(null, name.toLowerCase()));
              }
            } else if (reader.getLocalName().equals("name")) { // name
//              elementContent.setLength(0);
//              collectChar = true;
            }
          } else if (reader.getLocalName().charAt(0) == 't') {
            // t -> t, terminals
            if (reader.getLocalName().length() == 1) { // t
              SortedMap<String, SymbolTable> inputTables = dataFormatInstance.getInputSymbolTables();
              child = syntaxGraph.addTokenNode();
              for (String name : inputTables.keySet()) {
                child.addLabel(inputTables.get(name), reader.getAttributeValue(null, name.toLowerCase()));
              }
            }
          } else if (reader.getLocalName().charAt(0) == 's') {
            // s -> subcorpus, secedge, s, secedgelabel
            if (reader.getLocalName().length() == 1) { // s
              String id = reader.getAttributeValue(null, "id");
              boolean indexable = false;
              int index = -1;
              if (id != null && id.length() > 0) {
                for (int i = 0, n = id.length(); i < n; i++) {
                  if (Character.isDigit(id.charAt(i))) {
                    if (index == -1) {
                      index = i;
                    }
                    indexable = true;
                  }
                }
              }
              if (indexable) {
                phraseStructure.setSentenceID(Integer.parseInt(id.substring(index)));
              } else {
                phraseStructure.setSentenceID(sentenceCount+1);
              }
            }
          } else if (reader.getLocalName().charAt(0) == 'v') {
            // v -> variable, value
//            if (reader.getLocalName().equals("value")) {
//              valueName.setLength(0);
//              valueName.append(reader.getAttributeValue(null, "name"));
//              elementContent.setLength(0);
//              collectChar = true;
//            }
          } else {
//             a -> annotation, author
//             b -> body
//             c -> corpus
//             d -> date, description,
//             f -> feature, format
//             g -> graph
//             h -> head, history
//             m -> matches, match
            if (reader.getLocalName().equals("graph")) {
              graphRootID.setLength(0);
              graphRootID.append(reader.getAttributeValue(null, "root"));
            } else  if (reader.getLocalName().equals("corpus")) {
//              header.setCorpusID(reader.getAttributeValue(null, "id"));
//              header.setCorpusID(reader.getAttributeValue(null, "version"));
            } else if (reader.getLocalName().equals("feature")) {
//              if (header != null) {
//                currentFeatureName.setLength(0);
//                currentFeatureName.append(reader.getAttributeValue(null, "name"));
//                header.addFeature(reader.getAttributeValue(null, "name"), reader.getAttributeValue(null, "domain"));
//              }
//              domain = Domain.valueOf(reader.getAttributeValue(null, "domain"));
            } else if (reader.getLocalName().equals("secedgelabel")) {
//              domain = Domain.SEL;
            } else if (reader.getLocalName().equals("author")) {
//              elementContent.setLength(0);
//              collectChar = true;
            } else if (reader.getLocalName().equals("date")) {
//              elementContent.setLength(0);
//              collectChar = true;
            } else if (reader.getLocalName().equals("description")) {
//              elementContent.setLength(0);
//              collectChar = true;
            } else if (reader.getLocalName().equals("format")) {
//              elementContent.setLength(0);
//              collectChar = true;
            } else if (reader.getLocalName().equals("history")) {
//              elementContent.setLength(0);
//              collectChar = true;
            }
          }
        } else if (event == XMLStreamConstants.END_ELEMENT) {
          if (reader.getLocalName().length() == 0) {
            continue;
          }
          if (reader.getLocalName().charAt(0) == 'e') {
            // e -> edge, edgelabel
          } else if (reader.getLocalName().charAt(0) == 'n') {
            // n -> nt, nonterminals, name
            if (reader.getLocalName().equals("nt")) {
              ntid.setLength(0);
            }
            else if (reader.getLocalName().equals("nonterminals")) {
              if (phraseStructure.nTokenNode() == 1 && phraseStructure.nNonTerminals() == 0 &&((NonTerminalNode)phraseStructure.getPhraseStructureRoot()).nChildren() == 0) {
                Edge e = phraseStructure.addPhraseStructureEdge(phraseStructure.getPhraseStructureRoot(), phraseStructure.getTokenNode(1));
                SortedMap<String, SymbolTable> inputTables = dataFormatInstance.getPhraseStructureEdgeLabelSymbolTables();
                for (String name : inputTables.keySet()) {
                  e.addLabel(inputTables.get(name), "--");
                }
              }
View Full Code Here

    if (syntaxGraph == null || dataFormatInstance == null) {
      return;
    }
    if (syntaxGraph.hasTokens()) {
      sentenceCount++;
      final PhraseStructure phraseStructure = (PhraseStructure)syntaxGraph;
      try {
        sentenceID.setLength(0);
        sentenceID.append(sentencePrefix);
        if (phraseStructure.getSentenceID() != 0) {
          sentenceID.append(Integer.toString(phraseStructure.getSentenceID()));
        } else {
          sentenceID.append(Integer.toString(sentenceCount));
        }
        writer.write("    <s id=\"");
        writer.write(sentenceID.toString())
        writer.write("\">\n");
       
        setRootID(phraseStructure);
        writer.write("      <graph root=\"");
        writer.write(rootID.toString());
        writer.write("\" ");
        writer.write("discontinuous=\"");
        writer.write(Boolean.toString(!phraseStructure.isContinuous()));
        writer.write("\">\n");
       
        writeTerminals(phraseStructure);
        if (phraseStructure.nTokenNode() != 1 || rootHandling.equals(RootHandling.TALBANKEN)) {
          writeNonTerminals(phraseStructure);
        } else {
          writer.write("        <nonterminals/>\n");
        }
        writer.write("      </graph>\n");
View Full Code Here

 
  public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
    if (syntaxGraph == null || dataFormatInstance == null || !(syntaxGraph instanceof PhraseStructure) || !syntaxGraph.hasTokens()) {
      return;
    }
    PhraseStructure phraseStructure = (PhraseStructure)syntaxGraph;
    sentenceCount++;
    try {
      writer.write("#BOS ");
      if (phraseStructure.getSentenceID() != 0) {
        writer.write(Integer.toString(phraseStructure.getSentenceID()));
      } else {
        writer.write(Integer.toString(sentenceCount));
      }
      writer.write('\n');

      if (phraseStructure.hasNonTerminals()) {
        calculateIndices(phraseStructure);
        writeTerminals(phraseStructure);
        writeNonTerminals(phraseStructure);
      } else {
        writeTerminals(phraseStructure);
      }
      writer.write("#EOS ");
      if (phraseStructure.getSentenceID() != 0) {
        writer.write(Integer.toString(phraseStructure.getSentenceID()));
      } else {
        writer.write(Integer.toString(sentenceCount));
      }
      writer.write('\n');
    } catch (IOException e) {
View Full Code Here

TOP

Related Classes of org.maltparserx.core.syntaxgraph.PhraseStructure

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.