Package synalp.commons.grammar

Source Code of synalp.commons.grammar.GrammarReader

package synalp.commons.grammar;

import java.io.*;
import java.util.*;

import javax.xml.parsers.*;

import org.apache.log4j.Logger;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

import synalp.commons.input.Lemma;
import synalp.commons.semantics.*;
import synalp.commons.unification.*;
import synalp.commons.utils.Resources;
import synalp.commons.utils.configuration.ResourcesBundleFile;
import synalp.generation.configuration.GeneratorOption;


/**
* A GrammarReader reads a grammar in XML format.
* @author Alexandre Denis
*/
public class GrammarReader extends DefaultHandler
{
  private static Logger logger = Logger.getLogger(GrammarReader.class);

  private static boolean warnedAboutLemanchor;

  /**
   * Objects corresponding to node in the tree
   */
  private Tree tree;
  private Trace trace;
  private String curTag;
  private Grammar grammar;
  private DefaultLiteral literal;
  private GrammarEntry entry;
  private Semantics semantics;
  private FeatureConstant disjunction;

  /**
   * Stacks containing the objects parsed
   */
  private Stack<Node> nodesStack;
  private Stack<Feature> featStack;
  private Stack<FeatureStructure> structStack;

  /**
   * Fields sets to true when the current node is the one given in the name of the attribute
   */
  private boolean inArg;
  private boolean inLabel;
  private boolean inLiteral;
  private boolean inPredicate;
  private boolean inInterface;
  private boolean inDisjunction;


  /**
   * Demonstrates how to read a grammar.
   * @param args
   * @throws SAXException
   * @throws IOException
   */
  public static void main(String[] args) throws SAXException, IOException
  {
    Grammar grammar = GrammarReader.readGrammar(new File("/home/laura/gitprojects/quelonli/icgen/source/icgen/data/model/../dist/build/grammar/valuations.xml"));
    for(GrammarEntry entry : grammar.values())
      System.out.println(entry.toFullString() + "\n");
    for(String family : grammar.getFamilies().keySet())
      System.out.println("family:'"+family+"' : "+grammar.getFamilies().get(family));
    System.out.println("Read " + grammar.size() + " entries");
  }


  /**
   * Reads the given grammar.
   * @param file
   * @return a syntactic lexicon
   * @throws SAXException
   * @throws IOException
   */
  public static Grammar readGrammar(File file) throws SAXException, IOException
  {
    GrammarReader reader = new GrammarReader();
    try
    {
      logger.info("Reading grammar " + file);
      SAXParserFactory.newInstance().newSAXParser().parse(file, reader);
    }
    catch (ParserConfigurationException e)
    {
      e.printStackTrace();
    }
    return reader.grammar;
  }


  @Override
  public void endDocument() throws SAXException
  {
    postProcess(grammar);
    grammar.computeFamiliesCache();
  }


  /**
   * Resolves the given entity. It prevents the retrieval of the grammar dtd.
   */
  @Override
  public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException
  {
    return new InputSource(new StringReader(""));
  }


  @Override
  public void startDocument() throws SAXException
  {
    grammar = new Grammar();
    nodesStack = new Stack<Node>();
    featStack = new Stack<Feature>();
    structStack = new Stack<FeatureStructure>();
  }


  @Override
  public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
  {
    curTag = qName;
    if (qName.equals("entry"))
    {
      entry = createEntry(attributes);
      grammar.add(entry);
    }
    else if (qName.equals("trace"))
    {
      trace = new Trace();
      entry.setTrace(trace);
    }
    else if (qName.equals("tree"))
    {
      tree = createTree(attributes);
      entry.setTree(tree);
    }
    else if (qName.equals("semantics"))
    {
      semantics = new Semantics();
      entry.setSemantics(semantics);
    }
    else if (qName.equals("literal"))
    {
      inLiteral = true;
      literal = new DefaultLiteral();
      semantics.add(literal);
    }
    else if (qName.equals("vAlt"))
    {
      inDisjunction = true;
      disjunction = new FeatureConstant();
    }
    else if (qName.equals("sym"))
    {
      FeatureValue value = createFeatureValue(attributes);
      if (inDisjunction)
      {
        // we could be more cautious
        disjunction.addValue(value.toString());
      }
      else
      {
        if (!inLiteral)
          featStack.peek().setValue(value);
        else if (inLabel)
          literal.setLabel(value);
        else if (inArg)
          literal.addArgument(value);
        else if (inPredicate)
          literal.setPredicate(value);
      }
    }
    else if (qName.equals("node"))
      nodesStack.push(createNode(attributes));
    else if (qName.equals("f"))
      featStack.push(createFeature(attributes));
    else if (qName.equals("fs"))
      structStack.push(new FeatureStructure()); // do coref
    else if (qName.equals("interface"))
      inInterface = true;
    else if (qName.equals("label"))
      inLabel = true;
    else if (qName.equals("predicate"))
      inPredicate = true;
    else if (qName.equals("arg"))
      inArg = true;
  }


  @Override
  public void endElement(String uri, String localName, String qName) throws SAXException
  {
    if (qName.equals("f"))
      structStack.peek().add(featStack.pop());
    else if (qName.equals("fs"))
    {
      // if there is no fs in the stack anymore, attach the fs either to the interface or to the current node, else attach it to the top feature
      FeatureStructure fs = structStack.pop();
      if (structStack.isEmpty())
      {
        if (inInterface)
          entry.setInterface(fs);
        else nodesStack.peek().setTopBotCat(fs);
      }
      else featStack.peek().setValue(fs);
    }
    else if (qName.equals("node"))
    {
      // when popping node, create parenthood or set tree root
      Node node = nodesStack.pop();
      if (node == null)
        throw new SAXException("Error: every <node> must match a closing </node>");
      if (!nodesStack.isEmpty())
        node.setParent(nodesStack.peek());
      else tree.setRoot(node);
    }
    else if (qName.equals("interface"))
      inInterface = false;
    else if (qName.equals("literal"))
      inLiteral = false;
    else if (qName.equals("label"))
      inLabel = false;
    else if (qName.equals("predicate"))
      inPredicate = false;
    else if (qName.equals("arg"))
      inArg = false;
    else if (qName.equals("vAlt"))
    {
      if (!inLiteral)
        featStack.peek().setValue(disjunction);
      else if (inLabel)
        literal.setLabel(disjunction);
      else if (inArg)
        literal.addArgument(disjunction);
      else if (inPredicate)
        literal.setPredicate(disjunction);
      inDisjunction = false;
      disjunction = null;
    }

    curTag = "";
  }


  @Override
  public void characters(char[] ch, int start, int length) throws SAXException
  {
    if (curTag.equals("family"))
      entry.setFamily(toString(ch, start, length));
    else if (curTag.equals("class"))
      trace.add(toString(ch, start, length));
  }


  /**
   * Creates a FeatureValue.
   * @param attributes
   * @return a FeatureValue which can either be a FeatureConstant or a FeatureVariable
   * @throws SAXException
   */
  private FeatureValue createFeatureValue(Attributes attributes) throws SAXException
  {
    String value = attributes.getValue("value");
    if (value == null)
    {
      String varname = attributes.getValue("varname");
      if (varname == null)
        throw new SAXException("Error: a symbol in a fs must have either a 'value' or a 'varname' attribute");
      else return new FeatureVariable(varname);
    }
    else return new FeatureConstant(value);
  }


  /**
   * Post process the grammar. It both rewrites lex nodes and lemanchor features.
   * @param grammar
   */
  private static void postProcess(Grammar grammar)
  {
    for(GrammarEntry entry : grammar.values())
    {
      for(Node node : entry.getTree().getNodes())
      {
        if (GeneratorOption.REWRITE_LEX_NODES && node.getType() == NodeType.LEX)
          rewriteLexNode(node, entry);

        if (GeneratorOption.REWRITE_LEMANCHOR)
          rewriteLemanchor(node);
      }

      if (GeneratorOption.ASSIGN_NODE_IDS)
      {
        List<Integer> a = new ArrayList<Integer>();
        a.add(0);
        assignNodeID(entry.getTree().getRoot(), a);
      }
    }
  }


  /**
   * Assigns a node identifier to each node of a TAG tree. The node identifier is build as n.i
   * where i is an integer incremented for each visited node. The tree is traversed in depth-first
   * and the children are visited in the ordered they are entered in the grammar (if [a b c ] is
   * the list of children then a is visited first, then b and c). The root node is i=0
   * @param tree root node of a tree ({@link Node})
   * @param a should be initialized with the single element 0 TODO: better way of doing this?
   */
  private static void assignNodeID(Node tree, List<Integer> a)
  {
    if (tree.getId().isEmpty()) //is never null; empty by default!
      tree.setId("n" + a.get(0));
    for(Node n : tree.getChildren())
    {
      a.add(0, a.get(0) + 1);
      assignNodeID(n, a);
    }
  }


  /**
   * Rewrites lex nodes as coanchors nodes. It does not do any merging with the parent node and
   * simply rewrites a lex node as a COANCHOR. It forbids though any adjunction on the coanchor
   * itself, adjunctions need to be performed at the parent level if needed.
   * @param node
   * @param entry
   */
  private static void rewriteLexNode(Node node, GrammarEntry entry)
  {
    if (node.isPhonE())
      node.setType(NodeType.STD);
    else node.setType(NodeType.COANCHOR);
    node.setNoAdjunction(true);

    // the anchor lemma of the parent is the category of the child node, we take here the first value of the category if it exists
    if (node.getCategory() != null && !node.getCategory().getValues().isEmpty())
    {
      node.setAnchorLemma(new Lemma(node.getCategory().getFirstValue()), true);
      node.getFsTop().addConstantFeature("lemma", node.getCategory());
      node.getFsBot().addConstantFeature("lemma", node.getCategory());
    }

    if (node.getParent() != null)
      node.setCategory(node.getParent().getCategory());

    logger.warn("Warning: rewriting lex node for tree " + entry.getName() + " (lex nodes are now deprecated)");
  }


  /**
   * Rewrites lex nodes as coanchors nodes by merging them to their parent. Note that it appears
   * that parent lex nodes are not always terminal, hence this merging causes to have coanchors
   * inside the tree. We may remove this method in the future.
   * @param node
   * @param entry
   * @throws SAXException
   */
  @SuppressWarnings("unused")
  private static void rewriteLexNodeParentMerge(Node node, GrammarEntry entry) throws SAXException
  {
    Node parent = node.getParent();

    if (node.isPhonE())
      parent.setType(NodeType.STD);
    else parent.setType(NodeType.COANCHOR);
    FeatureStructure newBot = Unifier.unify(node.getFsBot(), parent.getFsBot());
    FeatureStructure newTop = Unifier.unify(node.getFsTop(), parent.getFsTop());
    if (newBot == null)
      throw new SAXException("Error: unable to rewrite lex node as a coanchor node since its bot fs and its parent bot fs do not unify (tree " +
                  entry.getName() +
                  ")");
    if (newTop == null)
      throw new SAXException("Error: unable to rewrite lex node as a coanchor node since its top fs and its parent top fs do not unify (tree " +
                  entry.getName() +
                  ")");

    parent.setFsBot(newBot);
    parent.setFsTop(newTop);

    // the anchor lemma of the parent is the category of the child node, we take here the first value of the category if it exists
    if (node.getCategory() != null && !node.getCategory().getValues().isEmpty())
    {
      parent.setAnchorLemma(new Lemma(node.getCategory().getFirstValue()), true);
      parent.getFsTop().addConstantFeature("lemma", node.getCategory());
      parent.getFsBot().addConstantFeature("lemma", node.getCategory());
    }

    parent.removeChild(node);

    logger.warn("Warning: rewriting lex node for tree " + entry.getName() + " (lex nodes are now deprecated)");
  }


  /**
   * Rewrites lemanchor features as "lemma", sets the node type to coanchor and also sets the
   * actual lemma.
   * @param node
   */
  public static void rewriteLemanchor(Node node)
  {
    List<Feature> features = new ArrayList<Feature>();
    features.addAll(node.getFsTop().getAllFeaturesRecursively());
    features.addAll(node.getFsBot().getAllFeaturesRecursively());

    for(Feature feat : features)
    {
      if (feat.getName().equals("lemanchor"))
      {
        feat.setName("lemma");
        if (!warnedAboutLemanchor)
        {
          logger.warn("Warning: rewriting 'lemanchor' as 'lemma' ('lemanchor' is now deprecated)");
          warnedAboutLemanchor = true;
        }
      }

      if (feat.getName().equals("lemma"))
      {
        if (node.getType() != NodeType.COANCHOR && node.getType() != NodeType.ANCHOR)
          node.setType(NodeType.COANCHOR);
        node.setAnchorLemma(new Lemma(feat.getValue().toString()));
      }
    }
  }


  /**
   * Creates a Node.
   * @param attributes
   * @return a new Node
   * @throws SAXException
   */
  private Node createNode(Attributes attributes) throws SAXException
  {
    String typeStr = attributes.getValue("type");
    if (typeStr == null)
      throw new SAXException("Error: a node is missing a 'type' attribute");

    NodeType type = NodeType.parse(typeStr);
    if (type == null)
      throw new SAXException("Error: a node has type '" + typeStr + "' which is not a valid type");

    Node ret = new Node(type);

    if (type == NodeType.NADJ)
    {
      ret.setType(NodeType.STD);
      ret.setNoAdjunction(true);
    }

    String id = attributes.getValue("name"); // the feature is called name, but id seems more appropriate
    if (id != null)
      ret.setId(id);
    return ret;
  }


  /**
   * Creates a Tree.
   * @param attributes
   * @return a new Tree
   * @throws SAXException
   */
  private static Tree createTree(Attributes attributes) throws SAXException
  {
    String id = attributes.getValue("id");
    if (id == null)
      throw new SAXException("Error: a tree is missing a 'id' attribute");
    else return new Tree(id);
  }


  /**
   * Creates a GrammarEntry.
   * @param attributes
   * @return a new GrammarEntry
   * @throws SAXException
   */
  private static GrammarEntry createEntry(Attributes attributes) throws SAXException
  {
    String name = attributes.getValue("name");
    if (name == null)
      throw new SAXException("Error: a grammar entry is missing a 'name' attribute");
    else return new GrammarEntry(name);
  }


  /**
   * Creates a Feature.
   * @param attributes
   * @return a feature without specified value.
   * @throws SAXException
   */
  private Feature createFeature(Attributes attributes) throws SAXException
  {
    String name = attributes.getValue("name");
    if (name == null)
      throw new SAXException("Error: a feature is missing a 'name' attribute");
    return new Feature(name);
  }


  /**
   * Returns a String from the given character range.
   * @param ch
   * @param start
   * @param length
   * @return a String
   */
  private static String toString(char[] ch, int start, int length)
  {
    return new String(Arrays.copyOfRange(ch, start, start + length));
  }

}
TOP

Related Classes of synalp.commons.grammar.GrammarReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.