Source Code of joshua.prefix_tree.Node

/* This file is part of the Joshua Machine Translation System.
 * 
 * Joshua is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
package joshua.prefix_tree;


import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;


import joshua.corpus.MatchedHierarchicalPhrases;
import joshua.corpus.RuleExtractor;
import joshua.corpus.suffix_array.ParallelCorpusGrammarFactory;
import joshua.corpus.suffix_array.Pattern;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.ff.tm.BasicRuleCollection;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.ff.tm.RuleCollection;
import joshua.decoder.ff.tm.Trie;
import joshua.util.Cache;


/**
 * Represents a node in a prefix tree.
 * 
 * @author Lane Schwartz
 */
public class Node implements Comparable<Node>, Trie {


  /** Logger for this class. */
  private static final Logger logger =
    Logger.getLogger(Node.class.getName());


  /**
   * Unique integer identifier for this node.
   * <p>
   * This value is the sole basis for {@link #hashCode()},
   * {@link #equals(Object)} and {@link #compareTo(Node)}.
   */
  final int objectID;


  /**
   * The lower bound in the suffix array
   * for the source pattern at this node.
   * <p>
   * Assigned by {@link #setBounds(int, int)}.
   */
  int lowBoundIndex;


  /**
   * The upper bound in the suffix array
   * for the source pattern at this node.
   * <p>
   * Assigned by {@link #setBounds(int, int)}.
   */
  int highBoundIndex;


  /**
   * Indicates whether this is an active node.
   * <p>
   * Inactive nodes are never returned by {@link #matchOne(int)},
   * report no rules from {@link #hasRules()}, and are marked
   * with a trailing <code>*</code> in the string representations.
   */
  boolean active;
  
  /**
   * Suffix link for this node.
   * <p>
   * Initialized to <code>null</code> by the constructor and
   * assigned by {@link #linkToSuffix(Node)}.
   */
  Node suffixLink;


  /** 
   * Maps from integer representations of words to nodes. 
   * <p>
   * TODO It may be better to have a single map in PrefixTree that maps (Node,Integer) --> Node
   */
  Map<Integer,Node> children;
  
  /**
   * Source side hierarchical phrases for this node.
   * <p>
   * Initialized to <code>null</code> by the constructor and
   * assigned by {@link #storeResults(MatchedHierarchicalPhrases, List)}.
   */
  MatchedHierarchicalPhrases sourceHierarchicalPhrases;
  
  
//  List<Rule> results;
  
  /**
   * Parallel corpus associated with this node.
   * <p>
   * {@link #getResults()} uses it to reach the rule cache of
   * the suffix array and the rule extractor. Child nodes created
   * through {@link #addChild(int)} share the same instance.
   */
  protected final ParallelCorpusGrammarFactory parallelCorpus;
  
//  private final Suffixes suffixArray;
//  private final Cache<Pattern, List<Rule>> ruleCache;
//  private final Cache<Pattern, MatchedHierarchicalPhrases> matchedPhrasesCache;
  
  /**
   * Source language pattern for this node.
   * <p>
   * Not assigned by any constructor of this class; it is set by
   * {@link #storeResults(MatchedHierarchicalPhrases, List)} and
   * serves as the key into the rule cache in {@link #getResults()}.
   */
  Pattern sourcePattern;
  
  
  
  
////================================  
//  // Added by zhifei: these parameters are not initialized by the constructor
//  public static final int OOV_RULE_ID = 0;
//  private int defaultOwner;
//  private float oovFeatureCost = 100;
//  
//  /**
//   * the OOV rule should have this lhs, this should be grammar
//   * specific as only the grammar knows what LHS symbol can
//   * be combined with other rules
//   */ 
//  private int defaultLHS;
//  private int spanLimit = 10;
////==============================  
//  
  
  /** 
   * Gets translation rules for this node. 
   * <p>
   * The results of this method are guaranteed to be 
   * sorted according to whatever feature functions are in use.
   * 
   * Calling this method will return results equivalent to those 
   * that would be returned by calling 
   * <code>HierarchicalRuleExtractor#extractRules(getMatchedPhrases())</code>.
   * 
   * @see RuleExtractor#extractRules(MatchedHierarchicalPhrases)
   * @return translation rules for this node
   */
  protected List<Rule> getResults() {
    
    Cache<Pattern,List<Rule>> ruleCache = parallelCorpus.getSuffixArray().getCachedRules();
    
    List<Rule> results;
    
    if (ruleCache.containsKey(sourcePattern)) {
      results = ruleCache.get(sourcePattern);
      // The rules from the cache are guaranteed to be sorted.
    } else {
      results = parallelCorpus.getRuleExtractor().extractRules(getMatchedPhrases());
      // The above list of rules extracted is guaranteed to be sorted.
      ruleCache.put(sourcePattern, results);
    }
    
    // These rules are sorted.
    return results;
  }
  
  protected MatchedHierarchicalPhrases getMatchedPhrases() {
    
    //TODO Implement this method
    return this.sourceHierarchicalPhrases;
    
//    MatchedHierarchicalPhrases results;
//    
//    if (matchedPhrasesCache.containsKey(sourcePattern)) {
//      results = matchedPhrasesCache.get(sourcePattern);
//    } else {
//      
//      // Do some extra lookup
//      
//      
//      throw new RuntimeException("This code not yet implemented");
//      
//    }
//    
//    return results;
  }
  
  /**
   * Constructs a new active node that shares the parallel corpus
   * of the specified node.
   * <p>
   * The new node takes the current value of the node ID counter
   * as its identifier, and the counter is then incremented.
   * 
   * @param parent node whose parallel corpus the new node will use
   */
  Node(Node parent) {
//    this(parent.ruleCache, parent.matchedPhrasesCache, true);
    this(parent.parallelCorpus, true, nodeIDCounter++);
  }
  
  /**
   * Constructs a new active node with an explicitly supplied identifier.
   * <p>
   * Unlike the constructors that take no identifier, this one
   * does not read or advance the node ID counter.
   * 
   * @param parallelCorpus parallel corpus to associate with this node
   * @param objectID       identifier to assign to this node
   */
  Node(ParallelCorpusGrammarFactory parallelCorpus, int objectID) {
    this(parallelCorpus, true, objectID);
//    this(
//      (suffixArray==null ? null : suffixArray.getCachedRules()), 
//      (suffixArray==null ? null : suffixArray.getCachedHierarchicalPhrases()), 
//      true, objectID);
  }
  
  /**
   * Constructs a new node with the specified active status.
   * <p>
   * The new node takes the current value of the node ID counter
   * as its identifier, and the counter is then incremented.
   * 
   * @param parallelCorpus parallel corpus to associate with this node
   * @param active         whether the new node is active
   */
  Node(ParallelCorpusGrammarFactory parallelCorpus, boolean active) {
    this(parallelCorpus, active, nodeIDCounter++);
  }
  
  
//  Node(Cache<Pattern, List<Rule>> ruleCache, Cache<Pattern, MatchedHierarchicalPhrases> matchedPhrasesCache, boolean active) {
//    this(ruleCache, matchedPhrasesCache, active, nodeIDCounter++);
//  }
  
//  Node(Cache<Pattern, List<Rule>> ruleCache, Cache<Pattern, MatchedHierarchicalPhrases> matchedPhrasesCache, boolean active, int objectID) {
  Node(ParallelCorpusGrammarFactory parallelCorpus, boolean active, int objectID) {
//    this.ruleCache = ruleCache;
//    this.matchedPhrasesCache = matchedPhrasesCache;
    this.parallelCorpus = parallelCorpus;
//    this.suffixArray = suffixArray;
    this.active = active;
    this.suffixLink = null;
    this.children = new HashMap<Integer,Node>();
    this.objectID = objectID;
    this.sourceHierarchicalPhrases = null;//HierarchicalPhrases.emptyList((SymbolTable) null);
//    this.results = Collections.emptyList();
  }
  
  Node calculateSuffixLink(int endOfPattern) {


    Node suffixLink = this.suffixLink.getChild(endOfPattern);


    if (suffixLink==null) {
      throw new NoSuchChildNodeException(this, endOfPattern);
    }
    
    return suffixLink;


  }
  
  /**
   * Gets the representation of the source side tokens corresponding
   * to the hierarchical phrases for this node.
   * 
   * @return the source language pattern for this node
   */
  public Pattern getSourcePattern() {
//    return sourceHierarchicalPhrases.getPattern();
    return sourcePattern;
  }
  
  
  /**
   * Gets rules for this node and the children of this node.
   *
   * @return rules for this node and the children of this node.
   */
  public List<Rule> getAllRules() {
    
    List<Rule> results = this.getResults();
    
    List<Rule> result = new ArrayList<Rule>(
        (results==null) ? Collections.<Rule>emptyList() : results);
      
    for (Node child : children.values()) {
      result.addAll(child.getAllRules());
    }
    
    return result;
  }
  
  /* See Javadoc for joshua.decoder.ff.tm.Trie#getRules */
  public RuleCollection getRules() {
        
    final int[] sourceSide = 
      (sourcePattern==null) 
      ? new int[]{}  
      : sourcePattern.getWordIDs();
      
    final int arity = 
      (sourcePattern==null) 
      ? 0 
      : sourcePattern.arity();
    
    List<Rule> results = this.getResults();
    
    return new BasicRuleCollection(arity, sourceSide, results);
    
  }
  
  /* See Javadoc for joshua.decoder.ff.tm.Trie#hasExtensions */
  public boolean hasExtensions() {
    return ! children.isEmpty();
  }
  
  /* See Javadoc for joshua.decoder.ff.tm.Trie#hasRules */
  public boolean hasRules() {
    
    if (active) {
      MatchedHierarchicalPhrases sourceHierarchicalPhrases = this.getMatchedPhrases();


      return ! sourceHierarchicalPhrases.isEmpty();
    } else {
      return false;
    }
  }
  
  /* See Javadoc for joshua.decoder.ff.tm.Trie#matchOne */
  public Trie matchOne(int symbol) {
    if (children.containsKey(symbol)) {
      Node child = children.get(symbol);
      if (child.active) {
        return child;
      } else {
        return null;
      }
//      return children.get(symbol);
    } else {
      return null;
    }
  }


  /* See Javadoc for joshua.decoder.ff.tm.Trie#getExtensions */
  public Collection<Node> getExtensions() {
    return this.children.values();
  }
  
  /* See Javadoc for joshua.decoder.ff.tm.Grammar#getTrieRoot */
  public Trie getTrieRoot() {
    return this;
  }
  
  /**
   * Determines whether this node has a specified child.
   * 
   * @param child
   * @return <code>true</code> if this node has a specified child,
   *         <code>false</code> otherwise
   */
  public boolean hasChild(int child) {
    return children.containsKey(child);
  }


  public Node getChild(int child) {
    return children.get(child);
  }


  public Node addChild(int child) {
    if (children.containsKey(child)) {
      throw new ChildNodeAlreadyExistsException(this, child);
    } else {
      Node node = new Node(this);
      children.put(child, node);
      return node;
    }
  }


  /**
   * Sets the suffix link for this node.
   * 
   * @param suffix Suffix link for this node
   */
  public void linkToSuffix(Node suffix) {
    this.suffixLink = suffix;
  }


  /**
   * Sets the lower and upper bounds in the suffix array
   * where the source pattern associated with this node
   * are located.
   * 
   * @param lowBound the lower bound in the suffix array
   *                 for the source pattern at this node
   * @param highBound the upper bound in the suffix array
   *                 for the source pattern at this node
   */
  public void setBounds(int lowBound, int highBound) {
    lowBoundIndex = lowBound;
    highBoundIndex = highBound;
  }




  /**
   * Stores in this node a list of source language hierarchical
   * phrases, the associated source language pattern, and the
   * list of associated translation rules.
   * <p>
   * This method is responsible for creating and storing
   * translation rules from the provided list of source
   * language hierarchical phrases.
   * 
   * @param hierarchicalPhrases Source language hierarchical phrases.
   */
  public void storeResults(MatchedHierarchicalPhrases hierarchicalPhrases, List<Rule> rules) {
    
    if (logger.isLoggable(Level.FINER)) {
      logger.finer("Storing " + hierarchicalPhrases.size() + " source phrases at node " + objectID + ":");
    }


    this.sourcePattern = hierarchicalPhrases.getPattern();
//    this.matchedPhrasesCache.put(sourcePattern, hierarchicalPhrases);
    
    //This is not needed, because this is put into the cache by HierarchicalRuleExtractor
//    this.parallelCorpus.getSuffixArray().getCachedRules().put(sourcePattern, rules);
    
    this.sourceHierarchicalPhrases = hierarchicalPhrases;
    
//    int numPhrases = hierarchicalPhrases.size();
//    if (numPhrases > 0) {
//      int lowerBound = hierarchicalPhrases.getFirstTerminalIndex(0);
//      int upperBound = hierarchicalPhrases.getFirstTerminalIndex(numPhrases-1);
//      this.setBounds(lowerBound, upperBound);
//    }
//    this.results = rules;
    
  }






  /**
   * Gets the number of rules stored in the grammar.
   * 
   * @return the number of rules stored in the grammar
   */
  public int getNumRules() {
    
    List<Rule> results = this.getResults();
    
    int numRules = 
      (results==null) ? 0 : results.size();


    if (children != null) {
      for (Node child : children.values()) {
        numRules += child.getNumRules();
      }
    }
    
    return numRules;
  }
  
  /**
   * Gets the number of nodes in the sub-tree rooted at this node.
   * <p>
   * This method recursively traverses through all nodes
   * in the sub-tree every time this method is called.
   * 
   * @return the number of nodes in the sub-tree rooted at this node
   */
  public int size() {


    int size = 1;


    for (Node child : children.values()) {
      size += child.size();
    }


    return size;
  }






  /* See Javadoc for java.lang.Object#hashCode */
  public int hashCode() {
    return objectID*31;
  }
  
  /**
   * Compares this node to another node
   * based solely on their respective objectIDs.
   * 
   * @param o Another node
   * @return <code>true</code> if this node's objectID 
   *         is equal to the other objectID,
   *         false otherwise
   */
  public boolean equals(Object o) {
    if (this==o) {
      return true;
    } else if (o instanceof Node) {
      Node other = (Node) o;
      return (objectID == other.objectID);
    } else {
      return false;
    }
  }
  
  /**
   * Compares this node to another node
   * based solely on their respective objectIDs.
   * 
   * @param o Another node
   * @return -1 if this node's objectID is less than the other objectID,
   *          0 if this node's objectID is equal to the other objectID,
   *          1 if this node's objectID is greater than the other objectID
   */
  public int compareTo(Node o) {
    Integer i = objectID;
    Integer j = o.objectID;


    return i.compareTo(j);
  }
  
  /**
   * Gets a String representation of the sub-tree rooted at this node.
   * 
   * @return a String representation of the sub-tree rooted at this node
   */
  public String toString(SymbolTable vocab, int incomingArcValue) {
    
    StringBuilder s = new StringBuilder();


    s.append("[id");
    s.append(objectID);
    s.append(' ');
    
    if (incomingArcValue==SymbolTable.X) {
      s.append('X');
    } else if (incomingArcValue==PrefixTree.ROOT_NODE_ID) {
      s.append("ROOT");
    } else if (vocab!=null) {
      s.append(vocab.getWord(incomingArcValue));
    } else {
      s.append('v');
      s.append(incomingArcValue);
    } 


    s.append(" (");
    if (null != suffixLink) {
      s.append(suffixLink.objectID);
    } else {
      s.append("null");
    }
    s.append(')');
    s.append(' ');


    ArrayList<Map.Entry<Integer, Node>> k = new ArrayList<Map.Entry<Integer, Node>>(children.entrySet());
    Collections.sort(k, NodeEntryComparator.get());
    
    for (Map.Entry<Integer, Node> kidEntry : k) {
      Integer arcValue = kidEntry.getKey();
      Node kid = kidEntry.getValue();
      
      s.append(kid.toString(vocab, arcValue));
      s.append(' ');
    }


    if (!active) s.append('*');
    s.append(']');


    return s.toString();


  }


  String toShortString(SymbolTable vocab) {
    
    StringBuilder s = new StringBuilder();


    s.append("[id");
    s.append(objectID);
    s.append(' ');
    
    s.append(" (");
    if (null != suffixLink) {
      s.append(suffixLink.objectID);
    } else {
      s.append("null");
    }
    s.append(')');
    s.append(' ');


    s.append('{');
    s.append(children.size());
    s.append(" children}");


    if (!active) s.append('*');
    s.append(']');


    return s.toString();
  }
  
  protected String toTreeString(String tabs, SymbolTable vocab, int incomingArcValue) {


    StringBuilder s = new StringBuilder();


    s.append(tabs); 
    s.append("[id");
    s.append(objectID);
    s.append(' ');


    if (incomingArcValue==SymbolTable.X) {
      s.append('X');
    } else if (incomingArcValue==PrefixTree.ROOT_NODE_ID) {
      s.append("ROOT");
    } else if (vocab!=null) {
      s.append(vocab.getWord(incomingArcValue));
    } else {
      s.append('v');
      s.append(incomingArcValue);
    } 


    s.append(" (");
    if (null != suffixLink) {
      s.append(suffixLink.objectID);
    } else {
      s.append("null");
    }
    s.append(')');


    if (children.size() > 0) {
      s.append(" \n\n");


      ArrayList<Map.Entry<Integer, Node>> k = new ArrayList<Map.Entry<Integer, Node>>(children.entrySet());
      Collections.sort(k, NodeEntryComparator.get());


      for (Map.Entry<Integer, Node> kidEntry : k) {
        Integer arcValue = kidEntry.getKey();
        Node kid = kidEntry.getValue();


        s.append(kid.toTreeString(tabs+"\t", vocab, arcValue));
        s.append(' ');
      }


      s.append(tabs);
    } else {
      s.append(' ');
    }


    if (!active) s.append('*');
    s.append(']');


    return s.toString();


  }
  


  static int nodeIDCounter = 2;
  
  static void resetNodeCounter() {
    nodeIDCounter = 2;
  }


//  public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int arity) {
//    return new BilingualRule(lhs, sourceWords, targetWords, scores, arity, this.defaultOwner, 0, getOOVRuleID());
//  }
//  
//
//
//  public int getOOVRuleID() {
//    return OOV_RULE_ID;
//  }
//  /** 
//   * if the span covered by the chart bin is greater than the
//   * limit, then return false
//   */
//  public boolean hasRuleForSpan(int startIndex,  int endIndex,  int pathLength) {
//    if (this.spanLimit == -1) { // mono-glue grammar
//      return (startIndex == 0);
//    } else {
//      return (endIndex - startIndex <= this.spanLimit);
//    }
//  }
//
//  public Rule constructOOVRule(int qtyFeatures, int sourceWord, int targetWord, boolean hasLM) {
//    int[] french      = new int[1];
//    french[0]         = sourceWord;
//    int[] english       = new int[1];
//    english[0]          = targetWord;
//    float[] feat_scores = new float[qtyFeatures];
//    
//    // TODO: This is a hack to make the decoding without a LM works
//    /**when a ngram LM is used, the OOV word will have a cost 100.
//     * if no LM is used for decoding, so we should set the cost of some
//     * TM feature to be maximum
//     * */
//    if ( (!hasLM) && qtyFeatures > 0) { 
//      feat_scores[0] = oovFeatureCost;
//    }
//    
//    return new BilingualRule(this.defaultLHS, french, english, feat_scores, 0, this.defaultOwner, 0, getOOVRuleID());
//  }


}
Source Code of joshua.prefix_tree.Node

Related Classes of joshua.prefix_tree.Node