/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.prefix_tree;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import joshua.corpus.MatchedHierarchicalPhrases;
import joshua.corpus.RuleExtractor;
import joshua.corpus.suffix_array.ParallelCorpusGrammarFactory;
import joshua.corpus.suffix_array.Pattern;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.ff.tm.BasicRuleCollection;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.ff.tm.RuleCollection;
import joshua.decoder.ff.tm.Trie;
import joshua.util.Cache;
/**
* Represents a node in a prefix tree.
*
* @author Lane Schwartz
*/
public class Node implements Comparable<Node>, Trie {
/** Logger for this class. */
private static final Logger logger =
Logger.getLogger(Node.class.getName());
/**
* Unique integer identifier for this node.
* <p>
* This value alone drives hashCode, equals and compareTo.
*/
final int objectID;
/**
* The lower bound in the suffix array
* for the source pattern at this node.
*/
int lowBoundIndex;
/**
* The upper bound in the suffix array
* for the source pattern at this node.
*/
int highBoundIndex;
/** Indicates whether this is an active node. */
boolean active;
/** Suffix link for this node; null until one is assigned. */
Node suffixLink;
/**
* Maps from integer representations of words to nodes.
* <p>
* TODO It may be better to have a single map in PrefixTree that maps (Node,Integer) --> Node
*/
Map<Integer,Node> children;
/**
* Source side hierarchical phrases for this node.
* <p>
* Initialized to null by the constructor and replaced by storeResults.
*/
MatchedHierarchicalPhrases sourceHierarchicalPhrases;
// List<Rule> results;
/**
* Parallel corpus that this node consults when rules are requested:
* getResults obtains both the rule cache
* (via getSuffixArray().getCachedRules()) and the rule extractor from it.
*/
protected final ParallelCorpusGrammarFactory parallelCorpus;
// private final Suffixes suffixArray;
// private final Cache<Pattern, List<Rule>> ruleCache;
// private final Cache<Pattern, MatchedHierarchicalPhrases> matchedPhrasesCache;
/**
* Source language pattern for this node.
* <p>
* Within this class it is only assigned by storeResults, so it
* remains null until that method has been called; it is used as
* the key into the rule cache.
*/
Pattern sourcePattern;
////================================
// // added by zhifei: NOTE these parameters are not initialized by the constructor
// public static final int OOV_RULE_ID = 0;
// private int defaultOwner;
// private float oovFeatureCost = 100;
//
// /**
// * the OOV rule should have this lhs, this should be grammar
// * specific as only the grammar knows what LHS symbol can
// * be combined with other rules
// */
// private int defaultLHS;
// private int spanLimit = 10;
////==============================
//
/**
 * Gets translation rules for this node.
 * <p>
 * The rule cache of the parallel corpus is consulted first, keyed
 * by this node's source pattern. On a cache miss the rules are
 * extracted from this node's matched phrases and then stored in
 * the cache before being returned.
 * <p>
 * The results of this method are expected to be sorted according
 * to whatever feature functions are in use; both the cached list
 * and a freshly extracted list are assumed to arrive already sorted.
 * <p>
 * Calling this method will return results equivalent to those
 * that would be returned by calling
 * <code>HierarchicalRuleExtractor#extractRules(getMatchedPhrases())</code>.
 *
 * @see RuleExtractor#extractRules(MatchedHierarchicalPhrases)
 * @return translation rules for this node
 */
protected List<Rule> getResults() {
    final Cache<Pattern,List<Rule>> cachedRules =
            parallelCorpus.getSuffixArray().getCachedRules();

    // Cache hit: hand back whatever is stored for this pattern.
    if (cachedRules.containsKey(sourcePattern)) {
        return cachedRules.get(sourcePattern);
    }

    // Cache miss: extract the rules and remember them for next time.
    final List<Rule> extracted =
            parallelCorpus.getRuleExtractor().extractRules(getMatchedPhrases());
    cachedRules.put(sourcePattern, extracted);
    return extracted;
}
/**
 * Gets the source side hierarchical phrases stored at this node.
 * <p>
 * Currently this simply returns the field set by the constructor
 * (null) or by storeResults.
 * <p>
 * TODO Implement lookup through a matched-phrases cache keyed by
 * the source pattern, falling back to extra lookup on a cache miss.
 *
 * @return the matched hierarchical phrases for this node,
 *         possibly <code>null</code>
 */
protected MatchedHierarchicalPhrases getMatchedPhrases() {
    return sourceHierarchicalPhrases;
}
/**
* Constructs an active node that shares the parallel corpus
* of the specified node.
* <p>
* The new node takes its identifier from the static node counter,
* which is incremented as a side effect. This constructor does not
* register the new node as a child of <code>parent</code>.
*
* @param parent node whose parallel corpus the new node will use
*/
Node(Node parent) {
// this(parent.ruleCache, parent.matchedPhrasesCache, true);
this(parent.parallelCorpus, true, nodeIDCounter++);
}
/**
* Constructs an active node with an explicitly chosen identifier.
* <p>
* The static node counter is not consulted or modified.
*
* @param parallelCorpus parallel corpus used to look up rules for this node
* @param objectID identifier to assign to this node
*/
Node(ParallelCorpusGrammarFactory parallelCorpus, int objectID) {
this(parallelCorpus, true, objectID);
// this(
// (suffixArray==null ? null : suffixArray.getCachedRules()),
// (suffixArray==null ? null : suffixArray.getCachedHierarchicalPhrases()),
// true, objectID);
}
/**
* Constructs a node with the specified active status.
* <p>
* The new node takes its identifier from the static node counter,
* which is incremented as a side effect.
*
* @param parallelCorpus parallel corpus used to look up rules for this node
* @param active whether the new node is active
*/
Node(ParallelCorpusGrammarFactory parallelCorpus, boolean active) {
this(parallelCorpus, active, nodeIDCounter++);
}
// Node(Cache<Pattern, List<Rule>> ruleCache, Cache<Pattern, MatchedHierarchicalPhrases> matchedPhrasesCache, boolean active) {
// this(ruleCache, matchedPhrasesCache, active, nodeIDCounter++);
// }
// Node(Cache<Pattern, List<Rule>> ruleCache, Cache<Pattern, MatchedHierarchicalPhrases> matchedPhrasesCache, boolean active, int objectID) {
/**
 * Constructs a node; all other constructors delegate here.
 * <p>
 * The node starts with an empty child map, no suffix link,
 * and no stored source hierarchical phrases.
 *
 * @param parallelCorpus parallel corpus used to look up rules for this node
 * @param active whether the new node is active
 * @param objectID identifier to assign to this node
 */
Node(ParallelCorpusGrammarFactory parallelCorpus, boolean active, int objectID) {
    // Identity and collaborators.
    this.objectID = objectID;
    this.parallelCorpus = parallelCorpus;
    this.active = active;

    // Tree structure: no children and no suffix link yet.
    this.children = new HashMap<Integer,Node>();
    this.suffixLink = null;

    // Nothing has been stored at this node yet.
    this.sourceHierarchicalPhrases = null;
}
/**
 * Finds the node reached by following this node's suffix link
 * and then taking the child arc labelled with the given symbol.
 * <p>
 * This node's own suffix link is dereferenced without a null
 * check, so it must already have been set.
 *
 * @param endOfPattern symbol labelling the arc to follow
 *                     from this node's suffix link
 * @return the child of this node's suffix link for the given symbol
 * @throws NoSuchChildNodeException if the suffix link has no such child
 */
Node calculateSuffixLink(int endOfPattern) {
    final Node target = this.suffixLink.getChild(endOfPattern);
    if (target != null) {
        return target;
    }
    throw new NoSuchChildNodeException(this, endOfPattern);
}
/**
 * Gets the source language pattern stored at this node, that is,
 * the representation of the source side tokens corresponding to
 * the hierarchical phrases for this node.
 *
 * @return the source language pattern for this node
 */
public Pattern getSourcePattern() {
    return this.sourcePattern;
}
/**
 * Gets rules for this node and, recursively, for every node
 * in the sub-tree below it.
 * <p>
 * A fresh list is returned on each call. A <code>null</code>
 * result for this node is treated as "no rules".
 *
 * @return rules for this node and the children of this node.
 */
public List<Rule> getAllRules() {
    final List<Rule> collected = new ArrayList<Rule>();

    // Rules of this node come first.
    final List<Rule> own = this.getResults();
    if (own != null) {
        collected.addAll(own);
    }

    // Then the rules of each sub-tree.
    for (Node child : children.values()) {
        collected.addAll(child.getAllRules());
    }

    return collected;
}
/**
 * Wraps the rules of this node in a rule collection.
 * <p>
 * When no source pattern has been stored, an empty source side
 * and an arity of zero are used.
 * See Javadoc for joshua.decoder.ff.tm.Trie#getRules
 *
 * @return a rule collection holding the arity, source side
 *         and rules of this node
 */
public RuleCollection getRules() {
    final int[] sourceSide;
    final int arity;

    if (sourcePattern == null) {
        sourceSide = new int[]{};
        arity = 0;
    } else {
        sourceSide = sourcePattern.getWordIDs();
        arity = sourcePattern.arity();
    }

    final List<Rule> rules = this.getResults();
    return new BasicRuleCollection(arity, sourceSide, rules);
}
/**
 * Determines whether this node has at least one child.
 * See Javadoc for joshua.decoder.ff.tm.Trie#hasExtensions
 *
 * @return <code>true</code> if this node has any children,
 *         <code>false</code> otherwise
 */
public boolean hasExtensions() {
    final boolean childless = children.isEmpty();
    return !childless;
}
/**
 * Determines whether this node has rules.
 * <p>
 * An inactive node never has rules. An active node has rules
 * when it holds a non-empty set of matched source phrases.
 * <p>
 * The constructor leaves the matched phrases <code>null</code>
 * until storeResults is called, so a node with nothing stored
 * is reported as having no rules rather than failing with a
 * NullPointerException.
 * See Javadoc for joshua.decoder.ff.tm.Trie#hasRules
 *
 * @return <code>true</code> if this node is active and has
 *         at least one matched source phrase,
 *         <code>false</code> otherwise
 */
public boolean hasRules() {
    if (!active) {
        return false;
    }

    final MatchedHierarchicalPhrases matched = this.getMatchedPhrases();

    // Nothing stored yet counts as "no rules".
    return matched != null && !matched.isEmpty();
}
/**
 * Follows the arc labelled with the given symbol.
 * <p>
 * Only active children are returned; an inactive child is
 * treated the same as a missing one.
 * See Javadoc for joshua.decoder.ff.tm.Trie#matchOne
 *
 * @param symbol integer representation of the word labelling the arc
 * @return the active child reached via the symbol,
 *         or <code>null</code> if there is no such child
 *         or the child is inactive
 */
public Trie matchOne(int symbol) {
    if (!children.containsKey(symbol)) {
        return null;
    }

    final Node child = children.get(symbol);
    return child.active ? child : null;
}
/**
 * Gets the child nodes of this node.
 * <p>
 * The returned collection is the value view of the internal
 * child map, not a copy.
 * See Javadoc for joshua.decoder.ff.tm.Trie#getExtensions
 *
 * @return the children of this node
 */
public Collection<Node> getExtensions() {
    return children.values();
}
/**
* Gets the root of the trie as seen from this node, which is this node itself.
* <p>
* See Javadoc for joshua.decoder.ff.tm.Grammar#getTrieRoot
* <p>
* NOTE(review): this class declares only Comparable and Trie as
* interfaces, not Grammar; confirm which interface, if any,
* this method is meant to implement.
*
* @return this node
*/
public Trie getTrieRoot() {
return this;
}
/**
 * Determines whether this node has a child reachable
 * via the arc labelled with the specified symbol.
 *
 * @param child integer representation of the word labelling the arc
 * @return <code>true</code> if this node has a specified child,
 *         <code>false</code> otherwise
 */
public boolean hasChild(int child) {
    return this.children.containsKey(child);
}
/**
 * Gets the child reachable via the arc labelled
 * with the specified symbol.
 *
 * @param child integer representation of the word labelling the arc
 * @return the matching child node, or <code>null</code>
 *         if the child map has no entry for the symbol
 */
public Node getChild(int child) {
    return this.children.get(child);
}
/**
 * Creates a new node and registers it as the child of this node
 * reachable via the arc labelled with the specified symbol.
 *
 * @param child integer representation of the word labelling the new arc
 * @return the newly created child node
 * @throws ChildNodeAlreadyExistsException if this node already
 *         has a child for the specified symbol
 */
public Node addChild(int child) {
    if (!children.containsKey(child)) {
        final Node created = new Node(this);
        children.put(child, created);
        return created;
    }

    throw new ChildNodeAlreadyExistsException(this, child);
}
/**
 * Sets the suffix link for this node,
 * replacing any link that was previously set.
 *
 * @param suffix Suffix link for this node
 */
public void linkToSuffix(Node suffix) {
    suffixLink = suffix;
}
/**
 * Records the lower and upper bounds in the suffix array
 * between which the source pattern associated with this node
 * is located.
 * <p>
 * The values are stored as given; no validation is performed.
 *
 * @param lowBound the lower bound in the suffix array
 *                 for the source pattern at this node
 * @param highBound the upper bound in the suffix array
 *                  for the source pattern at this node
 */
public void setBounds(int lowBound, int highBound) {
    this.lowBoundIndex = lowBound;
    this.highBoundIndex = highBound;
}
/**
 * Stores in this node a list of source language hierarchical
 * phrases together with the source language pattern obtained
 * from those phrases.
 * <p>
 * This method does not itself create or store translation rules;
 * the <code>rules</code> argument is currently ignored. Rules are
 * looked up (and cached) on demand by getResults.
 *
 * @param hierarchicalPhrases Source language hierarchical phrases.
 * @param rules translation rules associated with the phrases;
 *              currently unused by this method
 */
public void storeResults(MatchedHierarchicalPhrases hierarchicalPhrases, List<Rule> rules) {
    final boolean traceEnabled = logger.isLoggable(Level.FINER);
    if (traceEnabled) {
        logger.finer("Storing " + hierarchicalPhrases.size() + " source phrases at node " + objectID + ":");
    }

    // The pattern is taken from the phrases themselves.
    sourcePattern = hierarchicalPhrases.getPattern();
    sourceHierarchicalPhrases = hierarchicalPhrases;
}
/**
 * Gets the number of rules stored in the sub-tree rooted at this
 * node: the rules of this node plus, recursively, the rules
 * of all of its descendants.
 * <p>
 * A <code>null</code> rule list counts as zero rules,
 * and a <code>null</code> child map as no children.
 *
 * @return the number of rules stored in the grammar
 */
public int getNumRules() {
    int total = 0;

    final List<Rule> own = this.getResults();
    if (own != null) {
        total = own.size();
    }

    if (children == null) {
        return total;
    }

    for (Node child : children.values()) {
        total += child.getNumRules();
    }
    return total;
}
/**
 * Gets the number of nodes in the sub-tree rooted at this node,
 * counting this node itself.
 * <p>
 * Nothing is cached: the whole sub-tree is traversed
 * recursively every time this method is called.
 *
 * @return the number of nodes in the sub-tree rooted at this node
 */
public int size() {
    // Start at one to account for this node.
    int nodeCount = 1;
    for (Node descendantRoot : children.values()) {
        nodeCount += descendantRoot.size();
    }
    return nodeCount;
}
/**
 * Computes a hash code derived solely from this node's objectID,
 * in keeping with equals.
 * See Javadoc for java.lang.Object#hashCode
 *
 * @return the objectID of this node multiplied by 31
 */
@Override
public int hashCode() {
    return 31 * objectID;
}
/**
 * Tests this node for equality with another object.
 * <p>
 * Two nodes are equal when their objectIDs are equal;
 * no other state is considered. Anything that is not
 * a node (including <code>null</code>) is never equal.
 *
 * @param o Another object, normally a node
 * @return <code>true</code> if the other object is a node whose
 *         objectID is equal to this node's objectID,
 *         false otherwise
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (!(o instanceof Node)) {
        return false;
    }
    return this.objectID == ((Node) o).objectID;
}
/**
 * Orders this node relative to another node
 * based solely on their respective objectIDs.
 *
 * @param o Another node
 * @return -1 if this node's objectID is less than the other objectID,
 *          0 if this node's objectID is equal to the other objectID,
 *          1 if this node's objectID is greater than the other objectID
 */
public int compareTo(Node o) {
    final int mine = this.objectID;
    final int theirs = o.objectID;

    if (mine < theirs) {
        return -1;
    }
    return (mine == theirs) ? 0 : 1;
}
/**
 * Gets a single-line String representation of the sub-tree
 * rooted at this node.
 * <p>
 * Each node is rendered as
 * <code>[id&lt;objectID&gt; &lt;label&gt; (&lt;suffix link id&gt;) &lt;children&gt;]</code>,
 * with children ordered by NodeEntryComparator and a trailing
 * <code>*</code> marking an inactive node.
 *
 * @param vocab symbol table used to render arc labels as words;
 *              if <code>null</code>, labels are rendered as
 *              <code>v</code> followed by the integer value
 * @param incomingArcValue integer label of the arc leading to this node
 * @return a String representation of the sub-tree rooted at this node
 */
public String toString(SymbolTable vocab, int incomingArcValue) {
    final StringBuilder out = new StringBuilder();

    // Header: identifier of this node.
    out.append("[id").append(objectID).append(' ');

    // Label of the incoming arc.
    if (incomingArcValue == SymbolTable.X) {
        out.append('X');
    } else if (incomingArcValue == PrefixTree.ROOT_NODE_ID) {
        out.append("ROOT");
    } else if (vocab != null) {
        out.append(vocab.getWord(incomingArcValue));
    } else {
        out.append('v').append(incomingArcValue);
    }

    // Identifier of the suffix link, if any.
    out.append(" (");
    if (suffixLink == null) {
        out.append("null");
    } else {
        out.append(suffixLink.objectID);
    }
    out.append(')').append(' ');

    // Children, in the order imposed by NodeEntryComparator.
    final ArrayList<Map.Entry<Integer, Node>> sortedKids =
            new ArrayList<Map.Entry<Integer, Node>>(children.entrySet());
    Collections.sort(sortedKids, NodeEntryComparator.get());
    for (Map.Entry<Integer, Node> entry : sortedKids) {
        final Integer arcValue = entry.getKey();
        final Node kid = entry.getValue();
        out.append(kid.toString(vocab, arcValue)).append(' ');
    }

    // Inactive nodes are flagged with an asterisk.
    if (!active) {
        out.append('*');
    }
    return out.append(']').toString();
}
/**
 * Gets a short String representation of this node alone:
 * its identifier, the identifier of its suffix link, and the
 * number of its children, without descending into the sub-tree.
 * An inactive node is flagged with a trailing <code>*</code>.
 *
 * @param vocab symbol table; not used by this method
 * @return a short String representation of this node
 */
String toShortString(SymbolTable vocab) {
    final StringBuilder out = new StringBuilder();

    out.append("[id").append(objectID).append(' ');

    // Identifier of the suffix link, if any.
    out.append(" (");
    if (suffixLink == null) {
        out.append("null");
    } else {
        out.append(suffixLink.objectID);
    }
    out.append(')').append(' ');

    // Summary of the children instead of the children themselves.
    out.append('{').append(children.size()).append(" children}");

    if (!active) {
        out.append('*');
    }
    return out.append(']').toString();
}
/**
 * Gets a multi-line, indented String representation of the
 * sub-tree rooted at this node.
 * <p>
 * Each level of the tree is prefixed with one more tab than its
 * parent. Children are ordered by NodeEntryComparator, and an
 * inactive node is flagged with a trailing <code>*</code>.
 *
 * @param tabs indentation prefix for this node
 * @param vocab symbol table used to render arc labels as words;
 *              if <code>null</code>, labels are rendered as
 *              <code>v</code> followed by the integer value
 * @param incomingArcValue integer label of the arc leading to this node
 * @return an indented String representation of the sub-tree
 *         rooted at this node
 */
protected String toTreeString(String tabs, SymbolTable vocab, int incomingArcValue) {
    final StringBuilder out = new StringBuilder();

    // Header: indentation and identifier of this node.
    out.append(tabs).append("[id").append(objectID).append(' ');

    // Label of the incoming arc.
    if (incomingArcValue == SymbolTable.X) {
        out.append('X');
    } else if (incomingArcValue == PrefixTree.ROOT_NODE_ID) {
        out.append("ROOT");
    } else if (vocab != null) {
        out.append(vocab.getWord(incomingArcValue));
    } else {
        out.append('v').append(incomingArcValue);
    }

    // Identifier of the suffix link, if any.
    out.append(" (");
    if (suffixLink == null) {
        out.append("null");
    } else {
        out.append(suffixLink.objectID);
    }
    out.append(')');

    if (children.isEmpty()) {
        out.append(' ');
    } else {
        out.append(" \n\n");

        // Children go one indentation level deeper.
        final ArrayList<Map.Entry<Integer, Node>> sortedKids =
                new ArrayList<Map.Entry<Integer, Node>>(children.entrySet());
        Collections.sort(sortedKids, NodeEntryComparator.get());
        final String deeper = tabs + "\t";
        for (Map.Entry<Integer, Node> entry : sortedKids) {
            final Integer arcValue = entry.getKey();
            final Node kid = entry.getValue();
            out.append(kid.toTreeString(deeper, vocab, arcValue)).append(' ');
        }

        out.append(tabs);
    }

    // Inactive nodes are flagged with an asterisk.
    if (!active) {
        out.append('*');
    }
    return out.append(']').toString();
}
/**
 * First identifier handed out by the constructors that number
 * nodes automatically.
 * <p>
 * NOTE(review): presumably the smaller identifiers are reserved
 * for special nodes created with explicit ids - confirm in PrefixTree.
 */
private static final int FIRST_AUTO_NODE_ID = 2;

/**
 * Identifier that will be given to the next automatically
 * numbered node; post-incremented by the constructors that use it.
 */
static int nodeIDCounter = FIRST_AUTO_NODE_ID;

/** Resets the automatic node numbering to its initial value. */
static void resetNodeCounter() {
    nodeIDCounter = FIRST_AUTO_NODE_ID;
}
// public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int arity) {
// return new BilingualRule(lhs, sourceWords, targetWords, scores, arity, this.defaultOwner, 0, getOOVRuleID());
// }
//
//
//
// public int getOOVRuleID() {
// return OOV_RULE_ID;
// }
// /**
// * if the span covered by the chart bin is greater than the
// * limit, then return false
// */
// public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
// if (this.spanLimit == -1) { // mono-glue grammar
// return (startIndex == 0);
// } else {
// return (endIndex - startIndex <= this.spanLimit);
// }
// }
//
// public Rule constructOOVRule(int qtyFeatures, int sourceWord, int targetWord, boolean hasLM) {
// int[] french = new int[1];
// french[0] = sourceWord;
// int[] english = new int[1];
// english[0] = targetWord;
// float[] feat_scores = new float[qtyFeatures];
//
// // TODO: This is a hack to make the decoding without a LM works
// /**when a ngram LM is used, the OOV word will have a cost 100.
// * if no LM is used for decoding, so we should set the cost of some
// * TM feature to be maximum
// * */
// if ( (!hasLM) && qtyFeatures > 0) {
// feat_scores[0] = oovFeatureCost;
// }
//
// return new BilingualRule(this.defaultLHS, french, english, feat_scores, 0, this.defaultOwner, 0, getOOVRuleID());
// }
}