/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder.chart_parser;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.logging.Level;
import java.util.logging.Logger;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.hypergraph.HyperEdge;
/**
* This class implements the following functions:
* (1) combine small items into larger ones using rules, and create
* items and hyper-edges to construct a hyper-graph,
* (2) evaluate model scores for items,
* (3) cube-pruning.
* Note: Bin creates Items, but not all Items will be used in the
* hyper-graph.
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @version $LastChangedDate: 2010-02-03 14:58:06 -0600 (Wed, 03 Feb 2010) $
*/
class Cell {

    //===============================================================
    // Static fields
    //===============================================================

    private static final Logger logger = Logger.getLogger(Cell.class.getName());

    //===============================================================
    // Instance fields
    //===============================================================

    /** Owning chart; its statistics counters and segment id are used by this cell. */
    private final Chart chart;

    /**
     * Beam/threshold pruner for the nodes of this cell. It is only created
     * when {@code JoshuaConfiguration.useBeamAndThresholdPrune} is set and
     * is {@code null} otherwise, so every use is null-checked.
     */
    public BeamPruner<HGNode> beamPruner; //TODO: CubePruneCombiner access this

    /** Symbol id compared against {@code HGNode.lhs} to recognise goal nodes. */
    private final int goalSymID;

    /** Maintains uniqueness of nodes: node signature -> node. */
    private final Map<String,HGNode> nodesSigTbl = new HashMap<String,HGNode>();

    /** Nodes grouped by left-hand side: lhs -> super node. */
    private final Map<Integer,SuperNode> superNodesTbl = new HashMap<Integer,SuperNode>();

    /**
     * Sorted view of the values in {@link #nodesSigTbl}. It is built on
     * demand by {@link #ensureSorted()} and reset to {@code null} whenever a
     * node is added.
     */
    private List<HGNode> sortedNodes = null;

    //===============================================================
    // Constructor
    //===============================================================

    /**
     * Creates an empty cell.
     *
     * @param chart     the chart this cell reports statistics to
     * @param goalSymID the symbol id identifying goal nodes
     */
    public Cell(Chart chart, int goalSymID) {
        this.chart = chart;
        this.goalSymID = goalSymID;

        if (JoshuaConfiguration.useBeamAndThresholdPrune) {
            PriorityQueue<HGNode> nodesHeap = new PriorityQueue<HGNode>(1, HGNode.logPComparator);
            beamPruner = new BeamPruner<HGNode>(
                    nodesHeap,
                    JoshuaConfiguration.relative_threshold,
                    JoshuaConfiguration.max_n_items);
        }
    }

    //===============================================================
    // Package-protected methods
    //===============================================================

    /**
     * Adds all the nodes of {@code bin} whose lhs is the goal symbol into
     * this (goal) cell. The goal cell ends up with a single node carrying
     * one hyperedge per qualifying antecedent node. Only the "goal bin"
     * should call this function; note that the input bin is bin[0][n], not
     * the goal bin.
     *
     * @param bin              the cell whose sorted nodes are scanned for goal-symbol nodes
     * @param featureFunctions feature functions passed on to
     *                         {@code ComputeNodeResult.computeCombinedTransitionLogP}
     * @param sentenceLength   sentence length; the goal node spans
     *                         {@code [0, sentenceLength + 1]}
     * @throws RuntimeException if this cell does not contain exactly one
     *                          node afterwards (e.g. no goal-symbol node was found)
     */
    void transitToGoal(Cell bin, List<FeatureFunction> featureFunctions, int sentenceLength) {
        this.sortedNodes = new ArrayList<HGNode>();
        HGNode goalItem = null;

        for (HGNode antNode : bin.getSortedNodes()) {
            if (antNode.lhs != this.goalSymID) {
                continue;
            }

            double logP = antNode.bestHyperedge.bestDerivationLogP;

            List<HGNode> antNodes = new ArrayList<HGNode>();
            antNodes.add(antNode);
            double finalTransitionLogP = ComputeNodeResult.computeCombinedTransitionLogP(
                    featureFunctions, null, antNodes, 0, sentenceLength, null, this.chart.segmentID);

            // The hyperedge gets its own list instance, separate from the
            // one handed to the feature functions above.
            List<HGNode> previousItems = new ArrayList<HGNode>();
            previousItems.add(antNode);
            HyperEdge dt = new HyperEdge(
                    null, logP + finalTransitionLogP, finalTransitionLogP, previousItems, null);

            if (null == goalItem) {
                goalItem = new HGNode(
                        0, sentenceLength + 1, this.goalSymID, null, dt, logP + finalTransitionLogP);
                this.sortedNodes.add(goalItem);
            } else {
                goalItem.addHyperedgeInNode(dt);
            }
        }

        if (null == goalItem) {
            // Reported regardless of whether INFO is enabled: this is an
            // error condition and the size check below is about to throw.
            logger.severe("goalItem is null!");
        } else if (logger.isLoggable(Level.INFO)) {
            // The segment id is passed as an argument so that it can never
            // be interpreted as part of the format pattern.
            logger.info(String.format("Sentence id=%s; BestlogP=%.3f",
                    this.chart.segmentID,
                    goalItem.bestHyperedge.bestDerivationLogP));
        }

        // sortedNodes is non-null here, so getSortedNodes() returns it as is.
        int itemsInGoalBin = getSortedNodes().size();
        if (1 != itemsInGoalBin) {
            throw new RuntimeException("the goal_bin does not have exactly one item");
        }
    }

    /**
     * Creates a hyperedge and adds it into the chart unless it gets pruned.
     *
     * <p>A note about pruning: when a hyperedge gets created, it first needs
     * to pass the relative-threshold filter. Then, if it does not trigger a
     * new node (i.e. it is merged into an old node), node pruning is not
     * triggered. If it does trigger a new node (either because its signature
     * is new or because its logP is better than the old node's logP), node
     * pruning is triggered, which might cause <em>other</em> nodes to get
     * pruned as well.
     *
     * @param result  precomputed scores and DP states for the new hyperedge
     * @param rule    the rule applied; its LHS becomes the lhs of the node
     * @param i       first span argument passed to the {@code HGNode} constructor
     * @param j       second span argument passed to the {@code HGNode} constructor
     * @param ants    antecedent nodes of the hyperedge
     * @param srcPath source path stored on the hyperedge
     * @param noPrune when {@code true}, both the relative-threshold filter
     *                and node pruning are skipped
     * @return {@code null} if the hyperedge was rejected by the
     *         relative-threshold filter; otherwise the node freshly built
     *         for this hyperedge (also when its hyperedges were merged into
     *         an already existing node)
     */
    HGNode addHyperEdgeInCell(
            ComputeNodeResult result, Rule rule, int i, int j,
            List<HGNode> ants, SourcePath srcPath, boolean noPrune
    ) {
        HashMap<Integer,DPState> dpStates = result.getDPStates();
        double expectedTotalLogP = result.getExpectedTotalLogP(); // including outside estimation
        double transitionLogP = result.getTransitionTotalLogP();
        double finalizedTotalLogP = result.getFinalizedTotalLogP();

        if (!noPrune && beamPruner != null && beamPruner.relativeThresholdPrune(expectedTotalLogP)) {
            // the hyperedge should be pruned
            this.chart.nPreprunedEdges++;
            return null;
        }

        HyperEdge dt = new HyperEdge(rule, finalizedTotalLogP, transitionLogP, ants, srcPath);
        HGNode res = new HGNode(i, j, rule.getLHS(), dpStates, dt, expectedTotalLogP);

        // Each node has a list of hyperedges. If a node with the same
        // signature already exists, the hyperedges are combined, which may
        // change the best logP of the node.
        HGNode oldNode = this.nodesSigTbl.get(res.getSignature());
        if (null == oldNode) {
            // First node with this signature; it may still end up unused
            // because of later pruning in the hyper-graph.
            this.chart.nAdded++;
            addNewNode(res, noPrune);
            return res;
        }

        this.chart.nMerged++;
        if (res.getPruneLogP() > oldNode.getPruneLogP()) {
            // Merge old into new (semiring plus). Removing the old node
            // from the pruner's heap and re-inserting it would be linear
            // time, so it is only flagged as dead instead.
            if (beamPruner != null) {
                oldNode.setDead();
                beamPruner.incrementDeadObjs();
            }
            res.addHyperedgesInNode(oldNode.hyperedges);
            addNewNode(res, noPrune); // replaces oldNode in the signature table
        } else {
            // Merge new into old; this does not trigger node pruning.
            oldNode.addHyperedgesInNode(res.hyperedges);
        }
        return res;
    }

    /**
     * @return the nodes of this cell, sorted with
     *         {@code HGNode.inverseLogPComparator}
     */
    List<HGNode> getSortedNodes() {
        ensureSorted();
        return this.sortedNodes;
    }

    /**
     * @return the lhs -> super node table, after making sure the node list
     *         of every super node is in sorted order
     */
    Map<Integer,SuperNode> getSortedSuperItems() {
        ensureSorted();
        return this.superNodesTbl;
    }

    //===============================================================
    // Private Methods
    //===============================================================

    /**
     * Registers {@code node} in this cell. There are two cases in which
     * this function gets called:
     * (1) a new hyperedge leads to a non-existing node signature;
     * (2) a new hyperedge's signature matches an old node's signature, but
     *     the prune logP of the old node is worse than that of the new one.
     *
     * @param node    the node to add (replaces any node with the same signature)
     * @param noPrune when {@code true}, the node is handed to the pruner
     *                without triggering pruning
     */
    private void addNewNode(HGNode node, boolean noPrune) {
        this.nodesSigTbl.put(node.getSignature(), node); // add/replace the item
        this.sortedNodes = null; // invalidate the sorted view

        if (beamPruner != null) {
            if (noPrune) {
                beamPruner.addOneObjInHeapWithoutPrune(node);
            } else {
                List<HGNode> prunedNodes = beamPruner.addOneObjInHeapWithPrune(node);
                this.chart.nPrunedItems += prunedNodes.size();
                for (HGNode prunedNode : prunedNodes) {
                    this.nodesSigTbl.remove(prunedNode.getSignature());
                }
            }
        }

        // Add a super node if necessary. Because sortedNodes was reset
        // above, the super-node lists are rebuilt from nodesSigTbl by the
        // next ensureSorted() call.
        SuperNode si = this.superNodesTbl.get(node.lhs);
        if (null == si) {
            si = new SuperNode(node.lhs);
            this.superNodesTbl.put(node.lhs, si);
        }
        si.nodes.add(node); //TODO what about the dead items?
    }

    /**
     * Builds the sorted list of nodes in the cell and makes sure the node
     * list of every super node is sorted as well. The work is only done
     * when the sorted view has been invalidated, which means the lists are
     * not always sorted; per the original author's notes this is mainly
     * needed for the goal bin and cube-pruning.
     *
     * @throws RuntimeException if a node's lhs has no entry in the super-node table
     */
    private void ensureSorted() {
        if (null != this.sortedNodes) {
            return;
        }

        //== get sortedNodes (original note: decreasing-logP order)
        HGNode[] nodesArray =
                this.nodesSigTbl.values().toArray(new HGNode[this.nodesSigTbl.size()]);
        Arrays.sort(nodesArray, HGNode.inverseLogPComparator);
        this.sortedNodes = new ArrayList<HGNode>(Arrays.asList(nodesArray));

        //TODO: we cannot create new SuperItem here because the DotItem link to them
        //== update superNodesTbl: empty every list, then refill in sorted order
        for (SuperNode superNode : this.superNodesTbl.values()) {
            superNode.nodes.clear();
        }
        for (HGNode node : this.sortedNodes) {
            SuperNode si = this.superNodesTbl.get(node.lhs);
            if (null == si) { // sanity check
                throw new RuntimeException("Does not have super Item, have to exist");
            }
            si.nodes.add(node);
        }

        //== remove super nodes that no longer contain any node due to pruning.
        // Keys are collected first, because removing from the map while
        // iterating over it would throw ConcurrentModificationException.
        List<Integer> emptyKeys = new ArrayList<Integer>();
        for (Map.Entry<Integer,SuperNode> entry : this.superNodesTbl.entrySet()) {
            if (entry.getValue().nodes.size() == 0) {
                emptyKeys.add(entry.getKey());
            }
        }
        for (Integer key : emptyKeys) {
            this.superNodesTbl.remove(key);
        }
    }
}