Package joshua.decoder.chart_parser

Source Code of joshua.decoder.chart_parser.Cell

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.decoder.chart_parser;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.logging.Level;
import java.util.logging.Logger;

import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.state_maintenance.DPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.hypergraph.HyperEdge;


/**
* This class implements the following functions:
* (1) combine small items into larger ones using rules, and create
*     items and hyper-edges to construct a hyper-graph,
* (2) evaluate the model score for items,
* (3) cube-pruning.
* Note: a Bin creates Items, but not all Items will be used in the
* hyper-graph.
*
* @author Zhifei Li, <zhifei.work@gmail.com>
* @version $LastChangedDate: 2010-02-03 14:58:06 -0600 (Wed, 03 Feb 2010) $
*/
class Cell {
 
//===============================================================
// Private instance fields
//=============================================================== 
  private Chart chart = null;
 
  public BeamPruner<HGNode> beamPruner;//TODO: CubePruneCombiner access this
 
  private int goalSymID;
 
   
  // to maintain uniqueness of nodes
  private HashMap<String,HGNode> nodesSigTbl = new HashMap<String,HGNode>();
 
  // signature by lhs
  private Map<Integer,SuperNode> superNodesTbl = new HashMap<Integer,SuperNode>();
 
  /** sort values in nodesSigTbl,
   * we need this list when necessary
   */
  private List<HGNode> sortedNodes = null;
 
 
 
//===============================================================
// Static fields
//===============================================================
  private static final Logger logger = Logger.getLogger(Cell.class.getName());
 
 
//===============================================================
// Constructor
//===============================================================
 
  public Cell(Chart chart, int goalSymID) {
    this.chart     = chart;
    this.goalSymID = goalSymID;
   
    if(JoshuaConfiguration.useBeamAndThresholdPrune){
      PriorityQueue<HGNode> nodesHeap = new PriorityQueue<HGNode>(1, HGNode.logPComparator);   
      beamPruner = new BeamPruner<HGNode>(nodesHeap, JoshuaConfiguration.relative_threshold, JoshuaConfiguration.max_n_items);
    }
  }
 
 
//===============================================================
// Package-protected methods
//===============================================================
 
 
  /**
   * add all the items with GOAL_SYM state into the goal bin
   * the goal bin has only one Item, which itself has many
   * hyperedges only "goal bin" should call this function
   */
  //note that thei nput bin is  bin[0][n], not the goal bin
  void transitToGoal(Cell bin, List<FeatureFunction> featureFunctions, int sentenceLength) {
    this.sortedNodes = new ArrayList<HGNode>();
    HGNode goalItem = null;
   
    for (HGNode antNode : bin.getSortedNodes()) {
      if (antNode.lhs == this.goalSymID) {
        double logP = antNode.bestHyperedge.bestDerivationLogP;
        List<HGNode> antNodes = new ArrayList<HGNode>();
        antNodes.add(antNode);
       
        double finalTransitionLogP = ComputeNodeResult.computeCombinedTransitionLogP(featureFunctions, null, antNodes, 0, sentenceLength, null, this.chart.segmentID);
                   
        List<HGNode> previousItems = new ArrayList<HGNode>();
        previousItems.add(antNode);
       
        HyperEdge dt = new HyperEdge(null, logP + finalTransitionLogP, finalTransitionLogP, previousItems, null);
               
        if (null == goalItem) {
          goalItem = new HGNode(0, sentenceLength + 1, this.goalSymID, null, dt, logP + finalTransitionLogP);
          this.sortedNodes.add(goalItem);
        } else {
          goalItem.addHyperedgeInNode(dt);
        }
      } // End if item.lhs == this.goalSymID
    } // End foreach Item in bin.get_sorted_items()
   
   
    if (logger.isLoggable(Level.INFO)) {
      if (null == goalItem) {
        logger.severe("goalItem is null!");
      } else {
        logger.info(String.format("Sentence id=" + this.chart.segmentID +"; BestlogP=%.3f",
          goalItem.bestHyperedge.bestDerivationLogP));
      }
    }
    ensureSorted();
   
    int itemsInGoalBin = getSortedNodes().size();
    if (1 != itemsInGoalBin) {   
      throw new RuntimeException("the goal_bin does not have exactly one item");
    }
  }
 
 
 
  /**in order to add a hyperedge into the chart, we need to
   * (1) do the combination, and compute the logP (if pass the cube-prunning filter)
   * (2) run through the beam and threshold pruning, which itself has two steps.
   * */
 
  /**a note about pruning:
   * when a hyperedge gets created, it first needs to pass through shouldPruneEdge filter.
   * Then, if it does not trigger a new node (i.e. will be merged to an old node), then does not trigger pruningNodes.
   * If it does trigger a new node (either because its signature is new or because its logP is better than the old node's logP),
   * then it will trigger pruningNodes, which might causes *other* nodes got pruned as well
   * */
 
 
  /**create a hyperege, and add it into the chart if not got prunned
   * */
  HGNode addHyperEdgeInCell(
    ComputeNodeResult result, Rule rule, int i, int j,
    List<HGNode> ants, SourcePath srcPath, boolean noPrune
  ) {
    HGNode res = null;
   
    HashMap<Integer,DPState> dpStates = result.getDPStates();
    double expectedTotalLogP  = result.getExpectedTotalLogP(); // including outside estimation
    double transitionLogP    = result.getTransitionTotalLogP();
    double finalizedTotalLogP = result.getFinalizedTotalLogP();
   
   
   
    if(noPrune==false && beamPruner!=null &&  beamPruner.relativeThresholdPrune(expectedTotalLogP)){//the hyperedge should be pruned
      this.chart.nPreprunedEdges++;
      res = null;
    }else{
      HyperEdge dt = new HyperEdge(rule, finalizedTotalLogP, transitionLogP, ants, srcPath);
      res = new HGNode(i, j, rule.getLHS(), dpStates, dt, expectedTotalLogP);
     
      /** each node has a list of hyperedges,
       * need to check whether the node is already exist,
       * if yes, just add the hyperedges, this may change the best logP of the node
       * */
      HGNode oldNode = this.nodesSigTbl.get( res.getSignature() );
      if (null != oldNode) { // have an item with same states, combine items
        this.chart.nMerged++;
       
        /** the position of oldItem in this.heapItems
         *  may change, basically, we should remove the
         *  oldItem, and re-insert it (linear time), this is too expense)
         **/
        if ( res.getPruneLogP() > oldNode.getPruneLogP() ) {//merget old to new: semiring plus         

          if(beamPruner!=null){
            oldNode.setDead();// this.heapItems.remove(oldItem);
            beamPruner.incrementDeadObjs();
          }
         
          res.addHyperedgesInNode(oldNode.hyperedges);
          addNewNode(res, noPrune); //this will update the HashMap, so that the oldNode is destroyed
         
        } else {//merge new to old, does not trigger pruningItems
          oldNode.addHyperedgesInNode(res.hyperedges);
        }
       
      } else { // first time item
        this.chart.nAdded++; // however, this item may not be used in the future due to pruning in the hyper-graph
        addNewNode(res, noPrune);
      }
    }
    return res;
  }
 
 
  List<HGNode> getSortedNodes() {
    ensureSorted();
    return this.sortedNodes;
  }
 
 
  Map<Integer,SuperNode> getSortedSuperItems() {
    ensureSorted();
    return this.superNodesTbl;
  }
 
  
 
//===============================================================
// Private Methods
//===============================================================

  /**two cases this function gets called
   * (1) a new hyperedge leads to a non-existing node signature
   * (2) a new hyperedge's signature matches an old node's signature, but the best-logp of old node is worse than the new hyperedge's logP
   * */
  private void addNewNode(HGNode node, boolean noPrune) {
    this.nodesSigTbl.put(node.getSignature(), node); // add/replace the item
    this.sortedNodes = null; // reset the list
     
 
    if(beamPruner!=null){
      if(noPrune==false){
        List<HGNode> prunedNodes = beamPruner.addOneObjInHeapWithPrune(node);
        this.chart.nPrunedItems += prunedNodes.size();
        for(HGNode prunedNode : prunedNodes)
          nodesSigTbl.remove(prunedNode.getSignature());
      }else{
        beamPruner.addOneObjInHeapWithoutPrune(node);
      }
    } 
   
    //since this.sortedItems == null, this is not necessary because we will always call ensure_sorted to reconstruct the this.tableSuperItems
    //add a super-items if necessary
    SuperNode si = this.superNodesTbl.get(node.lhs);
    if (null == si) {
      si = new SuperNode(node.lhs);
      this.superNodesTbl.put(node.lhs, si);
    }
    si.nodes.add(node);//TODO what about the dead items?
   
 
  }

 
 
  /** get a sorted list of Nodes in the cell, and also make
   * sure the list of node in any SuperItem is sorted, this
   * will be called only necessary, which means that the list
   * is not always sorted, mainly needed for goal_bin and
   * cube-pruning
   */
  private void ensureSorted() {
   
    if (null == this.sortedNodes) {
      //== get sortedNodes
      //HGNode[] tCollection =(HGNode[])((Collection<HGNode>)this.nodesSigTbl.values()).toArray();
      HGNode[] nodesArray = new HGNode[this.nodesSigTbl.size()];
      int i=0;
      for(HGNode node : this.nodesSigTbl.values() )
        nodesArray[i++]= node;
     
      /**sort the node in an decreasing-LogP order
       * */
      Arrays.sort(nodesArray, HGNode.inverseLogPComparator);
     
      this.sortedNodes = new ArrayList<HGNode>();
      for (HGNode node : nodesArray) {
        this.sortedNodes.add(node);
        //System.out.println(node.getPruneLogP());
      }
     
     
     
      //TODO: we cannot create new SuperItem here because the DotItem link to them
     
      //== update superNodesTbl
      List<SuperNode> tem_list = new ArrayList<SuperNode>(this.superNodesTbl.values());
      for (SuperNode t_si : tem_list) {
        t_si.nodes.clear();
      }
     
      for (HGNode it : this.sortedNodes) {
        SuperNode si = this.superNodesTbl.get(it.lhs);
        if (null == si) { // sanity check
          throw new RuntimeException("Does not have super Item, have to exist");
        }
        si.nodes.add(it);
      }
     
      //== remove SuperNodes who may not contain any node any more due to pruning
      List<Integer> toRemove = new ArrayList<Integer>();
      for (Integer k : this.superNodesTbl.keySet()) {
        if (this.superNodesTbl.get(k).nodes.size() <= 0) {
           // note that: we cannot directly do the remove, because it will throw ConcurrentModificationException
          toRemove.add(k);
          //System.out.println("have zero items in superitem " + k);
          //this.tableSuperItems.remove(k);
        }
      }
      for (Integer t : toRemove) {
        this.superNodesTbl.remove(t);
      }
    }
  }
}
TOP

Related Classes of joshua.decoder.chart_parser.Cell

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.