package joshua.discriminative.feature_related.feature_template;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.ff.state_maintenance.NgramDPState;
import joshua.decoder.ff.tm.Rule;
import joshua.decoder.hypergraph.HGNode;
import joshua.discriminative.DiscriminativeSupport;
@Deprecated
public class EdgeBigramFT extends AbstractFeatureTemplate {
int baselineLMOrder =3;
SymbolTable symbolTbl;
int ngramStateID=0; //the baseline LM feature id
boolean useIntegerString = true;
public EdgeBigramFT(SymbolTable symbolTbl, int ngramStateID, int baselineLMOrder, boolean useIntegerString){
this.symbolTbl = symbolTbl;
this.ngramStateID = ngramStateID;
this.baselineLMOrder = baselineLMOrder;
this.useIntegerString = useIntegerString;
System.out.println("use edge ngram only");
}
public void getFeatureCounts(Rule rule, List<HGNode> antNodes, HashMap<String, Double> featureTbl, HashSet<String> restrictedFeatureSet, double scale) {
HashMap<String,Double> ngramsTbl = getEdgeBigrams(rule, antNodes, baselineLMOrder);
if(ngramsTbl!=null){
for(Map.Entry<String,Double> entry : ngramsTbl.entrySet() ){
String ngramFeatKey= entry.getKey();
if(restrictedFeatureSet ==null || restrictedFeatureSet.contains(ngramFeatKey)==true){
DiscriminativeSupport.increaseCount(featureTbl, ngramFeatKey, entry.getValue()*scale);
}
}
}
}
private HashMap<String,Double> getEdgeBigrams(Rule rule, List<HGNode> antNodes, int baselineLMOrder){
if(baselineLMOrder<=1){
System.out.println("lm order is too small");
System.exit(0);
}
if(rule==null){//##### deductions under "goal item" does not have rule
if(antNodes.size()!=1){
System.out.println("error deduction under goal item have more than one item");
System.exit(0);
}
return null;
}
if(rule.getArity()<=0){//in axiom, no bigram will be created, every ngram is from the rule which itself comes from the parallel corpora
return null;//empty hashmap
}
//################## not deductions under "goal item"
HashMap<String,Double> edgeBigrams = new HashMap<String,Double>();//new ngrams created due to the combination
Integer contextWord = null;
boolean afterNonterminal = false;
int[] enWords = rule.getEnglish();
for(int c=0; c<enWords.length; c++){
int c_id = enWords[c];
if(symbolTbl.isNonterminal(c_id)==true){
int index=symbolTbl.getTargetNonterminalIndex(c_id);
HGNode antNode = antNodes.get(index);
NgramDPState state = (NgramDPState) antNode.getDPState(this.ngramStateID);
List<Integer> l_context = state.getLeftLMStateWords();
List<Integer> r_context = state.getRightLMStateWords();
if(contextWord!=null){
String bigram = null;
if(this.useIntegerString)
bigram = contextWord + " " + l_context.get(0);
else
bigram = symbolTbl.getWord(contextWord) + " " + symbolTbl.getWord(l_context.get(0));
DiscriminativeSupport.increaseCount(edgeBigrams, bigram,1);
}
if(r_context.size()>0)
contextWord = r_context.get(r_context.size()-1);
else
contextWord = l_context.get(l_context.size()-1);
afterNonterminal = true;
}else{
if(afterNonterminal==true){
afterNonterminal=false;
String bigram = null;
if(this.useIntegerString)
bigram = contextWord + " " + c_id;
else
bigram = symbolTbl.getWord(contextWord) + " " + symbolTbl.getWord(c_id);
DiscriminativeSupport.increaseCount(edgeBigrams, bigram,1);
}
contextWord = c_id;
}
}
return edgeBigrams;
}
public void estimateFeatureCounts(Rule rule, HashMap<String, Double> featureTbl, HashSet<String> restrictedFeatureSet, double scale) {
// TODO Auto-generated method stub
}
}