package joshua.discriminative.semiring_parsing;
import joshua.corpus.vocab.BuildinSymbol;
import joshua.corpus.vocab.SymbolTable;
import joshua.decoder.hypergraph.DiskHyperGraph;
import joshua.decoder.hypergraph.HGNode;
import joshua.decoder.hypergraph.HyperEdge;
import joshua.decoder.hypergraph.HyperGraph;
public class EntropyOnHG extends DefaultSemiringParser {
public EntropyOnHG(int semiring, int add_mode, double scale) {
super(semiring, add_mode, scale);
// TODO Auto-generated constructor stub
}
protected ExpectationSemiring createNewSemiringMember() {
return new ExpectationSemiring();
}
protected ExpectationSemiring getHyperedgeSemiringWeight(HyperEdge dt, HGNode parent_item, double scale, AtomicSemiring p_atomic_semiring){
ExpectationSemiring res = null;
if(p_atomic_semiring.ATOMIC_SEMIRING==AtomicSemiring.LOG_SEMIRING){
double logProb = scale * dt.getTransitionLogP(false);
double val = scale * dt.getTransitionLogP(false);//s(x,y); to compute E(s(x,y)); s(x,y) is the linear combintation (considered scaling factor)
//double factor1 = Math.exp(prob)*val; //real semiring
SignedValue factor1 = SignedValue.multi(
logProb,
SignedValue.createSignedValue(val)
);
res = new ExpectationSemiring(logProb, factor1);
}else{
System.out.println("un-implemented atomic-semiring");
System.exit(1);
}
return res;
}
// #######################################################################
public static void main(String[] args) {
if(args.length>4){
System.out.println("Wrong number of parameters, it must have at least four parameters: java NbestMinRiskAnnealer use_shortest_ref f_config gain_factor f_dev_src f_nbest_prefix f_dev_ref1 f_dev_ref2....");
System.exit(1);
}
String f_dev_hg_prefix=args[0].trim();
String f_dev_items = f_dev_hg_prefix +".items";
String f_dev_rules = f_dev_hg_prefix +".rules";
double scale = 1;
if(args.length>=2)
scale = new Double(args[1].trim());
int num_sents =5;
if(args.length>=3)
num_sents = new Integer(args[2].trim());
int numSrcWords =1;
if(args.length>=4)
numSrcWords = new Integer(args[3].trim());
SymbolTable symbolTbl = new BuildinSymbol(null);
int ngramStateID =0;
double sumEntropy = 0;
DefaultSemiringParser ds = new EntropyOnHG(1,0,scale);
DiskHyperGraph diskHG = new DiskHyperGraph(symbolTbl, ngramStateID, true, null); //have model costs stored
diskHG.initRead(f_dev_items, f_dev_rules,null);
for(int sentID=0; sentID < num_sents; sentID ++){
System.out.println("#Process sentence " + sentID);
HyperGraph testHG = diskHG.readHyperGraph();
ds.insideEstimationOverHG(testHG);
ExpectationSemiring goalSemiring = (ExpectationSemiring) ds.getGoalSemiringMember(testHG);
//goal_semiring.printInfor();
goalSemiring.normalizeFactors();
goalSemiring.printInfor();
double entropy = goalSemiring.getLogProb() - goalSemiring.getFactor1().convertRealValue();//logZ-E(s)/Z
System.out.println("entropy is " + entropy);
sumEntropy += entropy;
}
System.out.println("scale=" + scale + "; num_sents=" + num_sents +"; numSrcWords="+numSrcWords);
//a nats has 1.44 bits
System.out.println("sum_entropy: " + scale + " " + 1.44*sumEntropy/numSrcWords);
}
}