/**
 * Merges the two classes {@code c1} and {@code c2} into a freshly numbered class
 * and updates the bookkeeping tables to match.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>take the next id from {@code lastid} and map both {@code c1} and {@code c2}
 *       to it in {@code heads};</li>
 *   <li>compute the new class's probability as the sum of the two
 *       {@code wordProb} entries;</li>
 *   <li>compute combined {@code getProb}/{@code clacW} values of the new class
 *       against itself and against every other id in {@code slots};</li>
 *   <li>replace {@code c1}/{@code c2} with the new id in {@code slots},
 *       {@code pcc}, {@code wcc} and {@code wordProb}.</li>
 * </ol>
 *
 * @param c1 id of the first class to merge
 * @param c2 id of the second class to merge
 */
protected void merge(int c1, int c2) {
// Allocate the id of the merged class (post-increment of the lastid counter).
int newid = lastid++;
// Both merged ids now point at the new id.
heads.put(c1, newid);
heads.put(c2, newid);
// Scratch maps filled below and applied to pcc / wcc afterwards:
//   newpcc  -> becomes the pcc row stored under newid
//   inewpcc -> per-k values written into the existing pcc row of k, under key newid
//   newwcc  -> values written into existing wcc rows, under key newid
TIntFloatHashMap newpcc = new TIntFloatHashMap();
TIntFloatHashMap inewpcc = new TIntFloatHashMap();
TIntFloatHashMap newwcc = new TIntFloatHashMap();
float pc1 = wordProb.get(c1);
float pc2 = wordProb.get(c2);
// Probability of the new class.
float pc = pc1+pc2;
float w;
{
// Self term of the merged class: sum over all four ordered pairs of {c1, c2}.
float pcc1 = getProb(c1,c1);
float pcc2 = getProb(c2,c2);
float pcc3 = getProb(c1,c2);
float pcc4 = getProb(c2,c1);
float pcc = pcc1 + pcc2 + pcc3 + pcc4;
// Only non-zero values are stored.
if(pcc!=0.0f)
newpcc.put(newid, pcc);
w = clacW(pcc,pc,pc);
if(w!=0.0f)
newwcc.put(newid, w);
}
// Combine the new class with every other id currently in slots.
TIntIterator it = slots.iterator();
while(it.hasNext()){
int k = it.next();
// Note: read before the c1/c2 check below, so it is also evaluated for the merged ids.
float pck = wordProb.get(k);
if (c1==k||c2==k) {
continue;
} else {
// Direction (merged class, k).
float pcc1 = getProb(c1,k);
float pcc2 = getProb(c2,k);
float pcc12 = pcc1 + pcc2;
// NOTE(review): the key used here is newid, not k, so every iteration overwrites
// the previous one as well as the self term stored earlier. Possibly k was
// intended -- confirm before changing.
if(pcc12!=0.0f)
newpcc.put(newid, pcc12);
float p1 = clacW(pcc12,pc,pck);
// Direction (k, merged class).
float pcc3 = getProb(k,c1);
float pcc4 = getProb(k,c2);
float pcc34 = pcc3 + pcc4;
if(pcc34!=0.0f)
inewpcc.put(k, pcc34);
float p2 = clacW(pcc34,pck,pc);
// Sum of the clacW values for both directions.
w = p1 + p2;
// NOTE(review): same key question as above -- stored under newid rather than k,
// so only the last non-zero w survives. Confirm intent.
if(w!=0.0f)
newwcc.put(newid, w);
}
}
// Update slots: the two merged ids are replaced by the new id.
slots.remove(c1);
slots.remove(c2);
slots.add(newid);
// Install the new pcc row and drop the rows of the merged ids.
pcc.put(newid, newpcc);
pcc.remove(c1);
pcc.remove(c2);
// For each k collected in inewpcc, rewrite k's existing pcc row:
// add the entry for newid and drop the entries for c1 and c2.
TIntFloatIterator it2 = inewpcc.iterator();
while(it2.hasNext()){
it2.advance();
TIntFloatHashMap pmap = pcc.get(it2.key());
// Null guard disabled by the original author; pmap is dereferenced below without
// a check, so this relies on pcc already holding a row for it2.key().
// if(pmap==null){
// pmap = new TIntFloatHashMap();
// pcc.put(it2.key(), pmap);
// }
pmap.put(newid, it2.value());
pmap.remove(c1);
pmap.remove(c2);
}
//
// Original note: newid is always greater than it3.key.
// NOTE(review): as written above, newwcc is only ever populated under key newid,
// so this loop sees at most one entry and it3.key() equals newid -- that does not
// match the original note. Confirm against the intended layout of wcc.
wcc.put(newid, new TIntFloatHashMap());
wcc.remove(c1);
wcc.remove(c2);
TIntFloatIterator it3 = newwcc.iterator();
while(it3.hasNext()){
it3.advance();
// No null check here either: relies on wcc holding a row for it3.key().
TIntFloatHashMap pmap = wcc.get(it3.key());
pmap.put(newid, it3.value());
pmap.remove(c1);
pmap.remove(c2);
}
// Replace the probabilities of the merged ids with the probability of the new class.
wordProb.remove(c1);
wordProb.remove(c2);
wordProb.put(newid, pc);