int[] idx = index.keys();
Arrays.sort(idx);
int length = weights.length;
IFeatureAlphabet newfeat = new StringFeatureAlphabet();
cl.getAlphabetFactory().setDefaultFeatureAlphabet(newfeat);
TFloatArrayList ww = new TFloatArrayList();
float[] vars = new float[idx.length];
float[] entropy = new float[idx.length];
for (int i = 0; i < idx.length; i++) {
int base = idx[i]; //一个特征段起始位置
int end; //一个特征段结束位置
if (i < idx.length - 1)
end = idx[i + 1]; //对应下一个特征段起始位置
else
end = length; //或者整个结束位置
int interv = end - base; //一个特征段长度
float[] sw = new float[interv];
for (int j = 0; j < interv; j++) {
sw[j] = weights[base+j];
}
//计算方差
// System.out.println(MyStrings.toString(sw, " "));
vars[i] = MyArrays.viarance(sw);
MyArrays.normalize(sw);
MyArrays.normalize2Prop(sw);
entropy[i] = MyArrays.entropy(sw);
int[] maxe = new int[sw.length];
for(int iii=0;iii<maxe.length;iii++){
maxe[iii]=1;
}
float maxen = MyArrays.entropy(maxe);
if (i==0||vars[i]>varsthresh&&entropy[i]<maxen*0.999) {
String str = index.get(base);
int id = newfeat.lookupIndex(str, interv);
for (int j = 0; j < interv; j++) {
ww.insert(id + j, weights[base + j]);
}
}else{
// System.out.print(".");
}
}
System.out.println("方差均值:"+MyArrays.average(vars));
System.out.println("方差非零个数:"+MyArrays.countNoneZero(vars));
System.out.println("方差直方图:"+MyStrings.toString(MyArrays.histogram(vars, 10)));
// MyArrays.normalize2Prop(entropy);
System.out.println("熵均值:"+MyArrays.average(entropy));
System.out.println("熵非零个数:"+MyArrays.countNoneZero(entropy));
System.out.println("熵直方图:"+MyStrings.toString(MyArrays.histogram(entropy, 10)));
newfeat.setStopIncrement(freeze);
cl.setWeights(ww.toArray());
float[] www = cl.getWeights();
c = MyArrays.countNoneZero(www);
System.out.println("\n优化后");
System.out.println("字典索引个数"+cl.getAlphabetFactory().DefaultFeatureAlphabet().keysize());
System.out.println("字典大小"+cl.getAlphabetFactory().DefaultFeatureAlphabet().size());
System.out.println("权重长度"+www.length);
System.out.println("非零权重"+c);
index.clear();
ww.clear();
}