Package org.fnlp.ml.types.alphabet

Examples of org.fnlp.ml.types.alphabet.StringFeatureAlphabet


   * 统计信息,计算删除非0特征后,权重的长度
   *
   * @throws IOException
   */
  public void removeZero(Linear cl) {
    // Rebuilds the default feature alphabet and the weight vector of cl, keeping only the
    // feature segments that pass the variance/entropy filter below, and prints statistics
    // along the way. NOTE(review): the end of this method is not visible in this excerpt.
    StringFeatureAlphabet feature = (StringFeatureAlphabet) cl.getAlphabetFactory().DefaultFeatureAlphabet();
    float[] weights = cl.getWeights();   
    // Presumably the number of non-zero entries in weights (printed below as "非零权重").
    int c = MyArrays.countNoneZero(weights);
    // Report the state before the rewrite ("优化前").
    // NOTE(review): the next statement has no terminating semicolon in the visible source.
    System.out.println("\n优化前")
    System.out.println("字典索引个数"+feature.keysize());
    System.out.println("字典大小"+cl.getAlphabetFactory().DefaultFeatureAlphabet().size());
    System.out.println("权重长度"+weights.length);
    // NOTE(review): the next statement has no terminating semicolon in the visible source.
    System.out.println("非零权重"+c)
    // If the old alphabet had its stop-increment flag set, clear it and remember that fact;
    // the remembered value is applied to the replacement alphabet near the end.
    boolean freeze = false;
    if (feature.isStopIncrement()) {
      feature.setStopIncrement(false);
      freeze = true;
    }


    // Invert the alphabet: map each feature's integer value back to its string key.
    TIntObjectHashMap<String> index = new TIntObjectHashMap<String>();
    TObjectIntIterator<String> it = feature.iterator();
    while (it.hasNext()) {
      it.advance();
      String value = it.key();
      int key = it.value();
      index.put(key, value);
    }
    // Sorted segment start offsets; consecutive entries delimit one feature's weight segment.
    int[] idx = index.keys();
    Arrays.sort(idx);
    int length = weights.length;
    // Install a fresh, empty alphabet as the factory default; kept features are re-registered below.
    IFeatureAlphabet newfeat = new StringFeatureAlphabet();
    cl.getAlphabetFactory().setDefaultFeatureAlphabet(newfeat);
    // Accumulates the weights of the segments that are kept.
    TFloatArrayList ww = new TFloatArrayList();
    float[] vars = new float[idx.length];
    float[] entropy = new float[idx.length];
    for (int i = 0; i < idx.length; i++) {
      int base = idx[i]; // start offset of this feature segment
      int end; // end offset (exclusive) of this feature segment
      if (i < idx.length - 1)
        end = idx[i + 1]; // the start offset of the next feature segment
      else
        end  = length; // or the end of the whole weight array for the last segment
      int interv = end - base;   // length of this feature segment
      // Work on a copy so the normalisation calls below are not applied to weights itself.
      float[] sw = new float[interv];
      for (int j = 0; j < interv; j++) {
        sw[j] = weights[base+j];
      }
      // compute the variance
//      System.out.println(MyStrings.toString(sw, " "));
      vars[i] = MyArrays.viarance(sw);
      MyArrays.normalize(sw);
      MyArrays.normalize2Prop(sw);
      entropy[i] = MyArrays.entropy(sw);
      // Entropy of an all-ones array of the same length, used as the reference value;
      // presumably the maximum (uniform) entropy for this segment length - TODO confirm.
      int[] maxe = new int[sw.length];
      for(int iii=0;iii<maxe.length;iii++){
        maxe[iii]=1;
      }
      float maxen = MyArrays.entropy(maxe);
      // Keep the first segment unconditionally; keep any other segment only when its variance
      // exceeds varsthresh AND its entropy is below 99.9% of the reference entropy
      // (&& binds tighter than ||). varsthresh is declared outside the visible code.
      if (i==0||vars[i]>varsthresh&&entropy[i]<maxen*0.999) {
        String str = index.get(base);
        // Register the feature in the new alphabet with the same segment length and copy
        // its original (un-normalised) weights to the returned position.
        int id = newfeat.lookupIndex(str, interv);
        for (int j = 0; j < interv; j++) {
          ww.insert(id + j, weights[base + j]);
        }
      }else{
//                System.out.print("."); 
      }
    }
    // Print mean, non-zero count and a 10-bin histogram of the per-segment variances and entropies.
    System.out.println("方差均值:"+MyArrays.average(vars));
    System.out.println("方差非零个数:"+MyArrays.countNoneZero(vars));
    System.out.println("方差直方图:"+MyStrings.toString(MyArrays.histogram(vars, 10)));
//    MyArrays.normalize2Prop(entropy);
    System.out.println("熵均值:"+MyArrays.average(entropy));
    System.out.println("熵非零个数:"+MyArrays.countNoneZero(entropy));
    System.out.println("熵直方图:"+MyStrings.toString(MyArrays.histogram(entropy, 10)));
   
    // Apply the remembered stop-increment state to the new alphabet and install the new weights.
    newfeat.setStopIncrement(freeze);
    cl.setWeights(ww.toArray());

    // Re-read the installed weights and recount the non-zero entries.
    float[] www = cl.getWeights();
    c = MyArrays.countNoneZero(www);

View Full Code Here



  public void feature(){
    features = new TIntHashSet[docs.size()];

    StringFeatureAlphabet fa = new StringFeatureAlphabet();

    for(int i=0;i<docs.size();i++){
      Set<String> set = FingerPrint.featureset(docs.get(i).content,type);
      features[i] = new TIntHashSet(set.size());
      Iterator<String> it = set.iterator();
      while(it.hasNext()){
        String str = it.next();       
        int idx = fa.lookupIndex(str);
        features[i].add(idx);
      }     
    }
    group();
  }
View Full Code Here

      nweights[i] = models[i].getWeights();
      ww[i] = new TFloatArrayList();
    }
    int length = nweights[0].length;

    StringFeatureAlphabet features = (StringFeatureAlphabet) factory.DefaultFeatureAlphabet(Type.String);
    TIntObjectHashMap<String> index = new TIntObjectHashMap<String>();
    TObjectIntIterator<String> it = features.iterator();
    while (it.hasNext()) {
      it.advance();
      String value = it.key();
      int key = it.value();
      index.put(key, value);
View Full Code Here

    }
    TIntObjectHashMap<String> index = (TIntObjectHashMap<String>) feature.toInverseIndexMap();
   
    System.out.println("原字典大小"+index.size());
    System.out.println("原字典大小"+feature.size());
    StringFeatureAlphabet newfeat = new StringFeatureAlphabet();
    cl.factory.setDefaultFeatureAlphabet(newfeat);
    for(int i=0;i<weights.length;i++){
        TIntFloatIterator itt = weights[i].data.iterator();
        HashSparseVector ww = new HashSparseVector();
        while(itt.hasNext()){
          itt.advance();
          float v = itt.value();
          if(Math.abs(v)<1e-3f)
            continue;
          String fea = index.get(itt.key());
          int newidx = newfeat.lookupIndex(fea);
          ww.put(newidx, v);       
      }
      weights[i] = ww; 
    }
   
    newfeat.setStopIncrement(freeze);
    System.out.println("新字典大小"+newfeat.size());   
    System.out.println("新字典大小"+feature.size());   
    index.clear();   
  }
View Full Code Here

TOP

Related Classes of org.fnlp.ml.types.alphabet.StringFeatureAlphabet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of their respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.