Package org.fnlp.ml.types

Examples of org.fnlp.ml.types.InstanceSet
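
The snippets below all follow the same pattern: build a Pipe (typically a SeriesPipes) that turns raw input into features, construct an InstanceSet around that pipe (usually sharing an AlphabetFactory), load data through a reader, and then iterate over or split the set. The sketch below condenses that pattern from the examples on this page; imports are omitted, the data file is a placeholder, and AlphabetFactory.buildFactory() is assumed as the factory constructor (names may differ between FNLP versions).

    // Minimal usage sketch (placeholder data file; pipe components taken from the examples below)
    AlphabetFactory factory = AlphabetFactory.buildFactory();       // assumed factory helper
    Pipe lpipe = new Target2Label(factory.DefaultLabelAlphabet());  // map target strings to label ids
    Pipe fpipe = new StringArray2SV(factory, true);                 // map token arrays to sparse vectors
    Pipe pipe = new SeriesPipes(new Pipe[]{lpipe, fpipe});          // run both pipes in sequence

    InstanceSet instset = new InstanceSet(pipe, factory);
    instset.loadThruStagePipes(new SimpleFileReader("train.txt", true)); // placeholder path
    System.out.println("Instances loaded: " + instset.size());

    // 80/20 split into training and test sets
    InstanceSet[] parts = instset.split(0.8f);
    InstanceSet trainset = parts[0];
    InstanceSet testset = parts[1];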


    long starttime = System.currentTimeMillis();
    // extract features from the samples through the Pipe
    Pipe pipe = createProcessor();

    // test set
    testSet = new InstanceSet(pipe);

    testSet.loadThruStagePipes(new SequenceReader(testfile,hasLabel,"utf8"));
    System.out.println("测试样本个数:\t" + testSet.size()); // 样本个数

    long featuretime = System.currentTimeMillis();
View Full Code Here


    long beginTime = System.currentTimeMillis();

    Pipe pipe = createProcessor();

   
    trainSet = new InstanceSet(pipe, factory);
    // let the label and feature alphabets keep growing while the training data is read
    labels = factory.DefaultLabelAlphabet();
    features = factory.DefaultFeatureAlphabet();
    features.setStopIncrement(false);
    labels.setStopIncrement(false);
View Full Code Here

      if (!hasTarget) { // the test data is unlabeled
        pipe = featurePipe;
      }

      // test set
      testSet = new InstanceSet(pipe);
      testSet.loadThruStagePipes(new SequenceReader(testfile, hasTarget, "utf8"));
      System.out.println("Test Number: " + testSet.size()); // 样本个数
    }
  }
View Full Code Here

    // build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp,targetpp,indexpp});
   
    SeriesPipes  pp2 = new SeriesPipes(new Pipe[]{prePipe, ngrampp,targetpp,indexpp});
   
    InstanceSet instset = new InstanceSet(pp2,af);



    // read in the data and process it
    instset.loadThruStagePipes(reader);

    /**
     * Build the classifier
     */
    OnlineTrainer trainer = new OnlineTrainer(af,100);
View Full Code Here

      ARClassifier tc = new ARClassifier();
      tc.train();
      Linear cl =Linear.loadFrom(modelFile);
      int i = 0, j = 0, kk = 0, jj = 0, nn = 0, n = 0;
      double ij = 0.0;
      InstanceSet test = new InstanceSet(cl.getPipe(),cl.getAlphabetFactory());
      SimpleFileReader sfr = new SimpleFileReader("../tmp/ar-train.txt",true);
     
      ArrayList<Instance> list1 = new ArrayList<Instance>();
      while (sfr.hasNext())
      {
        list1.add(sfr.next());
      }
      List<String>[] str1 = new List[list1.size()];
      String[] str2 = new String[list1.size()];
      for (Instance in : list1) {
        str1[i] = (List<String>) in.getData();
        str2[i] = (String) in.getTarget();
        i++;
      }
      for(int k = 0;k<str2.length;k++)
      {
        if(str2[k].equals("1"))
          kk++;
      }
      String ss =null;
        test.loadThruPipes(new ListReader(str1));
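        // count predictions of label "1" (j) and those that also match the gold label (jj)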
       
        for(int ii=0;ii<str1.length;ii++){
          ss = cl.getStringLabel(test.getInstance(ii));
          if(ss.equals("1"))
            j++;
       
          if(ss.equals("1")&&ss.equals(str2[ii]))
            jj++;
View Full Code Here

      // build the converter (pipe) group
      SeriesPipes pipe = new SeriesPipes(new Pipe[]{lpipe,fpipe});



      InstanceSet instset = new InstanceSet(pipe,factory);
      instset.loadThruStagePipes(new SimpleFileReader(trainFile," ",true,Type.LabelData));
      Generator gen = new SFGenerator();
      ZeroOneLoss l = new ZeroOneLoss();
      Inferencer ms = new LinearMax(gen, factory.getLabelSize());
      Update update = new LinearMaxPAUpdate(l);
      OnlineTrainer trainer = new OnlineTrainer(ms, update,l, factory.getFeatureSize(), 50,0.005f);
View Full Code Here

    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());
    // build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{removepp,segpp,s2spp,targetpp,sparsepp});

    System.out.print("\nReading data......\n");
    InstanceSet instset = new InstanceSet(pp,af);
    Reader reader = new MyDocumentReader(trainDataPath,"gbk");
    instset.loadThruStagePipes(reader);
    System.out.print("..Reading data complete\n");
   
    // split the dataset into a training set and a test set
    System.out.print("Splitting....");
    float percent = 0.8f;
    InstanceSet[] splitsets = instset.split(percent);

    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1];
    System.out.print("..Splitting complete!\n");

    System.out.print("Training...\n");
    af.setStopIncrement(true);
    BayesTrainer trainer=new BayesTrainer();
    BayesClassifier classifier= (BayesClassifier) trainer.train(trainset);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    classifier.saveTo(bayesModelFile);
    classifier = null;
    System.out.print("..Saving model complete!\n");
    /**
     * Test
     */
    System.out.print("Loading model...\n");
    BayesClassifier bayes = BayesClassifier.loadFrom(bayesModelFile);
    System.out.print("..Loading model complete!\n");
   
    System.out.println("Testing Bayes...");
    int flag=0;
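    // Sweep feature-selection ratios: fS_CS / fS_CS_Max / fS_IG presumably keep the top
    // fraction of features by chi-square, maximum chi-square and information gain;
    // the first pass (1.0f) is run without any selection.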
    float[] percents_cs=new float[]{1.0f,0.9f,0.8f,0.7f,0.5f,0.3f,0.2f,0.1f};
    int[] counts_cs=new int[10];
    for(int test=0;test<percents_cs.length;test++){
      System.out.println("Testing Bayes"+percents_cs[test]+"...");
      if(test!=0)
        bayes.fS_CS(percents_cs[test]);
      int count=0;
      for(int i=0;i<testset.size();i++){
        Instance data = testset.getInstance(i);
        Integer gold = (Integer) data.getTarget();
        Predict<String> pres=bayes.classify(data, Type.STRING, 3);
        String pred_label=pres.getLabel();
        String gold_label = bayes.getLabel(gold);
       
        if(pred_label.equals(gold_label)){
          count++;
        }
        else{
          flag=i;
//          System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//          for(int j=0;j<3;j++)
//            System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
        }
      }
      counts_cs[test]=count;
      System.out.println("Bayes Precision("+percents_cs[test]+"):"
      +((float)count/testset.size())+"("+count+"/"+testset.size()+")");
    }
    bayes.noFeatureSelection();
    float[] percents_csmax=new float[]{1.0f,0.9f,0.8f,0.7f,0.5f,0.3f,0.2f,0.1f};
    int[] counts_csmax=new int[10];
    for(int test=0;test<percents_csmax.length;test++){
      System.out.println("Testing Bayes"+percents_csmax[test]+"...");
      if(test!=0)
        bayes.fS_CS_Max(percents_csmax[test]);
      int count=0;
      for(int i=0;i<testset.size();i++){
        Instance data = testset.getInstance(i);
        Integer gold = (Integer) data.getTarget();
        Predict<String> pres=bayes.classify(data, Type.STRING, 3);
        String pred_label=pres.getLabel();
        String gold_label = bayes.getLabel(gold);
       
        if(pred_label.equals(gold_label)){
          count++;
        }
        else{
//          System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//          for(int j=0;j<3;j++)
//            System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
        }
      }
      counts_csmax[test]=count;
      System.out.println("Bayes Precision("+percents_csmax[test]+"):"
      +((float)count/testset.size())+"("+count+"/"+testset.size()+")");
    }
    bayes.noFeatureSelection();
    float[] percents_ig=new float[]{1.0f,0.9f,0.8f,0.7f,0.5f,0.3f,0.2f,0.1f};
    int[] counts_ig=new int[10];
    for(int test=0;test<percents_ig.length;test++){
      System.out.println("Testing Bayes"+percents_ig[test]+"...");
      if(test!=0)
        bayes.fS_IG(percents_ig[test]);
      int count=0;
      for(int i=0;i<testset.size();i++){
        Instance data = testset.getInstance(i);
        Integer gold = (Integer) data.getTarget();
        Predict<String> pres=bayes.classify(data, Type.STRING, 3);
        String pred_label=pres.getLabel();
        String gold_label = bayes.getLabel(gold);
       
        if(pred_label.equals(gold_label)){
          count++;
        }
        else{
//          System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//          for(int j=0;j<3;j++)
//            System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
        }
      }
      counts_ig[test]=count;
      System.out.println("Bayes Precision("+percents_csmax[test]+"):"
      +((float)count/testset.size())+"("+count+"/"+testset.size()+")");
    }
   
    System.out.println("..Testing Bayes complete!");
    for(int i=0;i<percents_cs.length;i++)
      System.out.println("Bayes Precision CS("+percents_cs[i]+"):"
    +((float)counts_cs[i]/testset.size())+"("+counts_cs[i]+"/"+testset.size()+")");
   
    for(int i=0;i<percents_csmax.length;i++)
      System.out.println("Bayes Precision CS_Max("+percents_csmax[i]+"):"
    +((float)counts_csmax[i]/testset.size())+"("+counts_csmax[i]+"/"+testset.size()+")");
   
    for(int i=0;i<percents_ig.length;i++)
      System.out.println("Bayes Precision IG("+percents_ig[i]+"):"
    +((float)counts_ig[i]/testset.size())+"("+counts_ig[i]+"/"+testset.size()+")");

  }
View Full Code Here

  private void init(String[][][]stringTag,String str) throws Exception{
    ts = new TreeSet<Entity>();
    llis = new LinkedList<Instance>();
    fc = new FormChanger();
    test = new InstanceSet(cl.getPipe());
    test.loadThruPipes(new AR_Reader(stringTag,str));
    for(int i=0;i<test.size();i++){
      String ss = cl.getStringLabel(test.getInstance(i));
      if(ss.equals("1")){
        llis.add(test.getInstance(i));
View Full Code Here

    Pipe fpipe = new StringArray2SV(factory, true);
    // build the converter (pipe) group
    Pipe pipe = new SeriesPipes(new Pipe[]{lpipe,fpipe});
   
    // build the training set
    train = new InstanceSet(pipe, factory);
    SimpleFileReader reader = new SimpleFileReader(path, true);
    train.loadThruStagePipes(reader);
    al.setStopIncrement(true);
   
    // build the test set
    test = new InstanceSet(pipe, factory);
    reader = new SimpleFileReader(path, true);
    test.loadThruStagePipes(reader);

    System.out.println("Train Number: " + train.size());
    System.out.println("Test Number: " + test.size());
View Full Code Here

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });


    System.out.print("读入训练数据 ...");
    InstanceSet trainSet = new InstanceSet(pipe, factory);

    // training set
    trainSet.loadThruStagePipes(new SequenceReader(train, true, "utf8"));
    System.out.println("Number of training samples: " + trainSet.size());
    System.out.println("Number of labels: " + labels.size());
    System.out.println("Number of features: " + features.size());

    // freeze the feature and label alphabets
    features.setStopIncrement(true);
    labels.setStopIncrement(true);


    // Viterbi decoding
    HammingLoss loss = new HammingLoss();
    Inferencer inference = new LinearViterbi(templets, labels.size());
    Update update = new LinearViterbiPAUpdate((LinearViterbi) inference, loss);
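    // Online passive-aggressive training; the trailing OnlineTrainer arguments are assumed
    // to be the iteration count (50) and the learning-rate/slack constant (0.1f).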


    OnlineTrainer trainer = new OnlineTrainer(inference, update, loss,
        features.size(), 50,0.1f);

    Linear cl = trainer.train(trainSet);


    // the test data is unlabeled
    Pipe tpipe = featurePipe;
    // test set
    InstanceSet testSet = new InstanceSet(tpipe);

    testSet.loadThruPipes(new SequenceReader(testfile, false, "utf8"));
    System.out.println("测试样本个数: " + testSet.size()); //
    String[][] labelsSet = new String[testSet.size()][];
    for (int i = 0; i < testSet.size(); i++) {
      Instance carrier = testSet.get(i);
      int[] pred = (int[]) cl.classify(carrier).getLabel(0);
      labelsSet[i] = labels.lookupString(pred);
    }
   
    String s = SimpleFormatter.format(testSet, labelsSet);
View Full Code Here
