Examples of AlphabetFactory


Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

    BufferedReader rd = new BufferedReader(
          new InputStreamReader(new FileInputStream(from), "gbk"));
   
    ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream (
        new GZIPOutputStream (new FileOutputStream(to))));
    AlphabetFactory factory = AlphabetFactory.buildFactory();
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    String s;
    rd.readLine(); // version
    List lst = new ArrayList();      //template
    while(true) {
      s = rd.readLine();

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

  public void train(Reader reader, String modelFile) throws Exception {
    /**
     * Classification model
     */
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();

    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {1, 2});
    // Convert string features into alphabet indices
    Pipe indexpp = new StringArray2IndexArray(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());

    // Build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, indexpp});

    // prePipe and pclassifier are fields of the enclosing class (not shown in this fragment)
    SeriesPipes pp2 = new SeriesPipes(new Pipe[]{prePipe, ngrampp, targetpp, indexpp});

    InstanceSet instset = new InstanceSet(pp2, af);

    // Read in the data and run it through the pipes
    instset.loadThruStagePipes(reader);

    /**
     * Build the classifier
     */
    OnlineTrainer trainer = new OnlineTrainer(af, 100);
    trainer.c = 0.01f;
    pclassifier = trainer.train(instset);
    pp.removeTargetPipe();
    pclassifier.setPipe(pp);
    af.setStopIncrement(true);

    // Save the classifier to the model file
    pclassifier.saveTo(modelFile);
  }
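
For completeness, here is a minimal sketch of how a model saved this way might be reloaded and applied. It is assembled by analogy with the BayesClassifier/KNNClassifier examples further down this page; Linear.loadFrom and the classify(..., Type.STRING, n) call on Linear are assumptions about the FNLP API, not part of the original snippet.

    // Hedged sketch: reload the saved model and classify one Instance.
    // Linear.loadFrom(...) is assumed by analogy with BayesClassifier.loadFrom(...)
    // and KNNClassifier.loadFrom(...) shown below; verify against the FNLP API.
    Linear cl = Linear.loadFrom(modelFile);
    // 'data' stands for an Instance that has already been run through the same feature pipes.
    Predict<String> pres = cl.classify(data, Type.STRING, 1);
    System.out.println("Predicted label: " + pres.getLabel());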

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

    Pipe s2spp=new Strings2StringArray();
    /**
     * Bayes
     */
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();
    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into sparse vectors of alphabet indices
    Pipe sparsepp = new StringArray2SV(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());
    // Build the pipe combination (removepp and segpp are defined outside this fragment)
    SeriesPipes pp = new SeriesPipes(new Pipe[]{removepp, segpp, s2spp, targetpp, sparsepp});

    System.out.print("\nReading data......\n");
    InstanceSet instset = new InstanceSet(pp, af);
    Reader reader = new MyDocumentReader(trainDataPath, "gbk");
    instset.loadThruStagePipes(reader);
    System.out.print("..Reading data complete\n");

    // Split the dataset into training and test sets
    System.out.print("Splitting....");
    float percent = 0.8f;
    InstanceSet[] splitsets = instset.split(percent);

    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1];
    System.out.print("..Splitting complete!\n");

    System.out.print("Training...\n");
    af.setStopIncrement(true);
    BayesTrainer trainer = new BayesTrainer();
    BayesClassifier classifier = (BayesClassifier) trainer.train(trainset);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    classifier.saveTo(bayesModelFile);

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

public class BayesTrainer{

  public AbstractClassifier train(InstanceSet trainset) {
    AlphabetFactory af = trainset.getAlphabetFactory();
    SeriesPipes pp = (SeriesPipes) trainset.getPipes();
    pp.removeTargetPipe();
    return train(trainset, af, pp);
  }
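
Usage of this wrapper matches the Bayes example earlier on this page: build an InstanceSet through the pipes, then hand it to the trainer.

    // Same call pattern as in the Bayes training example above.
    BayesTrainer trainer = new BayesTrainer();
    BayesClassifier classifier = (BayesClassifier) trainer.train(trainset);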

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

   
    String train = "./example-data/sequence/train.txt";
    String testfile = "./example-data/sequence/test.txt";
    String templateFile="./example-data/sequence/template";
    AlphabetFactory factory;
    Pipe featurePipe;
    TempletGroup templets;

    templets = new TempletGroup();
    templets.load(templateFile);
    factory = AlphabetFactory.buildFactory();

    /**
     * Label alphabet: maps each label to an index 0, 1, 2, ...
     */
    LabelAlphabet labels = factory.DefaultLabelAlphabet();
    /**
     * Feature alphabet
     */
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    // Extract features from each sample via the Pipe
   
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });
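
As a small illustration of what the two alphabets provide, the sketch below maps a label string to its integer id and back. lookupString(int) is the call used in the test code further down this page; lookupIndex(String) is an assumed inverse lookup, so treat this as illustrative rather than confirmed API.

    // Hedged sketch: label <-> id mapping via the LabelAlphabet.
    int id = labels.lookupIndex("B-NP");      // "B-NP" is a hypothetical label; lookupIndex assumed
    String back = labels.lookupString(id);    // lookupString(int) appears in the test code below
    System.out.println(id + " <-> " + back);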

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

    CWSTagger tag = new CWSTagger("../models/seg.m");
    Pipe segpp=new CNPipe(tag);
    Pipe s2spp=new Strings2StringArray();
   
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();
    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into sparse vectors of alphabet indices
    Pipe sparsepp = new StringArray2SV(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());
    // Build the pipe combination (removepp is defined outside this fragment)
    SeriesPipes pp = new SeriesPipes(new Pipe[]{removepp, segpp, s2spp, targetpp, sparsepp});

    /**
     * Knn
     */
    System.out.print("\nKnn\n");
    System.out.print("\nReading data......\n");
    long time_mark = System.currentTimeMillis();
    InstanceSet instset = new InstanceSet(pp, af);
    Reader reader = new MyDocumentReader(trainDataPath, "gbk");
    instset.loadThruStagePipes(reader);
    System.out.print("..Reading data complete "+(System.currentTimeMillis()-time_mark)+"(ms)\n");

    // Split the dataset into training and test sets
    System.out.print("Splitting....");
    float percent = 0.9f;
    InstanceSet[] splitsets = instset.split(percent);

    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1];
    System.out.print("..Splitting complete!\n");

    System.out.print("Training Knn...\n");
    time_mark = System.currentTimeMillis();
    SparseVectorSimilarity sim = new SparseVectorSimilarity();
    pp.removeTargetPipe();
    KNNClassifier knn = new KNNClassifier(trainset, pp, sim, af, 9);
    af.setStopIncrement(true);
   
    ItemFrequency tf=new ItemFrequency(trainset);
    FeatureSelect fs=new FeatureSelect(tf.getFeatureSize());
    long time_train=System.currentTimeMillis()-time_mark;
   

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

  public static void main(String[] args) throws Exception {

   
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();

    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into alphabet indices
    Pipe indexpp = new StringArray2IndexArray(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());

    // Build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, indexpp});

    InstanceSet instset = new InstanceSet(pp, af);

    // Use the Reader matching the input file format
    Reader reader = new FileReader(trainDataPath, "UTF-8", ".data");

    // Read in the data and run it through the pipes
    instset.loadThruStagePipes(reader);

    float percent = 0.8f;

    // Split the dataset into training and test sets
    InstanceSet[] splitsets = instset.split(percent);

    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1];

    /**
     * Build the classifier
     */
    OnlineTrainer trainer = new OnlineTrainer(af);
    Linear pclassifier = trainer.train(trainset);
    pp.removeTargetPipe();
    pclassifier.setPipe(pp);
    af.setStopIncrement(true);

    // Save the classifier to the model file
    pclassifier.saveTo(modelFile);
    pclassifier = null;
   

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

  public static void main(String[] args) throws Exception {
    /**
     * Bayes
     */
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();
    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into sparse vectors of alphabet indices
    Pipe sparsepp = new StringArray2SV(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());
    // Build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, sparsepp});

    System.out.print("\nReading data......\n");
    InstanceSet instset = new InstanceSet(pp, af);
    Reader reader = new MyDocumentReader(trainDataPath, "gbk");
    instset.loadThruStagePipes(reader);
    System.out.print("..Reading data complete\n");

    // Split the dataset into training and test sets
    System.out.print("Splitting....");
    float percent = 0.9f;
    InstanceSet[] splitsets = instset.split(percent);

    InstanceSet trainset = splitsets[0];
    InstanceSet testset = splitsets[1];
    System.out.print("..Splitting complete!\n");

    System.out.print("Training...\n");
    BayesTrainer trainer = new BayesTrainer();
    BayesClassifier classifier = (BayesClassifier) trainer.train(trainset);
    pp.removeTargetPipe();
    classifier.setPipe(pp);
    af.setStopIncrement(true);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    classifier.saveTo(bayesModelFile);
    classifier = null;
    System.out.print("..Saving model complete!\n");
    /**
     * Test
     */
    System.out.print("Loading model...\n");
    BayesClassifier bayes;
    bayes =BayesClassifier.loadFrom(bayesModelFile);
//    bayes =classifier;
    System.out.print("..Loading model complete!\n");
   
    System.out.println("Testing Bayes...");
    int count=0;
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=bayes.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
//      String pred_label = bayes.getStringLabel(data);
      String gold_label = bayes.getLabel(gold);
     
      if(pred_label.equals(gold_label)){
        //System.out.println(pred_label+" : "+testsetbayes.getInstance(i).getTempData());
        count++;
      }
      else{
//        System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//        for(int j=0;j<3;j++)
//          System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
      }
    }
    int bayesCount=count;
    System.out.println("..Testing Bayes complete!");
    System.out.println("Bayes Precision:"+((float)bayesCount/testset.size())+"("+bayesCount+"/"+testset.size()+")");


    /**
     * Knn
     */
    System.out.print("\nKnn\n");
    // Build the alphabet (dictionary) manager
    AlphabetFactory af2 = AlphabetFactory.buildFactory();
    // Use n-gram features
    ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into sparse vectors of alphabet indices
    sparsepp = new StringArray2SV(af2);
    // Use the index of the target value as the class label
    targetpp = new Target2Label(af2.DefaultLabelAlphabet());
    // Build the pipe combination
    pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, sparsepp});

    System.out.print("Init dataset...");
    trainset.setAlphabetFactory(af2);
    trainset.setPipes(pp);
    testset.setAlphabetFactory(af2);
    testset.setPipes(pp);
    for(int i=0;i<trainset.size();i++){
      Instance inst=trainset.get(i);
      inst.setData(inst.getSource());
      int target_id=Integer.parseInt(inst.getTarget().toString());
      inst.setTarget(af.DefaultLabelAlphabet().lookupString(target_id));
      pp.addThruPipe(inst);
    }   
    for(int i=0;i<testset.size();i++){
      Instance inst=testset.get(i);
      inst.setData(inst.getSource());
      int target_id=Integer.parseInt(inst.getTarget().toString());
      inst.setTarget(af.DefaultLabelAlphabet().lookupString(target_id));
      pp.addThruPipe(inst);
    }

    System.out.print("complete!\n");
    System.out.print("Training Knn...\n");
    SparseVectorSimilarity sim=new SparseVectorSimilarity();
    pp.removeTargetPipe();
    KNNClassifier knn = new KNNClassifier(trainset, pp, sim, af2, 7);
    af2.setStopIncrement(true);
    System.out.print("..Training complete!\n");
    System.out.print("Saving model...\n");
    knn.saveTo(knnModelFile);
    knn = null;
    System.out.print("..Saving model complete!\n");


    System.out.print("Loading model...\n");
    knn = KNNClassifier.loadFrom(knnModelFile);
    System.out.print("..Loading model complete!\n");
    System.out.println("Testing Knn...\n");
    count=0;
    for(int i=0;i<testset.size();i++){
      Instance data = testset.getInstance(i);
      Integer gold = (Integer) data.getTarget();
      Predict<String> pres=(Predict<String>) knn.classify(data, Type.STRING, 3);
      String pred_label=pres.getLabel();
      String gold_label = knn.getLabel(gold);
     
      if(pred_label.equals(gold_label)){
        //System.out.println(pred_label+" : "+testsetknn.getInstance(i).getTempData());
        count++;
      }
      else{
//        System.err.println(gold_label+"->"+pred_label+" : "+testset.getInstance(i).getTempData());
//        for(int j=0;j<3;j++)
//          System.out.println(pres.getLabel(j)+":"+pres.getScore(j));
      }
    }
    int knnCount=count;
    System.out.println("..Testing Knn Complete");
    System.out.println("Bayes Precision:"+((float)bayesCount/testset.size())+"("+bayesCount+"/"+testset.size()+")");
    System.out.println("Knn Precision:"+((float)knnCount/testset.size())+"("+knnCount+"/"+testset.size()+")");
   
    // Build the alphabet (dictionary) manager
    AlphabetFactory af3 = AlphabetFactory.buildFactory();
    // Use n-gram features
    ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into alphabet indices
    Pipe indexpp = new StringArray2IndexArray(af3);
    // Use the index of the target value as the class label
    targetpp = new Target2Label(af3.DefaultLabelAlphabet());

    // Build the pipe combination
    pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, indexpp});

    trainset.setAlphabetFactory(af3);

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

  public static void main(String[] args) throws Exception {

   
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();

    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {2, 3});
    // Convert string features into alphabet indices
    Pipe indexpp = new StringArray2IndexArray(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());

    // Build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, indexpp});

    InstanceSet trainset = new InstanceSet(pp, af);
    InstanceSet testset = new InstanceSet(pp, af);

    // Use the Reader matching the input file format
    Reader reader = new DocumentReader(trainDataPath);

    // Read in the data and run it through the pipes
    trainset.loadThruStagePipes(reader);

    reader = new DocumentReader(testDataPath);

    testset.loadThruStagePipes(reader);

    /**
     * Build the classifier
     */
    OnlineTrainer trainer = new OnlineTrainer(af);
    Linear pclassifier = trainer.train(trainset);
    pp.removeTargetPipe();
    pclassifier.setPipe(pp);
    af.setStopIncrement(true);

    // Save the classifier to the model file
    pclassifier.saveTo(modelFile);
    pclassifier = null;
   

Examples of org.fnlp.ml.types.alphabet.AlphabetFactory

    }
    String res = bayes.getStringLabel(inst);
    System.out.println("xxx");
    System.out.println("Class: " + res);
    // Build the alphabet (dictionary) manager
    AlphabetFactory af = AlphabetFactory.buildFactory();

    // Use n-gram features
    Pipe ngrampp = new NGram(new int[] {1, 2});
    // Word segmentation
//    CWSTagger tag = new CWSTagger("../models/seg.m");
//    Pipe segpp = new CNPipe(tag);
    // Convert string features into alphabet indices
    Pipe indexpp = new StringArray2IndexArray(af);
    Pipe sparsepp = new StringArray2SV(af);
    // Use the index of the target value as the class label
    Pipe targetpp = new Target2Label(af.DefaultLabelAlphabet());
    // Build the pipe combination
    SeriesPipes pp = new SeriesPipes(new Pipe[]{ngrampp, targetpp, sparsepp});
   
    InstanceSet instset = new InstanceSet(pp,af);
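
To round the fragment off, here is a minimal sketch of pushing one raw document through the pipe chain above and classifying it with the bayes model already loaded in this snippet. The Instance(Object) constructor and the rawText variable are assumptions; removeTargetPipe, addThruPipe, and classify(..., Type.STRING, n) are the calls used elsewhere on this page.

    // Hedged sketch: classify a single raw document with the pipes built above.
    pp.removeTargetPipe();                         // drop the target pipe before prediction, as in the other examples
    Instance inst = new Instance(rawText);         // rawText: a raw document string (hypothetical); constructor assumed
    pp.addThruPipe(inst);                          // run the n-gram / sparse-vector pipes
    Predict<String> pres = bayes.classify(inst, Type.STRING, 1);
    System.out.println("Class: " + pres.getLabel());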
   