Package org.fnlp.train.pos

Source Code of org.fnlp.train.pos.POSTrain

/**
*  This file is part of FNLP (formerly FudanNLP).
*  FNLP is free software: you can redistribute it and/or modify
*  it under the terms of the GNU Lesser General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*  FNLP is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*  You should have received a copy of the GNU Lesser General Public License
*  along with FNLP.  If not, see <http://www.gnu.org/licenses/>.
*  Copyright 2009-2014 www.fnlp.org. All rights reserved.
*/

package org.fnlp.train.pos;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.fnlp.data.reader.SequenceReader;
import org.fnlp.ml.classifier.linear.Linear;
import org.fnlp.ml.classifier.linear.OnlineTrainer;
import org.fnlp.ml.classifier.linear.inf.Inferencer;
import org.fnlp.ml.classifier.linear.update.Update;
import org.fnlp.ml.classifier.struct.inf.HigherOrderViterbi;
import org.fnlp.ml.classifier.struct.inf.LinearViterbi;
import org.fnlp.ml.classifier.struct.update.HigherOrderViterbiPAUpdate;
import org.fnlp.ml.classifier.struct.update.LinearViterbiPAUpdate;
import org.fnlp.ml.loss.struct.HammingLoss;
import org.fnlp.ml.types.InstanceSet;
import org.fnlp.ml.types.alphabet.AlphabetFactory;
import org.fnlp.ml.types.alphabet.HashFeatureAlphabet;
import org.fnlp.ml.types.alphabet.IFeatureAlphabet;
import org.fnlp.ml.types.alphabet.LabelAlphabet;
import org.fnlp.ml.types.alphabet.AlphabetFactory.Type;
import org.fnlp.nlp.pipe.Pipe;
import org.fnlp.nlp.pipe.SeriesPipes;
import org.fnlp.nlp.pipe.Target2Label;
import org.fnlp.nlp.pipe.seq.Sequence2FeatureSequence;
import org.fnlp.nlp.pipe.seq.templet.BaseTemplet;
import org.fnlp.nlp.pipe.seq.templet.CharClassTemplet;
import org.fnlp.nlp.pipe.seq.templet.CharInStringTemplet;
import org.fnlp.nlp.pipe.seq.templet.StringTypeTemplet;
import org.fnlp.nlp.pipe.seq.templet.Templet;
import org.fnlp.nlp.pipe.seq.templet.TempletGroup;
import org.fnlp.nlp.tag.AbstractTagger;
import org.fnlp.ontology.CharClassDictionary;

/**
* 序列标注器训练和测试程序
*
* @author xpqiu
*
*/
public class POSTrain extends AbstractTagger {

 
  public Pipe createProcessor() throws Exception {

    templets = new TempletGroup();
    templets.load(templateFile);
    for(Templet templet:templets){
      ((BaseTemplet) templet).minLen = 5;
    }
    // Dictionary d = new Dictionary();
    // d.loadWithWeigth("D:/xpqiu/项目/自选/CLP2010/CWS/av-b-lut.txt",
    // "AV");
    // templets.add(new DictionaryTemplet(d, gid++, -1, 0));
    // templets.add(new DictionaryTemplet(d, gid++, 0, 1));
    // templets.add(new DictionaryTemplet(d, gid++, -1,0, 1));
    // templets.add(new DictionaryTemplet(d, gid++, -2,-1,0, 1));

    templets.add(new CharInStringTemplet(templets.gid++,-1,1));
    templets.add(new CharInStringTemplet(templets.gid++,0,1));
    templets.add(new CharInStringTemplet(templets.gid++,-2,2));
    templets.add(new StringTypeTemplet(templets.gid++));

    CharClassDictionary dsurname = new CharClassDictionary();
    dsurname.load("../data/knowledge/百家姓.txt", "姓");
    templets.add(new CharClassTemplet(templets.gid++, new CharClassDictionary[]{dsurname}));

    if (cl != null)
      factory = cl.getAlphabetFactory();
    else
      factory = AlphabetFactory.buildFactory();
   
    //TODO: 修改字典类型
    AlphabetFactory.defaultFeatureType = Type.String;
    /**
     * 标签转为0、1、2、...
     */
    LabelAlphabet labels = factory.DefaultLabelAlphabet();

    // 将样本通过Pipe抽取特征
    IFeatureAlphabet features = factory.DefaultFeatureAlphabet();
    featurePipe = new Sequence2FeatureSequence(templets, features, labels);

    Pipe pipe = new SeriesPipes(new Pipe[] { new Target2Label(labels), featurePipe });
    return pipe;
  }
}
TOP

Related Classes of org.fnlp.train.pos.POSTrain

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.