// Data-preparation pipeline: loads several corpora, writes them out through
// FNLP2BMES.w2BMES, converts the dictionaries through DICT.toBMES, merges the
// ".seg" files into trainfile, derives a dictionary from the merged file, and
// finally converts the test corpus.
// NOTE(review): datapath, dictfile, segfile, trainfile and testfile are defined
// outside this section; statement order matters because later steps read files
// written by earlier ones.

// Clean up: delete the dictionary and segmentation output files
MyFiles.delete(dictfile);
MyFiles.delete(segfile);
FNLPCorpus corpus = new FNLPCorpus();
// Read our own data
corpus.readOurCorpus(datapath + "/ourdata",".txt","UTF8");
// Read word-segmentation files
corpus.readCWS(datapath + "/FNLPDATA/seg",".txt","UTF8");
// Read segmentation + part-of-speech files
corpus.readPOS(datapath + "/FNLPDATA/pos",".txt","UTF8");
// Read FNLP-format data
corpus.read(datapath + "/FNLPDATA/ctb7.dat", null);
corpus.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-train.dat", null);
// Write the combined corpus to segfile
FNLP2BMES.w2BMES(corpus,segfile);
//FNLP2BMES.w2BMES(corpus,segfile_w); //?
// Convert dictionaries to BMES
// Sogou dictionary
DICT dict = new DICT();
// NOTE(review): within this section `sougou` is only referenced by the
// commented-out call below — confirm whether it is used later.
String sougou = datapath + "/FNLPDATA/dict/SogouLabDic.dic.raw";
// dict.readSougou(sougou,2,3,"sougou");
// Hudong dictionary
// NOTE(review): likewise, `hudong` is only referenced by the commented-out
// call below within this section.
String hudong = datapath + "/FNLPDATA/dict/hudong.dic.all";
// dict.readSougou(hudong,2,3,"");
// Add other dictionaries (".dic" files under FNLPDATA/dict)
dict.readDictionary(datapath + "/FNLPDATA/dict",".dic");
// Add other dictionaries with frequencies (currently disabled)
// dict.readDictionaryWithFrequency(datapath + "/FNLPDATA/dict",".dic.freq");
// Add part-of-speech dictionaries
dict.readPOSDICT(datapath + "/FNLPDATA/词性字典", ".txt");
dict.readPOSDICT(datapath + "/FNLPDATA/dict-sogou-input/txt", ".txt");
// Write the dictionary to dictfile
// NOTE(review): meaning of the second argument (3) is not visible here — confirm.
dict.toBMES(dictfile,3);
// Presumably java.io.File: schedules dictfile for deletion when the JVM exits,
// i.e. it is treated as a temporary intermediate file.
new File(dictfile).deleteOnExit();
// Merge training files: every ".seg" file found under FNLPDATA goes into trainfile
// NOTE(review): presumably this picks up segfile and dictfile written above —
// verify their paths/suffixes against where they are defined.
List<File> files = MyFiles.getAllFiles(datapath + "/FNLPDATA/", ".seg");
MyFiles.combine(trainfile,files.toArray(new File[files.size()]));
// Generate a new dictionary from the merged training file
String dicfile = datapath + "/FNLPDATA/train.dict";
DICT.BMES2DICT(trainfile,dicfile);
// Process test data
FNLPCorpus corpust = new FNLPCorpus();
// Read the WeiboFTB test set
corpust.read(datapath + "/FNLPDATA/WeiboFTB(v1.0)-test.dat", null);
// Write the test corpus to testfile
FNLP2BMES.w2BMES(corpust,testfile);