Package com.NLP.Train

Source Code of com.NLP.Train.Train

package com.NLP.Train;
/*
* Burkan Yılmaz
* Tübitak Bilgem NLP Course Project
* 26/06/2013
* */
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.StringTokenizer;



import com.NLP.ReqExp.RegExp;

public class Train {
 
  public String[] data;
  public RegExp regexp;
  public static void main(String[] args) {
    Train t = new Train();
    t.WriteTrainData();
  }
  public Train(){
    regexp = new RegExp();
    data = new String[8];
    data[0] = "cnn-turk.txt";
    data[1] = "derlem-lisans.txt";
    data[2] = "dunya.txt";
    //data[3] = "hukuki-net.txt";
    data[3] = "milliyet-sondakika.txt";
    data[4] = "ntvmsnbc.txt";
    data[5] = "radikal.txt";
    data[6] = "star-gazete.txt";
    data[7] = "tbmm.txt";
    //data[8] = "sentences_utf_8.txt";
  }
  public void WriteTrainData(){
    try{
      File trainData = new File(System.getProperty("user.home")+"/Desktop/sentence1M.txt");
      if(!trainData.isFile()){
        trainData.createNewFile();
      }
      FileWriter writer = new FileWriter(trainData.getAbsoluteFile());
      BufferedWriter bw = new BufferedWriter(writer);
      for(int i = 0;i<data.length;i++){
        BufferedReader reader = new BufferedReader(new FileReader(System.getProperty("user.home")+"/Desktop/PROJECT/NLP/yaz-okulu/metin-derlemler/"+data[i]));
        String line = "";
        while((line = reader.readLine())!=null){
          if(line.contains("<DOC_END>")){
            continue;
          }else{
            line = line.replaceAll("\""," ");
            line = regexp.SplitPunct(line);
            StringTokenizer tokens = new StringTokenizer(line);
            line = "";
            while(tokens.hasMoreTokens()){
              String str = tokens.nextToken();
              line += str+" ";
            }
            bw.write(line);
            bw.write("\n");
          }
        }
        System.out.println("File "+(int)(i+1)+" is done.");
        System.out.println("File is done.");
      }
      bw.close();
    }catch(IOException e){
      e.printStackTrace();
    }
  }
}
TOP

Related Classes of com.NLP.Train.Train

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.