/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package nl2sql.parserQuestion;
import danbikel.lisp.Sexp;
import danbikel.lisp.SexpList;
import danbikel.parser.Parser;
import danbikel.parser.Settings;
import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import nl2sql.utils.Directory;
/**
*
* @author TAMHD
*/
/**
 * Bridge between {@link LinguisticComponent} token annotations and the
 * Coltech parser.
 *
 * <p>Two ways of parsing are offered: {@link #Process(LinguisticComponent)}
 * parses in-process through {@code danbikel.parser.Parser}, while
 * {@link #toInput}, {@link #toOutput} and {@link #getOutput} exchange data
 * with an external parser jar through the input and output files.
 */
public final class ColtechParser {
    /** File that {@link #toInput} writes the bracketed sentence to. */
    static File input = new File(Directory.directoryColtechPaserInputFile);
    /** File that {@link #getOutput()} reads the parser results from. */
    static File output = new File(Directory.directoryColtechPaserOutputFile);
    /** Settings file handed to {@code Settings.load} before in-process parsing. */
    static String settingFilePath = Directory.directoryColtechPaserFolder + "/others/vietnamese.properties";
    /** Model file handed to the {@code Parser} constructor. */
    static String modelFilePath = Directory.directoryColtechPaserFolder + "/others/vtb-train_1.drd";

    /**
     * Creates a parser facade. The constructor performs no work.
     *
     * @param chunk currently unused
     */
    public ColtechParser(LinguisticComponent chunk) {
        // Intentionally empty: parsing is triggered explicitly through the
        // methods below.
    }

    /**
     * Parses the tokens of {@code chunk} in-process, without going through the
     * input/output files.
     *
     * <p>The settings and the model are loaded on every call. Every
     * square-bracketed span is removed from the textual form of the parse
     * tree before it is returned.
     *
     * @param chunk source of the token annotations ({@code TokenVn_ann}); a
     *              {@code null} token array is treated as an empty sentence
     * @return the textual parse tree with all {@code [...]} spans removed
     * @throws IOException if reading the settings, the model or the sentence fails
     * @throws ClassNotFoundException declared for the parser setup
     * @throws NoSuchMethodException declared for the parser setup
     * @throws InvocationTargetException declared for the parser setup
     * @throws IllegalAccessException declared for the parser setup
     * @throws InstantiationException declared for the parser setup
     */
    public String Process(LinguisticComponent chunk) throws IOException,
            ClassNotFoundException, NoSuchMethodException,
            InvocationTargetException, IllegalAccessException,
            InstantiationException {
        String std_input = joinTokens(chunk);
        Settings.load(settingFilePath);
        Parser parser = new Parser(modelFilePath);
        System.out.print("Parsing input: " + std_input);
        // The parser consumes the sentence as an S-expression list: "(w1 w2 ...)".
        String sent = "(" + std_input.trim() + ")";
        Sexp inpSent = Sexp.read(sent);
        Sexp parsedTree = parser.parse((SexpList) inpSent);
        String parse = parsedTree.toString();
        // Non-greedy match so that each bracketed span is removed separately.
        parse = parse.replaceAll("\\[.*?\\]", "");
        System.out.println("Parsing output: " + parse);
        return parse;
    }

    /**
     * Overwrites the parser input file with the tokens of {@code chunk},
     * written as one parenthesised line.
     *
     * @param chunk source of the token annotations ({@code TokenVn_ann})
     * @throws Exception if the input file cannot be opened or written
     */
    public void toInput(LinguisticComponent chunk) throws Exception {
        String std_input = joinTokens(chunk).trim();
        // A plain Writer is used instead of PrintWriter so that write failures
        // surface as IOException instead of being silently swallowed.
        Writer out = new FileWriter(input, false);
        try {
            out.write("(" + std_input + ")\n");
            System.out.println("std_input:" + std_input);
        } finally {
            out.close();
        }
    }

    /**
     * Rewrites the parser input file from {@code chunk} once for every line
     * of {@code questionSet}.
     *
     * <p>NOTE(review): the content of each line is not used. The call that
     * would parse the line into a chunk is disabled in this class, so every
     * iteration writes the same {@code chunk}. Presumably each line was meant
     * to be normalised (decimal comma to decimal point) and parsed into its
     * own chunk - confirm the intended behaviour.
     *
     * @param questionSet file whose line count drives the number of writes
     * @param chunk       token annotations written on every iteration
     * @throws Exception if {@code questionSet} cannot be read or the input
     *                   file cannot be written
     */
    public void writeInput(File questionSet, LinguisticComponent chunk) throws Exception {
        Scanner sc = new Scanner(questionSet);
        try {
            while (sc.hasNextLine()) {
                // Consume the line to advance the scanner; see the note above.
                sc.nextLine();
                toInput(chunk);
            }
        } finally {
            sc.close();
        }
    }

    /**
     * Runs the external parser jar on a background thread and returns
     * immediately, without waiting for the parser to finish.
     *
     * <p>The background thread waits for the child process and releases it
     * afterwards. Only the process started here is terminated; other Java
     * processes on the machine are left alone.
     *
     * @throws Exception declared for backward compatibility
     */
    public void toOutput() throws Exception {
        final String filepath = Directory.directoryColtechPaserJar;
        final File dir = new File(Directory.directoryColtechPaserFolder);
        Thread thread = new Thread() {
            @Override
            public void run() {
                System.out.println(" ----- Running the coltechparser ----- ");
                Process p = null;
                try {
                    // Argument list instead of a single command string, so a
                    // jar path containing spaces stays one argument.
                    // NOTE(review): assumes filepath is only the jar path
                    // with no extra arguments appended - confirm.
                    ProcessBuilder builder = new ProcessBuilder(
                            "java", "-Xms800m", "-Xmx800m", "-jar", filepath);
                    builder.directory(dir);
                    builder.redirectErrorStream(true);
                    p = builder.start();
                    // Drain and discard the child's output; an unread pipe
                    // can fill up and stall the child indefinitely.
                    InputStream childOutput = p.getInputStream();
                    byte[] sink = new byte[4096];
                    while (childOutput.read(sink) != -1) {
                        // discard
                    }
                    p.waitFor();
                } catch (IOException ex) {
                    System.err.println(ex.getMessage());
                } catch (InterruptedException ex) {
                    // Preserve the interrupt status for whoever owns the thread.
                    Thread.currentThread().interrupt();
                } finally {
                    if (p != null) {
                        p.destroy();
                    }
                }
            }
        };
        thread.start();
        System.out.println(thread.getName());
    }

    /**
     * Reads the whole parser output file.
     *
     * @return the lines of the output file, in file order
     * @throws Exception if the output file cannot be opened or read
     */
    public static ArrayList<String> getOutput() throws Exception {
        ArrayList<String> coltechparser = new ArrayList<String>();
        BufferedReader in = new BufferedReader(new FileReader(output));
        try {
            String line;
            // readLine() returns null at end of stream.
            while ((line = in.readLine()) != null) {
                coltechparser.add(line);
            }
        } finally {
            in.close();
        }
        return coltechparser;
    }

    /**
     * Joins the token sentences of {@code chunk} into one string. Spaces
     * inside a token become underscores and every token is followed by a
     * single space, so a non-empty result ends with a space.
     */
    private static String joinTokens(LinguisticComponent chunk) {
        StringBuilder joined = new StringBuilder();
        if (chunk.TokenVn_ann != null) {
            for (int i = 0; i < chunk.TokenVn_ann.length; i++) {
                String token = chunk.TokenVn_ann[i].getSentence();
                joined.append(token.replace(" ", "_")).append(' ');
            }
        }
        return joined.toString();
    }
}