package nl2sql.parserQuestion;
import gate.AnnotationSet;
import java.util.*;
import nl2sql.utils.Directory;
/**
 * Parses a natural-language question with a {@link GateChunker} and exposes
 * the resulting annotations ("Lookup", "Word", "Maucauhoi") together with the
 * query tuples derived from them.
 *
 * <p>All state is kept in public mutable fields, so an instance is not safe to
 * share between concurrent callers.
 *
 * @author : Duc Tam Hoang
 */
public class LinguisticComponent implements GateQueryTypes {

    /** Chunker used to annotate the question text. */
    public GateChunker gateChunker;

    /** The cleaned question most recently passed to {@link #parseQuestion}. */
    public String question;

    /**
     * Pattern identifier handed to the tuple builder. It is never assigned in
     * this class; external code may set it before parsing.
     */
    public int patternMaching;

    /** Tuples produced by {@link #CreateQueryTuples(String)}. */
    public QueryTuple[] queryTuples;

    /** "Maucauhoi" annotations; they determine the structure of the question. */
    public IEAnnotation[] Maucauhoi_ann;

    /** "Lookup" annotations, with overlapping entries removed after parsing. */
    public IEAnnotation[] Lookup_ann;

    /** "Word" annotations of the question. */
    public IEAnnotation[] TokenVn_ann;

    /** Not populated by this class (the extraction is currently disabled). */
    public IEAnnotation[] Person_ann;

    /** Directory passed to the {@link #LinguisticComponent(String)} constructor. */
    public String gatedir = "";

    /** {@link #gatedir} truncated just before "WEB-INF", or the directory itself. */
    public String runpath = "";

    /** Not assigned anywhere in this class. */
    public String Dangmaucauhoi;

    /** Not used anywhere in this class. */
    boolean feedback = false;

    /**
     * Creates a component backed by a default-configured {@link GateChunker}.
     *
     * @throws Exception if the chunker cannot be created
     */
    public LinguisticComponent() throws Exception {
        gateChunker = new GateChunker();
        System.out.println("LinguisticComponent initialized");
    }

    /**
     * Creates a component whose chunker is initialised from {@code localdir}.
     *
     * @param localdir directory handed to the {@link GateChunker}
     * @throws Exception if the chunker cannot be created
     */
    public LinguisticComponent(String localdir) throws Exception {
        this.gatedir = localdir;
        System.out.println("Gate home " + gatedir);
        int webInfIndex = localdir.indexOf("WEB-INF");
        // Keep only the part in front of "WEB-INF" when it occurs after position 0;
        // otherwise the directory is used unchanged.
        if (webInfIndex > 0) {
            runpath = localdir.substring(0, webInfIndex);
        } else {
            runpath = localdir;
        }
        gateChunker = new GateChunker(localdir);
        System.out.println("LinguisticComponent initialized on " + localdir);
    }

    /**
     * Normalises a question: every '?' becomes a space, runs of two or more
     * whitespace characters collapse to a single space, and exactly one '?' is
     * appended (without a space directly in front of it).
     *
     * @param question raw question text; must not be {@code null}
     * @return the normalised question, always ending in '?'
     */
    public String cleanQuestion(String question) {
        question = question.replaceAll("[\\?]", " ");
        question = question.replaceAll("\\s{2,}", " ");
        question = question + "?";
        // The only '?' left is the one just appended; drop a space preceding it.
        question = question.replaceFirst(" \\?", "\\?");
        return question;
    }

    /**
     * The main function. Parses the user's question and, when a question
     * pattern ("Maucauhoi") is found, builds {@link #queryTuples}.
     *
     * <p>When no pattern annotation is available (the array is {@code null} or
     * empty) a diagnostic is printed and {@link #queryTuples} is left untouched.
     *
     * @param quest    the raw question
     * @param col_data forwarded to {@link #CreateQueryTuples(String)}
     * @throws Exception if the chunker or the tuple creation fails
     */
    public void parseQuestion(String quest, String col_data) throws Exception {
        this.question = cleanQuestion(quest);
        AnnotationSet ann = gateChunker.ParseQuestion(question);
        System.out.println("\n-----Start(LinguisticComponent.java): "
                + "cac ham GetAnnotationsType(...) -----\n");
        Lookup_ann = gateChunker.GetAnnotationsType(question, "Lookup", ann);
        TokenVn_ann = gateChunker.GetAnnotationsType(question, "Word", ann);
        Maucauhoi_ann = gateChunker.GetAnnotationsType(question, "Maucauhoi", ann);
        // An empty array is treated like null: the tuple builder reads element 0
        // and would otherwise fail with ArrayIndexOutOfBoundsException.
        if (Maucauhoi_ann != null && Maucauhoi_ann.length > 0) {
            deleteOverlapLookup();
            System.out.println("\n-----End (In LinguisticComponent.java): clean_NP_QueryTerm()-----\n");
            System.out.println("\n-----Start: CreateQueryTuples()-----\n");
            CreateQueryTuples(col_data);
            System.out.println("\n-----End: CreateQueryTuples()-----\n");
        } else {
            System.out.println("PATTERN IS NULL --> NON-VALIDATE QUETION: "
                    + "PATTERN NOT RECOGNIZED");
        }
        System.out.println("\n-----End: classify pattern-----\n");
    }

    /** Debug helper: prints the sentence of every "Lookup" annotation. */
    private void showAnnotation() {
        if (Lookup_ann != null) {
            System.out.println("-----\nLooup là:");
            for (IEAnnotation lookup : Lookup_ann) {
                System.out.println(lookup.getSentence());
            }
        }
    }

    /** Debug helper: prints sentence and "POS" feature of every "Word" annotation. */
    private void showTokenVn() {
        if (TokenVn_ann != null) {
            System.out.println("----- This is TokenVn: ");
            for (IEAnnotation token : TokenVn_ann) {
                System.out.println(token.getSentence() + token.getFeature("POS"));
            }
        }
    }

    /**
     * Maps a category label to its numeric constant.
     *
     * <p>NOTE(review): the labels accepted here ("QU AVGBASIC", "QU RATEBASIC")
     * are spelled differently from those returned by
     * {@link #getTypeOfPattern(int)} ("QU AVG_BASIC", "QU RATE_BASIC") — confirm
     * which spelling the grammar actually emits.
     *
     * @param category label to look up; surrounding whitespace is ignored
     * @return the matching constant, or -1 when the label is unknown
     */
    private int getCategory(String category) {
        category = category.trim();
        if (category.equals("QU AVGBASIC")) {
            return AVG_BASIC;
        } else if (category.equals("QU RATEBASIC")) {
            return RATE_BASIC;
        } else if (category.equals("QU HOWMANY")) {
            return HOW_MANY;
        } else if (category.equals("QU GETINFOR")) {
            return GET_INFOR;
        } else if (category.equals("QU PERCENT RATIO")) {
            return QU_PERCENT_RATIO;
        } else if (category.equals("QU SUM CALC")) {
            return QU_SUM_CALC;
        } else if (category.equals("QU CORRELATION")) {
            return QU_CORRELATION;
        } else {
            return -1;
        }
    }

    /**
     * Builds the tuples for the current pattern and stores them in
     * {@link #queryTuples}.
     *
     * @param col_data forwarded to the tuple builder
     * @throws Exception if the tuple builder fails
     */
    public void CreateQueryTuples(String col_data) throws Exception {
        List<QueryTuple> tuples = CreateQueryTuples_Categories(patternMaching, col_data);
        System.out.println("Creating the query Tuple");
        this.queryTuples = tuples.toArray(new QueryTuple[tuples.size()]);
    }

    /**
     * Logs the "pattern" and "POS" features of the first pattern annotation.
     * No tuples are generated yet, so the returned list is always empty.
     *
     * @param patternMaching pattern identifier (only logged)
     * @param col_data       currently unused
     * @return an empty, mutable list
     * @throws Exception declared for callers; nothing here throws it explicitly
     */
    private List<QueryTuple> CreateQueryTuples_Categories(int patternMaching, String col_data)
            throws Exception {
        List<QueryTuple> tuples = new ArrayList<QueryTuple>();
        // CreateQueryTuples is public and may run before any pattern was found.
        if (Maucauhoi_ann == null || Maucauhoi_ann.length == 0) {
            return tuples;
        }
        System.out.println("In: " + patternMaching);
        String questionPattern = Maucauhoi_ann[0].getFeature("pattern");
        System.out.println("In: " + questionPattern);
        String categoryQuestion = Maucauhoi_ann[0].getFeature("POS");
        System.out.println("Loại câu hỏi:______ " + categoryQuestion);
        return tuples;
    }

    /**
     * Drops every "Lookup" annotation that does not start strictly after the
     * end of the last annotation that was kept. The first annotation is always
     * kept; arrays with fewer than two entries are left unchanged.
     *
     * <p>Comparing against the last <em>kept</em> annotation (rather than the
     * immediate predecessor) prevents an entry from surviving merely because
     * its discarded neighbour ended early.
     *
     * <p>NOTE(review): assumes the annotations are ordered by begin offset —
     * confirm against GateChunker.GetAnnotationsType.
     */
    private void deleteOverlapLookup() {
        if (Lookup_ann == null || Lookup_ann.length < 2) {
            return;
        }
        List<IEAnnotation> kept = new ArrayList<IEAnnotation>();
        kept.add(Lookup_ann[0]);
        int lastKeptEnd = Lookup_ann[0].getOffset_end();
        for (int i = 1; i < Lookup_ann.length; i++) {
            int candidateBegin = Lookup_ann[i].getOffset_begin();
            if (candidateBegin > lastKeptEnd) {
                kept.add(Lookup_ann[i]);
                lastKeptEnd = Lookup_ann[i].getOffset_end();
            }
        }
        Lookup_ann = kept.toArray(new IEAnnotation[kept.size()]);
    }

    /**
     * Restricts the pattern annotations to those that begin at offset 0 and end
     * at the last character of the question, then removes entries whose feature
     * set equals that of an earlier entry. Sets the field to {@code null} when
     * nothing spans the whole question. Not called from within this class.
     */
    private void cleanPatterns() {
        if (Maucauhoi_ann == null || Maucauhoi_ann.length < 1) {
            return;
        }
        System.out.println("Số mẫu: " + Maucauhoi_ann.length);

        int lastOffset = question.length() - 1;
        List<IEAnnotation> spanning = new ArrayList<IEAnnotation>();
        for (IEAnnotation pattern : Maucauhoi_ann) {
            if (pattern.getOffset_end() == lastOffset && pattern.getOffset_begin() == 0) {
                spanning.add(pattern);
            }
        }
        if (spanning.isEmpty()) {
            Maucauhoi_ann = null;
            return;
        }
        System.out.println("Số mẫu: " + spanning.size());

        // Keep the first annotation of every distinct feature set.
        List<Object> seenFeatures = new ArrayList<Object>();
        List<IEAnnotation> unique = new ArrayList<IEAnnotation>();
        for (IEAnnotation pattern : spanning) {
            Object features = pattern.getFeatures();
            if (!seenFeatures.contains(features)) {
                unique.add(pattern);
                seenFeatures.add(features);
            }
        }
        Maucauhoi_ann = unique.toArray(new IEAnnotation[unique.size()]);
    }

    /**
     * Placeholder for query validation.
     *
     * @return always 0
     */
    public int ValidateQuery() {
        return 0;
    }

    /**
     * Returns the label for a numeric category.
     *
     * <p>NOTE(review): the case labels are the literals 1..7, not the named
     * constants used by {@code getCategory}; verify that they agree.
     *
     * @param category numeric category
     * @return the label, or "Not valid" for any value outside 1..7
     */
    public static String getTypeOfPattern(int category) {
        switch (category) {
            case 1:
                return "QU AVG_BASIC";
            case 2:
                return "QU RATE_BASIC";
            case 3:
                return "QU HOWMANY";
            case 4:
                return "QU GETINFOR";
            case 5:
                return "QU PERCENT RATIO";
            case 6:
                return "QU SUM CALC";
            case 7:
                return "QU CORRELATION";
            default:
                return "Not valid";
        }
    }

    /**
     * Example: parses one sample question using the directory configured in
     * {@link Directory#directoryLinguisticComponent}.
     */
    public static void main(String[] args) throws Exception {
        LinguisticComponent chunk = new LinguisticComponent(Directory.directoryLinguisticComponent);
        String s = "Tỉnh nào nằm cạnh Hà Nội";
        String col_data = null;
        chunk.parseQuestion(s, col_data);
    }
}