List<String> features = new ArrayList<String>(100);
int ps = constituents.length;
// cons(-2), cons(-1), cons(0), cons(1), cons(2)
// cons(-2)
Parse p_2 = null;
Parse p_1 = null;
Parse p0 = null;
Parse p1 = null;
Parse p2 = null;
Collection<Parse> punct1s = null;
Collection<Parse> punct2s = null;
Collection<Parse> punct_1s = null;
Collection<Parse> punct_2s = null;
if (index - 2 >= 0) {
p_2 = constituents[index - 2];
}
if (index - 1 >= 0) {
p_1 = constituents[index - 1];
punct_2s = p_1.getPreviousPunctuationSet();
}
p0 = constituents[index];
punct_1s=p0.getPreviousPunctuationSet();
punct1s=p0.getNextPunctuationSet();
if (index + 1 < ps) {
p1 = constituents[index + 1];
punct2s = p1.getNextPunctuationSet();
}
if (index + 2 < ps) {
p2 = constituents[index + 2];
}
boolean u_2 = true;
boolean u_1 = true;
boolean u0 = true;
boolean u1 = true;
boolean u2 = true;
boolean b_2_1 = true;
boolean b_10 = true;
boolean b01 = true;
boolean b12 = true;
boolean t_2_10 = true;
boolean t_101 = true;
boolean t012 = true;
if (dict != null) {
if (p_2 != null) {
unigram[0] = p_2.getHead().toString();
u_2 = dict.contains(new StringList(unigram));
}
if (p2 != null) {
unigram[0] = p2.getHead().toString();
u2 = dict.contains(new StringList(unigram));
}
unigram[0] = p0.getHead().toString();
u0 = dict.contains(new StringList(unigram));
if (p_2 != null && p_1 != null) {
bigram[0] = p_2.getHead().toString();
bigram[1] = p_1.getHead().toString();
b_2_1 = dict.contains(new StringList(bigram));
trigram[0] = p_2.getHead().toString();
trigram[1] = p_1.getHead().toString();
trigram[2] = p0.getHead().toString();
t_2_10 = dict.contains(new StringList(trigram));
}
if (p_1 != null && p1 != null) {
trigram[0] = p_1.getHead().toString();
trigram[1] = p0.getHead().toString();
trigram[2] = p1.getHead().toString();
t_101 = dict.contains(new StringList(trigram));
}
if (p_1 != null) {
unigram[0] = p_1.getHead().toString();
u_1 = dict.contains(new StringList(unigram));
//extra check for 2==null case
b_2_1 = b_2_1 && u_1 & u_2;
t_2_10 = t_2_10 && u_1 & u_2 & u0;
t_101 = t_101 && u_1 & u0 && u1;
bigram[0] = p_1.getHead().toString();
bigram[1] = p0.getHead().toString();
b_10 = dict.contains(new StringList(bigram)) && u_1 && u0;
}
if (p1 != null && p2 != null) {
bigram[0] = p1.getHead().toString();
bigram[1] = p2.getHead().toString();
b12 = dict.contains(new StringList(bigram));
trigram[0] = p0.getHead().toString();
trigram[1] = p1.getHead().toString();
trigram[2] = p2.getHead().toString();
t012 = dict.contains(new StringList(trigram));
}
if (p1 != null) {
unigram[0] = p1.getHead().toString();
u1 = dict.contains(new StringList(unigram));
//extra check for 2==null case
b12 = b12 && u1 && u2;
t012 = t012 && u1 && u2 && u0;
t_101 = t_101 && u0 && u_1 && u1;
bigram[0] = p0.getHead().toString();
bigram[1] = p1.getHead().toString();
b01 = dict.contains(new StringList(bigram));
b01 = b01 && u0 && u1;
}
}
String consp_2 = cons(p_2, -2);
String consp_1 = cons(p_1, -1);
String consp0 = cons(p0, 0);
String consp1 = cons(p1, 1);
String consp2 = cons(p2, 2);
String consbop_2 = consbo(p_2, -2);
String consbop_1 = consbo(p_1, -1);
String consbop0 = consbo(p0, 0);
String consbop1 = consbo(p1, 1);
String consbop2 = consbo(p2, 2);
Cons c_2 = new Cons(consp_2,consbop_2,-2,u_2);
Cons c_1 = new Cons(consp_1,consbop_1,-1,u_1);
Cons c0 = new Cons(consp0,consbop0,0,u0);
Cons c1 = new Cons(consp1,consbop1,1,u1);
Cons c2 = new Cons(consp2,consbop2,2,u2);
//default
features.add("default");
//first constituent label
//features.add("fl="+constituents[0].getLabel());
// features.add("stage=cons(i)");
// cons(-2), cons(-1), cons(0), cons(1), cons(2)
if (u0) features.add(consp0);
features.add(consbop0);
if (u_2) features.add(consp_2);
features.add(consbop_2);
if (u_1) features.add(consp_1);
features.add(consbop_1);
if (u1) features.add(consp1);
features.add(consbop1);
if (u2) features.add(consp2);
features.add(consbop2);
//cons(0),cons(1)
cons2(features,c0,c1,punct1s,b01);
//cons(-1),cons(0)
cons2(features,c_1,c0,punct_1s,b_10);
//features.add("stage=cons(0),cons(1),cons(2)");
cons3(features,c0,c1,c2,punct1s,punct2s,t012,b01,b12);
cons3(features,c_2,c_1,c0,punct_2s,punct_1s,t_2_10,b_2_1,b_10);
cons3(features,c_1,c0,c1,punct_1s,punct1s,t_101,b_10,b01);
//features.add("stage=other");
String p0Tag = p0.getType();
if (p0Tag.equals("-RRB-")) {
for (int pi = index - 1; pi >= 0; pi--) {
Parse p = constituents[pi];
if (p.getType().equals("-LRB-")) {
features.add("bracketsmatch");
break;
}
if (p.getLabel().startsWith(Parser.START)) {
break;
}
}
}
if (p0Tag.equals("-RCB-")) {
for (int pi = index - 1; pi >= 0; pi--) {
Parse p = constituents[pi];
if (p.getType().equals("-LCB-")) {
features.add("bracketsmatch");
break;
}
if (p.getLabel().startsWith(Parser.START)) {
break;
}
}
}
if (p0Tag.equals("''")) {
for (int pi = index - 1; pi >= 0; pi--) {
Parse p = constituents[pi];
if (p.getType().equals("``")) {
features.add("quotesmatch");
break;
}
if (p.getLabel().startsWith(Parser.START)) {
break;
}
}
}
if (p0Tag.equals("'")) {
for (int pi = index - 1; pi >= 0; pi--) {
Parse p = constituents[pi];
if (p.getType().equals("`")) {
features.add("quotesmatch");
break;
}
if (p.getLabel().startsWith(Parser.START)) {
break;
}
}
}
if (p0Tag.equals(",")) {
for (int pi = index - 1; pi >= 0; pi--) {
Parse p = constituents[pi];
if (p.getType().equals(",")) {
features.add("iscomma");
break;
}
if (p.getLabel().startsWith(Parser.START)) {
break;
}
}
}
if (p0Tag.equals(".") && index == ps - 1) {
for (int pi = index - 1; pi >= 0; pi--) {
Parse p = constituents[pi];
if (p.getLabel().startsWith(Parser.START)) {
if (pi == 0) {
features.add("endofsentence");
}
break;
}