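* @param optSegPath the segmentation path (list of nodes) to adjust; a null or empty list is not processed
* @param dictLib the dictionary library from which the place and person unknown-word dictionaries are obtained
* @return the adjusted list of nodes; {@code null} when {@code dictLib} is {@code null}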
*/
public static ArrayList<SegNode> finalAdjust(ArrayList<SegNode> optSegPath, DictLib dictLib) {
SegNode wr = null;
ArrayList<SegNode> result = null;
if (dictLib == null)
return null;
Dictionary placeTagger = dictLib.getPlaceUnknownDict();
Dictionary personTagger = dictLib.getPersonUnknownDict();
if (optSegPath != null && optSegPath.size() > 0 && personTagger != null && placeTagger != null) {
result = new ArrayList<SegNode>();
for (int i = 0; i < optSegPath.size(); i++) {
boolean isBeProcess = false;
wr = optSegPath.get(i);
// if (wr.getPos() == POSTag.NOUN_PERSON && (pname =
// Utility.chineseNameSplit(wr.getSrcWord(), personTagger)) !=
// null
// && !"Ҷ����".equals(wr.getSrcWord())) {
// if (pname.getFirstName() != null) {
// SegNode wr2 = new SegNode();
// wr2.setWord(pname.getFirstName());
// wr2.setPos(POSTag.NOUN_PERSON);
// result.add(wr2);
// }
//
// if (pname.getMidName() != null) {
// SegNode wr2 = new SegNode();
// wr2.setWord(pname.getMidName());
// wr2.setPos(POSTag.NOUN_PERSON);
// result.add(wr2);
// }
//
// if (pname.getLastName() != null) {
// SegNode wr2 = new SegNode();
// wr2.setWord(pname.getLastName());
// wr2.setPos(POSTag.NOUN_PERSON);
// result.add(wr2);
// }
//
// isBeProcess = true;
// }
// Rule2 for overlap words ABB һ�ζΡ�һƬƬ
if (wr.getPos() == POSTag.NUM && i + 2 < optSegPath.size() && optSegPath.get(i + 1).getLen() == 2
&& optSegPath.get(i + 1).getSrcWord().equals(optSegPath.get(i + 2).getSrcWord())) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + optSegPath.get(i + 1).getSrcWord() + optSegPath.get(i + 2).getSrcWord());
wr2.setPos(POSTag.NUM);
result.add(wr2);
i += 2;
isBeProcess = true;
}
// Rule3 for overlap words AA
else if (wr.getLen() == 2 && i + 1 < optSegPath.size() && wr.getSrcWord().equals(optSegPath.get(i + 1).getSrcWord())) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + optSegPath.get(i + 1).getSrcWord());
wr2.setPos(POSTag.ADJ);
if (wr.getPos() == POSTag.VERB || optSegPath.get(i + 1).getPos() == POSTag.VERB)// 30208='v'8256
wr2.setPos(POSTag.VERB);
if (wr.getPos() == POSTag.NOUN || optSegPath.get(i + 1).getPos() == POSTag.NOUN)// 30208='v'8256
wr2.setPos(POSTag.NOUN);
i += 1;
if (optSegPath.get(i + 1).getLen() == 2) {// AAB:ϴ/ϴ/����������
if ((wr2.getPos() == POSTag.VERB && optSegPath.get(i + 1).getPos() == POSTag.NOUN)
|| (wr2.getPos() == POSTag.ADJ && optSegPath.get(i + 1).getPos() == POSTag.ADJ)) {
wr2.setWord(wr2.getWord() + optSegPath.get(i + 1).getSrcWord());
i += 1;
}
}
isBeProcess = true;
result.add(wr2);
}
// Rule 4: AAB ϴ/ϴ��
else if (wr.getLen() == 2 && i + 1 < optSegPath.size() && (wr.getPos() == POSTag.VERB || wr.getPos() == POSTag.ADJ)
&& optSegPath.get(i + 1).getLen() == 4 && optSegPath.get(i + 1).getSrcWord().indexOf(wr.getSrcWord()) == 0) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getWord() + optSegPath.get(i + 1).getSrcWord());
wr2.setPos(POSTag.ADJ); // 24832=='a'*256
if (wr.getPos() == POSTag.VERB || optSegPath.get(i + 1).getPos() == POSTag.VERB)// 30208='v'8256
wr2.setPos(POSTag.VERB);
i += 1;
isBeProcess = true;
result.add(wr2);
} else if (wr.getPos() / 256 == 'u' && wr.getPos() % 256 != 0)// uj,ud,uv,uz,ul,ug->u
wr.setPos('u' * 256);
// AABB,��������
else if (wr.getLen() == 2 && i + 2 < optSegPath.size() && optSegPath.get(i + 1).getLen() == 4
&& optSegPath.get(i + 1).getWord().indexOf(wr.getWord()) == 0
&& optSegPath.get(i + 1).getWord().indexOf(optSegPath.get(i + 2).getWord()) == 0) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getWord() + optSegPath.get(i + 1).getWord() + optSegPath.get(i + 2).getWord());
wr2.setPos(optSegPath.get(i + 1).getPos());
i += 2;
isBeProcess = true;
result.add(wr2);
}
// 28275=='n'*256+'s' ����+X
else if (wr.getPos() == POSTag.NOUN_SPACE && i + 1 < optSegPath.size())// PostFix
{
SegNode next = optSegPath.get(i + 1);
int gbkID = next.getGbkID();
if (placeTagger.isExist(next.getSrcWord(), 4, gbkID)) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + next.getSrcWord());
wr2.setPos(POSTag.NOUN_SPACE);
i += 1;
isBeProcess = true;
result.add(wr2);
} else if ("��".equals(next.getSrcWord())) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + next.getSrcWord());
wr2.setPos(POSTag.NOUN_ORG);
i += 1;
isBeProcess = true;
result.add(wr2);
} else if (optSegPath.get(i + 1).getLen() == 2 && "�����ֱ�".indexOf(next.getSrcWord()) != -1) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + next.getSrcWord());
wr2.setPos(POSTag.NOUN_ZHUAN);
i += 1;
isBeProcess = true;
result.add(wr2);
} else if ("��".equals(next.getSrcWord())) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + next.getSrcWord());
wr2.setPos(POSTag.NOUN);
i += 1;
isBeProcess = true;
result.add(wr2);
}
} else if (wr.getPos() == POSTag.VERB || wr.getPos() == POSTag.VERB_NOUN || wr.getPos() == POSTag.NOUN)// v
{
if (i + 1 < optSegPath.size() && "Ա".equals(optSegPath.get(i + 1).getSrcWord())) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord() + optSegPath.get(i + 1).getSrcWord());
wr2.setPos(POSTag.NOUN);
i += 1;
isBeProcess = true;
result.add(wr2);
}
}
// www/nx ./w sina/nx;
// �ţɣ�/nx -������/m
// �ӣȣ�/nx ��/w ������/m
// 28280=='n'*256+'r'
// 27904=='m'*256
else if (wr.getPos() == POSTag.NOUN_LETTER && i + 1 < optSegPath.size()) {
SegNode wr2 = new SegNode();
wr2.setWord(wr.getSrcWord());
wr2.setPos(POSTag.NOUN_LETTER);
while (true) {
SegNode nextSN = optSegPath.get(i + 1);
if (nextSN.getPos() == POSTag.NOUN_LETTER || ".��-��".indexOf(nextSN.getSrcWord()) != -1
|| (nextSN.getPos() == POSTag.NUM && Utility.isAllNum(nextSN.getSrcWord()))) {
wr2.setWord(wr2.getSrcWord() + nextSN.getSrcWord());
i++;
} else
break;
}
isBeProcess = true;
result.add(wr2);
}
// If not processed,that's mean: not need to adjust;
// just copy to the final result
if (!isBeProcess) {
try {
SegNode wr2 = wr.clone();
result.add(wr2);
} catch (CloneNotSupportedException e) {
e.printStackTrace();
}