SegNode sn = null;
Atom atom = null;
if (atoms != null && atoms.size() > 0 && dictLib != null) {
segGraph = new SegGraph();
Dictionary dict = dictLib.getCoreDict();
// First pass: recognize the runs of non-Chinese characters (numbers, letters, punctuation, ...)
for (int i = 0; i < atoms.size(); i++) {
atom = atoms.get(i);
String word = atom.getWord();
if (atom.getPos() == Utility.CT_CHINESE)
sn = new SegNode(i, i + 1, 0, 0, atom.getWord());
else {
int pos = 0;
double value = Utility.MAX_FREQUENCE;
switch (atom.getPos()) {
case Utility.CT_INDEX:
case Utility.CT_NUM:
pos = -POSTag.NUM;// 'm'*256
word = Utility.UNKNOWN_NUM;
value = 0;
break;
case Utility.CT_DELIMITER:
pos = POSTag.PUNC;// 'w'*256;
break;
case Utility.CT_LETTER:
pos = -POSTag.NOUN_LETTER;//
value = 0;
word = Utility.UNKNOWN_LETTER;
break;
case Utility.CT_SINGLE:// 12021-2129-3121
if (Utility.getCharCount("+-1234567890", atom.getWord()) == atom.getLen()) {
pos = -POSTag.NUM;// 'm'*256
word = Utility.UNKNOWN_NUM;
} else {
pos = -POSTag.NOUN_LETTER;//
word = Utility.UNKNOWN_LETTER;
}
value = 0;
break;
default:
pos = atom.getPos();// '?'*256;
break;
}
int gbkID = dictLib.getGBKID(word);
sn = new SegNode(i, i + 1, pos, value, word);
sn.setGbkID(gbkID);
}
sn.setSrcWord(atom.getWord());
segGraph.insert(sn, true);
}
StringBuffer words = new StringBuffer();
for (int i = 0; i < atoms.size(); i++) {
int j = i + 1;
words.delete(0, words.length());
words.append(atoms.get(i).getWord());
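// If the current atom and the next one together form "月份" (month), do not split them.
// NOTE(review): the two string literals compared below appear mis-encoded in this file;
// presumably they should be "\u6708" and "\u4efd" — confirm against the file's source encoding.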
boolean flag = false;
if (j < atoms.size()) {
Atom a2 = atoms.get(j);
if ("��".equals(words.toString()) && "��".equals(a2.getWord())) {
segGraph.delete(i, j);
segGraph.delete(i + 1, j + 1);
words.append(a2.getWord());
flag = true;
j++;
}
}
SegAtom sa = null;
String word = words.toString();
int gbkID = dictLib.getGBKID(word);
int wordMaxLen = dict.getWordMaxLen(word, gbkID);
for (; j <= atoms.size() && word.length() < wordMaxLen; j++) {
word = words.toString();
sa = dict.getSegAtom(word, gbkID);
if (sa != null) {
// e.g. "1年内" (within 1 year), "1999年末" (end of 1999)
// if (word.length() == 2 && segGraph.getSize() > 0) {
// SegNode g2 = segGraph.getLast();
// if (Utility.isAllNum(g2.getWord()) ||