Examples of SegNode


Examples of org.ictclas4j.bean.SegNode

  public int getMaxCol() {
    int result = -1;

    if (snList != null && snList.size() > 0) {
      int size = snList.size();
      SegNode sn = snList.get(size - 1);
      result = sn.getCol();
    }

    return result;
  }
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

   * For every POS candidate of each node, finds the best-matching (lowest-cost) POS position in the previous node.
   *
   */
  public void getBestPrev(ContextStat context) {
    if (snList != null) {
      SegNode sn = null;
      ArrayList<POS> posList = null;
      for (int i = 1; i < snList.size(); i++) {
        sn = snList.get(i);
        posList = sn.getAllPos();
        for (int j = 0; posList != null && j < posList.size(); j++) {
          double minFee = 1000000;
          int minPrev = 100000;
          POS pos = posList.get(j);
          SegNode psn = snList.get(i - 1);
          ArrayList<POS> pposList = psn.getAllPos();
          for (int k = 0; pposList != null && k < pposList.size(); k++) {
            double temp = -Math.log(context
                .getPossibility(0, pposList.get(k).getTag(), pos.getTag()));
            temp += pposList.get(k).getFreq();// Add the fees
            if (temp < minFee) {
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    String temp=null;
    char[] pos = new char[2];
    if (wrList != null && wrList.size() > 0) {
      result = "";
      for (int i = 0; i < wrList.size(); i++) {
        SegNode sn = wrList.get(i);
        if (sn.getPos() != POSTag.SEN_BEGIN && sn.getPos() != POSTag.SEN_END) {
          int tag = Math.abs(sn.getPos());
          pos[0] = (char) (tag / 256);
          pos[1] = (char) (tag % 256);
          temp=""+pos[0];
          if(pos[1]>0)
            temp+=""+pos[1];
          result += sn.getSrcWord() + "/" + temp + " ";
        }
      }
    }

    return result;
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    if (sgs != null) {
      wordResult = new ArrayList<SegNode>();

      for (int i = 0; i < sgs.size(); i++, index++) {
        SegNode sn = sgs.get(i);
        String srcWord = null;
        String curWord = sn.getSrcWord();
        SegNode newsn = new SegNode();
        pos = sn.getPos();

        boolean isNum = false;
        if ((Utility.isAllNum(curWord) || Utility.isAllChineseNum(curWord))) {
          isNum = true;
          for (j = i + 1; j < sgs.size() - 1; j++) {
            String temp = sgs.get(j).getSrcWord();
            // ������ڵļ����ַ��������֣�������ǽ��кϲ�
            if (Utility.isAllNum(temp) || Utility.isAllChineseNum(temp)) {
              isNum = true;
              index = j;
              curWord += temp;
            } else
              break;

          }
        }

        // ����������֣����ǿ��Ժ�ǰ������ֹ������ڣ�����������ǰһ���ڵ�
        // ����ֱ�ӰѸýڵ���ӵ��������
        if (!isNum) {
          SegNode prevsn = null;
          if (wordResult.size() > 0)
            prevsn = wordResult.get(wordResult.size() - 1);
          if (Utility.isDelimiter(curWord)) {
            // �����һ���ַ�Ҳ�Ƿָ���������кϲ�
            if (prevsn != null && Utility.isDelimiter(prevsn.getWord())) {
              prevsn.setCol(sn.getCol());
              prevsn.appendWord(curWord);
              continue;
            } else
              // 'w'*256;Set the POS with 'w'
              pos = POSTag.PUNC;
          } else if (curWord.length() == 1 && "����ʱ����".indexOf(curWord) != -1 || "�·�".equals(curWord)) {
            if (prevsn != null && prevsn.getPos() == -POSTag.NUM) {
              prevsn.setCol(sn.getCol());
              prevsn.setWord(Utility.UNKNOWN_TIME);
              prevsn.setSrcWord(prevsn.getSrcWord() + curWord);
              prevsn.setPos(-POSTag.TIME);
              continue;
            }
          } else if ("��".equals(curWord)) {
            if (prevsn != null && Utility.isYearTime(prevsn.getSrcWord())) {
              prevsn.setCol(sn.getCol());
              prevsn.setWord(Utility.UNKNOWN_TIME);
              prevsn.setSrcWord(prevsn.getSrcWord() + curWord);
              prevsn.setPos(-POSTag.TIME);
              continue;
            }
          }
        } else {

          // �����ǰ�ַ����������������ַ���ɵĶ�����һ�����֣��������Ӧ��ԭʼ�ڵ���ϢҲ��ӵ��������
          if (NumUtil.isNumStrNotNum(curWord)) {
            for (int k = i; k <= index; k++)
              wordResult.add(sgs.get(k));
            continue;
          }
          // ��һ������
          else {
            // �����������������ʽ��
            // 3-4�£�����ǰԪ����һ�����֣�ǰһ���Ƿָ�����ǰǰһ��Ҳ�����֣���ǰԪ��Ӧ��������
            boolean flag = false;
            int size = wordResult.size();
            if (wordResult.size() > 1) {
              SegNode prevPrevsn = wordResult.get(size - 2);
              SegNode prevsn = wordResult.get(size - 1);
              if (NumUtil.isNumDelimiter(prevPrevsn.getPos(), prevsn.getWord())) {
                pos = POSTag.NUM;
                flag = true;
              }
            }
            if (!flag) {
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

   * @return
   */
  public static ArrayList<SegNode> finaAdjust(ArrayList<SegNode> optSegPath, PosTagger personTagger,
      PosTagger placeTagger) {
    ArrayList<SegNode> result = null;
    SegNode wr = null;

    if (optSegPath != null && optSegPath.size() > 0 && personTagger != null && placeTagger != null) {

      result = new ArrayList<SegNode>();
      for (int i = 0; i < optSegPath.size(); i++) {
        boolean isBeProcess = false;
        wr = optSegPath.get(i);
        // if (wr.getPos() == POSTag.NOUN_PERSON
        // && (pname = Utility.chineseNameSplit(wr.getSrcWord(),
        // personTagger)) != null
        // && !"Ҷ����".equals(wr.getSrcWord())) {
        // if (pname.getFirstName() != null) {
        // SegNode wr2 = new SegNode();
        // wr2.setWord(pname.getFirstName());
        // wr2.setPos(POSTag.NOUN_PERSON);
        // result.add(wr2);
        // }
        //
        // if (pname.getMidName() != null) {
        // SegNode wr2 = new SegNode();
        // wr2.setWord(pname.getMidName());
        // wr2.setPos(POSTag.NOUN_PERSON);
        // result.add(wr2);
        // }
        //
        // if (pname.getLastName() != null) {
        // SegNode wr2 = new SegNode();
        // wr2.setWord(pname.getLastName());
        // wr2.setPos(POSTag.NOUN_PERSON);
        // result.add(wr2);
        // }
        //
        // isBeProcess = true;
        // }
        // Rule2 for overlap words ABB һ�ζΡ�һƬƬ
        if (wr.getPos() == POSTag.NUM && i + 2 < optSegPath.size() && optSegPath.get(i + 1).getLen() == 2
            && optSegPath.get(i + 1).getSrcWord().equals(optSegPath.get(i + 2).getSrcWord())) {
          SegNode wr2 = new SegNode();
          wr2.setWord(wr.getSrcWord() + optSegPath.get(i + 1).getSrcWord()
              + optSegPath.get(i + 2).getSrcWord());
          wr2.setPos(POSTag.NUM);
          result.add(wr2);
          i += 2;
          isBeProcess = true;
        }
        // Rule3 for overlap words AA
        else if (wr.getLen() == 2 && i + 1 < optSegPath.size()
            && wr.getSrcWord().equals(optSegPath.get(i + 1).getSrcWord())) {
          SegNode wr2 = new SegNode();
          wr2.setWord(wr.getSrcWord() + optSegPath.get(i + 1).getSrcWord());
          wr2.setPos(POSTag.ADJ);
          if (wr.getPos() == POSTag.VERB || optSegPath.get(i + 1).getPos() == POSTag.VERB)// 30208='v'8256
            wr2.setPos(POSTag.VERB);

          if (wr.getPos() == POSTag.NOUN || optSegPath.get(i + 1).getPos() == POSTag.NOUN)// 30208='v'8256
            wr2.setPos(POSTag.NOUN);

          i += 1;
          if (optSegPath.get(i + 1).getLen() == 2) {// AAB:ϴ/ϴ/����������
            if ((wr2.getPos() == POSTag.VERB && optSegPath.get(i + 1).getPos() == POSTag.NOUN)
                || (wr2.getPos() == POSTag.ADJ && optSegPath.get(i + 1).getPos() == POSTag.ADJ)) {
              wr2.setWord(wr2.getWord() + optSegPath.get(i + 1).getSrcWord());
              i += 1;
            }
          }
          isBeProcess = true;
          result.add(wr2);
        }
        // Rule 4: AAB ϴ/ϴ��
        else if (wr.getLen() == 2 && i + 1 < optSegPath.size()
            && (wr.getPos() == POSTag.VERB || wr.getPos() == POSTag.ADJ)
            && optSegPath.get(i + 1).getLen() == 4
            && optSegPath.get(i + 1).getSrcWord().indexOf(wr.getSrcWord()) == 0) {
          SegNode wr2 = new SegNode();
          wr2.setWord(wr.getWord() + optSegPath.get(i + 1).getSrcWord());
          wr2.setPos(POSTag.ADJ); // 24832=='a'*256

          if (wr.getPos() == POSTag.VERB || optSegPath.get(i + 1).getPos() == POSTag.VERB)// 30208='v'8256
            wr2.setPos(POSTag.VERB);

          i += 1;
          isBeProcess = true;
          result.add(wr2);
        } else if (wr.getPos() / 256 == 'u' && wr.getPos() % 256 != 0)// uj,ud,uv,uz,ul,ug->u
          wr.setPos('u' * 256);
        // AABB,��������
        else if (wr.getLen() == 2 && i + 2 < optSegPath.size() && optSegPath.get(i + 1).getLen() == 4
            && optSegPath.get(i + 1).getWord().indexOf(wr.getWord()) == 0
            && optSegPath.get(i + 1).getWord().indexOf(optSegPath.get(i + 2).getWord()) == 0) {
          SegNode wr2 = new SegNode();
          wr2.setWord(wr.getWord() + optSegPath.get(i + 1).getWord() + optSegPath.get(i + 2).getWord());
          wr2.setPos(optSegPath.get(i + 1).getPos());
          i += 2;
          isBeProcess = true;
          result.add(wr2);
        }
        // 28275=='n'*256+'s' ����+X
        else if (wr.getPos() == POSTag.NOUN_SPACE && i + 1 < optSegPath.size())// PostFix
        {
          SegNode next = optSegPath.get(i + 1);
          if (placeTagger.getUnknownDict().isExist(next.getSrcWord(), 4)) {
            SegNode wr2 = new SegNode();
            wr2.setWord(wr.getSrcWord() + next.getSrcWord());
            wr2.setPos(POSTag.NOUN_SPACE);
            i += 1;
            isBeProcess = true;
            result.add(wr2);
          } else if ("��".equals(next.getSrcWord())) {
            SegNode wr2 = new SegNode();
            wr2.setWord(wr.getSrcWord() + next.getSrcWord());
            wr2.setPos(POSTag.NOUN_ORG);
            i += 1;
            isBeProcess = true;
            result.add(wr2);
          } else if (optSegPath.get(i + 1).getLen() == 2 && "�����ֱ�".indexOf(next.getSrcWord()) != -1) {
            SegNode wr2 = new SegNode();
            wr2.setWord(wr.getSrcWord() + next.getSrcWord());
            wr2.setPos(POSTag.NOUN_ZHUAN);
            i += 1;
            isBeProcess = true;
            result.add(wr2);
          } else if ("��".equals(next.getSrcWord())) {
            SegNode wr2 = new SegNode();
            wr2.setWord(wr.getSrcWord() + next.getSrcWord());
            wr2.setPos(POSTag.NOUN);
            i += 1;
            isBeProcess = true;
            result.add(wr2);
          }
        } else if (wr.getPos() == POSTag.VERB  || wr.getPos() == POSTag.VERB_NOUN  ||wr.getPos() == POSTag.NOUN)// v
        {
          if (i + 1 < optSegPath.size() && "Ա".equals(optSegPath.get(i + 1).getSrcWord())) {
            SegNode wr2 = new SegNode();
            wr2.setWord(wr.getSrcWord() + optSegPath.get(i + 1).getSrcWord());
            wr2.setPos(POSTag.NOUN);
            i += 1;
            isBeProcess = true;
            result.add(wr2);
          }
        }
        // www/nx ./w sina/nx;
        // �ţɣ�/nx -������/m
        // �ӣȣ�/nx ��/w ������/m
        // 28280=='n'*256+'r'
        // 27904=='m'*256
        else if (wr.getPos() == POSTag.NOUN_LETTER && i + 1 < optSegPath.size()) {
          SegNode wr2 = new SegNode();
          wr2.setWord(wr.getSrcWord());
          wr2.setPos(POSTag.NOUN_LETTER);
          while (true) {
            SegNode nextSN = optSegPath.get(i + 1);
            if (nextSN.getPos() == POSTag.NOUN_LETTER || ".��-��".indexOf(nextSN.getSrcWord()) != -1
                || (nextSN.getPos() == POSTag.NUM && Utility.isAllNum(nextSN.getSrcWord()))) {
              wr2.setWord(wr2.getSrcWord() + nextSN.getSrcWord());
              i++;
            } else
              break;
          }
          isBeProcess = true;
          result.add(wr2);
        }
        // If not processed,that's mean: not need to adjust;
        // just copy to the final result
        if (!isBeProcess) {
          SegNode wr2 = new SegNode();
          wr2.setWord(wr.getSrcWord());
          wr2.setPos(wr.getPos());
          result.add(wr2);

        }
      }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.