Examples of Dictionary


Examples of org.dmd.util.parsing.Dictionary

        if (dict == null){
            Iterator<StringName>    it  = allDefs.keySet().iterator();
            int         id = Token.CUSTOM+1;
            StringName  key = null;

            dict = new Dictionary();
            while(it.hasNext()){
                key = (StringName)it.next();
                dict.add(new Token(key.getNameString(),id++,allDefs.get(key)));
            }
        }
View Full Code Here

Examples of org.eobjects.analyzer.reference.Dictionary

    Arrays.sort(names);

    final Icon icon = imageManager.getImageIcon(IconUtils.DICTIONARY_IMAGEPATH);

    for (final String name : names) {
      final Dictionary dictionary = _catalog.getDictionary(name);

      final DCLabel dictLabel = DCLabel
          .dark("<html><b>" + name + "</b><br/>" + getDescription(dictionary) + "</html>");
      dictLabel.setIcon(icon);
View Full Code Here

Examples of org.eobjects.analyzer.reference.Dictionary

    final String[] dictionaryNames = referenceDataCatalog.getDictionaryNames();
    for (String name : dictionaryNames) {
      _comboBox.addItem(referenceDataCatalog.getDictionary(name));
    }

    Dictionary currentValue = (Dictionary) beanJobBuilder.getConfiguredProperty(propertyDescriptor);
    _comboBox.setSelectedItem(currentValue);

    _comboBox.addActionListener(new ActionListener() {
      @Override
      public void actionPerformed(ActionEvent e) {
View Full Code Here

Examples of org.fnlp.ml.types.Dictionary

    System.out.println(tag.getSupportedTags().size());
    s = tag.tag(str);
    System.out.println(s);   
    System.out.println();
   
    CWSTagger cws2 = new CWSTagger("../models/seg.m", new Dictionary("../models/dict.txt"));
   
    //bool值指定该dict是否用于cws分词(分词和词性可以使用不同的词典)
    tag = new POSTagger(cws2, "../models/pos.m"
        , new Dictionary("../models/dict.txt"), true);//true就替换了之前的dict.txt
    tag.removeDictionary(false);//不移除分词的词典
    tag.setDictionary(new Dictionary("../models/dict.txt"), false);//设置POS词典,分词使用原来设置
   
    String str2 = "媒体计算研究所成立了,高级数据挖掘很难。乐phone很好!";
    String s2 = tag.tag(str2);
    System.out.println("处理未分词的句子,使用词典");
    System.out.println(s2);
    System.out.println();
   
    Dictionary dict = new Dictionary();
    dict.add("媒体计算","mypos1","mypos2");
    dict.add("乐phone","专有名");
    tag.setDictionary(dict, true);
    String s22 = tag.tag(str2);
    System.out.println(s22);
    System.out.println();
   
    POSTagger tag1 = new POSTagger("../models/pos.m");
    String str1 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难";
    String[] w = str1.split(" ");
    String[] s1 = tag1.tagSeged(w);
    System.out.println("直接处理分好词的句子:++++++++++");
    for(int i=0;i<s1.length;i++){
      System.out.print(w[i]+"/"+s1[i]+" ");
    }
    System.out.println("\n");
   
    POSTagger tag3 = new POSTagger("../models/pos.m", new Dictionary("../models/dict.txt"));
    String str3 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难 ";
    String[] w3 = str3.split(" ");
    String[] s3 = tag3.tagSeged(w3);
    System.out.println("直接处理分好词的句子,使用词典");
    for(int i=0;i<s3.length;i++){
View Full Code Here

Examples of org.ictclas4j.bean.Dictionary

  public static ArrayList<SegNode> finalAdjust(ArrayList<SegNode> optSegPath, DictLib dictLib) {
    SegNode wr = null;
    ArrayList<SegNode> result = null;
    if (dictLib == null)
      return null;
    Dictionary placeTagger = dictLib.getPlaceUnknownDict();
    Dictionary personTagger = dictLib.getPersonUnknownDict();

    if (optSegPath != null && optSegPath.size() > 0 && personTagger != null && placeTagger != null) {

      result = new ArrayList<SegNode>();
      for (int i = 0; i < optSegPath.size(); i++) {
View Full Code Here

Examples of org.ictclas4j.bean.Dictionary

 
  public static  PersonName chineseNameSplit(String word, PosTagger personTagger ) {
    PersonName result = null;

    if (word != null && personTagger!=null  ) {
      Dictionary personDict =personTagger.getUnknownDict();
      int len = word.length();
      if (len < 2 || len > 4)
        return null;
      String[] atoms = GFString.atomSplit(word);
      for (String s : atoms) {
        if (Utility.charType(s) != Utility.CT_CHINESE && Utility.charType(s) != Utility.CT_OTHER)
          return null;
      }

      String surName = null;
      int surNameLen = 2;
      if (len > 2)
        surName = word.substring(0, surNameLen);
      else if (len == 2)
        surName = word;
      if (!personDict.isExist(surName, 1)) {
        surNameLen = 1;
        if (len > 1)
          surName = word.substring(0, surNameLen);
        else if (len == 1)
          surName = word;
        if (!personDict.isExist(surName, 1)) {
          surName = null;
          surNameLen = 0;
        }
      }
      String giveName = word.substring(surNameLen);
      if (len > 3) {
        String temp = word.substring(surNameLen, surNameLen + 1);
        if (personDict.isExist(temp, 1)) {

          giveName = word.substring(surNameLen + 1);
        }
      }

      double freq = personDict.getFreq(surName, 1);
      String temp = giveName.substring(0, 1);
      double freq2 = personDict.getFreq(temp, 2);

      if (surNameLen != 2
          && ((surNameLen == 0 && len > 2) || giveName.length() > 2 || getForeignCharCount(word) >= 3
              && freq < personDict.getFreq("��", 1) / 40 && freq2 < personDict.getFreq("��", 2) / 20 || (freq < 10 && getForeignCharCount(giveName) == (len - surNameLen) / 2)))
        return null;
      if (len == 2 && personTagger.isGivenName(word))
        return null;
      result=new PersonName();
      result.setFirstName(surName);
View Full Code Here

Examples of org.ictclas4j.bean.Dictionary

    SegNode sn = null;
    Atom atom = null;

    if (atoms != null && atoms.size() > 0 && dictLib != null) {
      segGraph = new SegGraph();
      Dictionary dict = dictLib.getCoreDict();

      // �ȰѷǺ����ַ��Ĵ���ʶ�����
      for (int i = 0; i < atoms.size(); i++) {
        atom = atoms.get(i);
        String word = atom.getWord();
        if (atom.getPos() == Utility.CT_CHINESE)
          sn = new SegNode(i, i + 1, 0, 0, atom.getWord());
        else {
          int pos = 0;
          double value = Utility.MAX_FREQUENCE;

          switch (atom.getPos()) {
          case Utility.CT_INDEX:
          case Utility.CT_NUM:
            pos = -POSTag.NUM;// 'm'*256
            word = Utility.UNKNOWN_NUM;
            value = 0;
            break;
          case Utility.CT_DELIMITER:
            pos = POSTag.PUNC;// 'w'*256;
            break;
          case Utility.CT_LETTER:
            pos = -POSTag.NOUN_LETTER;//
            value = 0;
            word = Utility.UNKNOWN_LETTER;
            break;
          case Utility.CT_SINGLE:// 12021-2129-3121
            if (Utility.getCharCount("+-1234567890", atom.getWord()) == atom.getLen()) {
              pos = -POSTag.NUM;// 'm'*256
              word = Utility.UNKNOWN_NUM;
            } else {
              pos = -POSTag.NOUN_LETTER;//
              word = Utility.UNKNOWN_LETTER;
            }
            value = 0;
            break;
          default:
            pos = atom.getPos();// '?'*256;
            break;
          }

          int gbkID = dictLib.getGBKID(word);
          sn = new SegNode(i, i + 1, pos, value, word);
          sn.setGbkID(gbkID);
        }

        sn.setSrcWord(atom.getWord());
        segGraph.insert(sn, true);
      }

      StringBuffer words = new StringBuffer();
      for (int i = 0; i < atoms.size(); i++) {
        int j = i + 1;
        words.delete(0, words.length());
        words.append(atoms.get(i).getWord());

        // ����ǡ��·ݡ�����Ҫ�ָ�
        boolean flag = false;
        if (j < atoms.size()) {
          Atom a2 = atoms.get(j);
          if ("��".equals(words.toString()) && "��".equals(a2.getWord())) {
            segGraph.delete(i, j);
            segGraph.delete(i + 1, j + 1);
            words.append(a2.getWord());
            flag = true;
            j++;
          }
        }

        SegAtom sa = null;
        String word = words.toString();
        int gbkID = dictLib.getGBKID(word);
        int wordMaxLen = dict.getWordMaxLen(word, gbkID);
        for (; j <= atoms.size() && word.length() < wordMaxLen; j++) {
          word = words.toString();
          sa = dict.getSegAtom(word, gbkID);
          if (sa != null) {
            // 1���ڣ�1999��ĩ
            // if (word.length() == 2 && segGraph.getSize() > 0) {
            // SegNode g2 = segGraph.getLast();
            // if (Utility.isAllNum(g2.getWord()) ||
View Full Code Here

Examples of org.ictclas4j.bean.Dictionary

    double curFreq;
    SegGraph segGraph = null;
    final double smoothParam = 0.1;
    if (dictLib == null)
      return null;
    Dictionary dict = dictLib.getCoreDict();
    Dictionary biDict = dictLib.getBigramDict();

    if (seg != null && dict != null && biDict != null) {
      segGraph = new SegGraph();
      ArrayList<SegNode> sgs = seg.getSnList();

      for (int i = 0; sgs != null && i < sgs.size(); i++) {
        SegNode sg = sgs.get(i);
        if (sg.getPos() >= 0)
          curFreq = sg.getWeight();
        else {
          int gbkID = sg.getGbkID();// dictLib.getGBKID(sg.getWord());
          curFreq = dict.getFreq(sg.getWord(), 2, gbkID);
        }

        // �õ�������ֵ�͸���ֵ��ȵ�����Ԫ��
        ArrayList<SegNode> nextSgs = seg.getNextElements(i);
        for (SegNode graph : nextSgs) {
          String twoWords = sg.getWord();
          twoWords += Utility.WORD_SEGMENTER;
          twoWords += graph.getWord();
          int gbkID = sg.getGbkID();// dictLib.getGBKID(twoWords);

          // ��������������֮���ƽ��ֵ
          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          int twoFreq = biDict.getFreq(twoWords, 3, gbkID);
          double temp = (double) 1 / Utility.MAX_FREQUENCE;
          double value = smoothParam * (1 + curFreq) / (Utility.MAX_FREQUENCE + 80000);
          value += (1 - smoothParam) * ((1 - temp) * twoFreq / (1 + curFreq) + temp);
          value = -Math.log(value);

View Full Code Here

Examples of org.ictclas4j.bean.Dictionary

  public static PersonName chineseNameSplit( PosTagger personTagger,String word, int index) {
    PersonName result = null;

    if ( word != null && personTagger != null) {
      Dictionary personDict = personTagger.getUnknownDict();
      int len = word.length();
      if (len < 2 || len > 4)
        return null;
      String[] atoms = GFString.atomSplit(word);
      for (String s : atoms) {
        if (Utility.charType(s) != Utility.CT_CHINESE && Utility.charType(s) != Utility.CT_OTHER)
          return null;
      }

      String surName = null;
      int surNameLen = 2;
      if (len > 2)
        surName = word.substring(0, surNameLen);
      else if (len == 2)
        surName = word;
      if (!personDict.isExist( surName, 1,index)) {
        surNameLen = 1;
        if (len > 1)
          surName = word.substring(0, surNameLen);
        else if (len == 1)
          surName = word;
        if (!personDict.isExist( surName, 1,index)) {
          surName = null;
          surNameLen = 0;
        }
      }
      String giveName = word.substring(surNameLen);
      if (len > 3) {
        String temp = word.substring(surNameLen, surNameLen + 1);
        if (personDict.isExist( temp, 1,index)) {

          giveName = word.substring(surNameLen + 1);
        }
      }

      double freq = personDict.getFreq( surName, 1,index);
      String temp = giveName.substring(0, 1);
      double freq2 = personDict.getFreq( temp, 2,index);

      if (surNameLen != 2
          && ((surNameLen == 0 && len > 2) || giveName.length() > 2 || getForeignCharCount(word) >= 3
              && freq < personDict.getFreq( "��", 1,index) / 40 && freq2 < personDict.getFreq( "��", 2,index) / 20 || (freq < 10 && getForeignCharCount(giveName) == (len - surNameLen) / 2)))
        return null;
      if (len == 2 && personTagger.isGivenName(word))
        return null;
      result = new PersonName();
      result.setFirstName(surName);
View Full Code Here

Examples of org.ictclas4j.bean.Dictionary

  static Logger logger = Logger.getLogger(Segment.class);

  public Segment(int segPathCount) {
    this.segPathCount = segPathCount;
    logger.info("Load coreDict  ...");
    coreDict = new Dictionary("data\\coreDict.dct");

    logger.info("Load bigramDict ...");
    bigramDict = new Dictionary("data\\bigramDict.dct");

    logger.info("Load tagger dict ...");
    personTagger = new PosTagger(Utility.TAG_TYPE.TT_PERSON, "data\\nr", coreDict);
    transPersonTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, "data\\tr", coreDict);
    placeTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, "data\\ns", coreDict);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.