Package org.apache.lucene.analysis

Examples of org.apache.lucene.analysis.CharArraySet$UnmodifiableCharArraySet


  protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
      final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
      final String comment) throws IOException {
    final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
        comment);
    final CharArraySet set = new CharArraySet(Version.LUCENE_31, wordSet.size(), ignoreCase);
    set.addAll(wordSet);
    return set;
  }
View Full Code Here


      }
    }
   
    br.close();
   
    final CharArraySet stopSet = new CharArraySet(stopWords.size(), false);
    stopSet.addAll(stopWords)
       
    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);

  }
View Full Code Here

            try {
                File protectedWordFiles = new File(wordFiles);
                if (protectedWordFiles.exists()) {
                    List<String> wlist = loader.getLines(wordFiles);
                    //This cast is safe in Lucene
                    protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally
                } else {
                    List<String> files = StrUtils.splitFileNames(wordFiles);
                    for (String file : files) {
                        List<String> wlist = loader.getLines(file.trim());
                        if (protectedWords == null)
                            protectedWords = new CharArraySet(wlist, false);
                        else
                            protectedWords.addAll(wlist);
                    }
                }
            } catch (IOException e) {
View Full Code Here

    this.onlyLongestMatch=onlyLongestMatch;
   
    if (dictionary instanceof CharArraySet) {
      this.dictionary = (CharArraySet) dictionary;
    } else {
      this.dictionary = new CharArraySet(dictionary.size(), false);
      addAllLowerCase(this.dictionary, dictionary);
    }
  }
View Full Code Here

   * TODO We should look for a faster dictionary lookup approach.
   * @param dictionary
   * @return
   */
  public static final Set makeDictionary(final String[] dictionary) {
    CharArraySet dict = new CharArraySet(dictionary.length, false);
    addAllLowerCase(dict, Arrays.asList(dictionary));
    return dict;
  }
View Full Code Here

    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   
    public ChineseFilter(TokenStream in) {
        super(in);

        stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
    }
View Full Code Here

    this.onlyLongestMatch=onlyLongestMatch;
   
    if (dictionary instanceof CharArraySet) {
      this.dictionary = (CharArraySet) dictionary;
    } else {
      this.dictionary = new CharArraySet(dictionary.size(), false);
      addAllLowerCase(this.dictionary, dictionary);
    }
   
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
    offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
View Full Code Here

   * @param dictionary
   * @return {@link Set} of lowercased terms
   */
  public static final Set makeDictionary(final String[] dictionary) {
    // is the below really case insensitive?
    CharArraySet dict = new CharArraySet(dictionary.length, false);
    addAllLowerCase(dict, Arrays.asList(dictionary));
    return dict;
  }
View Full Code Here

    this.onlyLongestMatch=onlyLongestMatch;
   
    if (dictionary instanceof CharArraySet) {
      this.dictionary = (CharArraySet) dictionary;
    } else {
      this.dictionary = new CharArraySet(dictionary.size(), false);
      addAllLowerCase(this.dictionary, dictionary);
    }
   
    termAtt = addAttribute(TermAttribute.class);
    offsetAtt = addAttribute(OffsetAttribute.class);
View Full Code Here

   * @param dictionary
   * @return {@link Set} of lowercased terms
   */
  public static final Set makeDictionary(final String[] dictionary) {
    // is the below really case insensitive?
    CharArraySet dict = new CharArraySet(dictionary.length, false);
    addAllLowerCase(dict, Arrays.asList(dictionary));
    return dict;
  }
View Full Code Here

TOP

Related Classes of org.apache.lucene.analysis.CharArraySet$UnmodifiableCharArraySet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.