Examples of org.apache.lucene.util.CharsRef

org.apache.lucene.util.CharsRef
Represents char[], as a slice (offset + length) into an existing char[]. The {@link #chars} member should never be null; use{@link #EMPTY_ARRAY} if necessary. @lucene.internal

    }
  }
 
  private CharsRef parseSynonym(String line, CharsRef reuse) throws IOException {
    if (reuse == null) {
      reuse = new CharsRef(8);
    }
    
    int start = line.indexOf('\'')+1;
    int end = line.lastIndexOf('\'');

View Full Code Here

      }


      final byte[] spare = new byte[5];
      
      Set<CharsRef> keys = workingSet.keySet();
      CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]);
      Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());


      final IntsRef scratchIntsRef = new IntsRef();
      
      //System.out.println("fmap.build");
      for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
        CharsRef input = sortedKeys[keyIdx];
        MapEntry output = workingSet.get(input);


        int numEntries = output.ords.size();
        // output size, assume the worst case
        int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry

View Full Code Here

        this.keepOriginalWord = keepOriginalWord;
    }


    public static SynonymMap buildAcronymsMergingMap() throws IOException {
        SynonymMap.Builder synonymMap = new SynonymMap.Builder(true);
        synonymMap.add(new CharsRef("אף על פי כן"), new CharsRef("אעפ\"כ"), false);
        synonymMap.add(new CharsRef("אף על פי"), new CharsRef("אע\"פ"), false);
        synonymMap.add(new CharsRef("כמו כן"), new CharsRef("כמו\"כ"), false);
        synonymMap.add(new CharsRef("על ידי"), new CharsRef("ע\"י"), false);
        synonymMap.add(new CharsRef("על פי"), new CharsRef("ע\"פ"), false);
        synonymMap.add(new CharsRef("כל כך"), new CharsRef("כ\"כ"), false);
        synonymMap.add(new CharsRef("בדרך כלל"), new CharsRef("בד\"כ"), false);
        synonymMap.add(new CharsRef("תל אביב"), new CharsRef("ת\"א"), false);
        return synonymMap.build();
    }

View Full Code Here

    final BytesRef utf8Key = new BytesRef(key);
    try {


      Automaton lookupAutomaton = toLookupAutomaton(key);


      final CharsRef spare = new CharsRef();


      //System.out.println("  now intersect exactFirst=" + exactFirst);
    
      // Intersect automaton w/ suggest wFST and get all
      // prefix starting nodes & their outputs:

View Full Code Here

      tfit = new UnsortedTermFreqIteratorWrapper(tfit);
    }
    trie = new JaspellTernarySearchTrie();
    trie.setMatchAlmostDiff(editDistance);
    BytesRef spare;
    final CharsRef charsSpare = new CharsRef();


    while ((spare = tfit.next()) != null) {
      final long weight = tfit.weight();
      if (spare.length == 0) {
        continue;
      }
      charsSpare.grow(spare.length);
      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
      trie.put(charsSpare.toString(), Long.valueOf(weight));
    }
  }

View Full Code Here

    int maxCnt = Math.min(num, list.size());
    if (onlyMorePopular) {
      LookupPriorityQueue queue = new LookupPriorityQueue(num);
      for (String s : list) {
        long freq = ((Number)trie.get(s)).longValue();
        queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
      }
      for (LookupResult lr : queue.getResults()) {
        res.add(lr);
      }
    } else {
      for (int i = 0; i < maxCnt; i++) {
        String s = list.get(i);
        long freq = ((Number)trie.get(s)).longValue();
        res.add(new LookupResult(new CharsRef(s), freq));
      }      
    }
    return res;
  }

View Full Code Here

    if (prefixOutput == null) {
      return Collections.<LookupResult>emptyList();
    }
    
    List<LookupResult> results = new ArrayList<LookupResult>(num);
    CharsRef spare = new CharsRef();
    if (exactFirst && arc.isFinal()) {
      spare.grow(scratch.length);
      UnicodeUtil.UTF8toUTF16(scratch, spare);
      results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
      if (--num == 0) {
        return results; // that was quick
      }
    }


    // complete top-N
    MinResult<Long> completions[] = null;
    try {
      completions = Util.shortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
    } catch (IOException bogus) {
      throw new RuntimeException(bogus);
    }
    
    BytesRef suffix = new BytesRef(8);
    for (MinResult<Long> completion : completions) {
      scratch.length = prefixLength;
      // append suffix
      Util.toBytesRef(completion.input, suffix);
      scratch.append(suffix);
      spare.grow(scratch.length);
      UnicodeUtil.UTF8toUTF16(scratch, spare);
      results.add(new LookupResult(spare.toString(), decodeWeight(completion.output)));
    }
    return results;
  }

View Full Code Here

    final BytesRef utf8Key = new BytesRef(key);
    try {


      Automaton lookupAutomaton = toLookupAutomaton(key);


      final CharsRef spare = new CharsRef();


      //System.out.println("  now intersect exactFirst=" + exactFirst);
    
      // Intersect automaton w/ suggest wFST and get all
      // prefix starting nodes & their outputs:

View Full Code Here


  @Override
  public Bits readLiveDocs(Directory dir, SegmentInfoPerCommit info, IOContext context) throws IOException {
    assert info.hasDeletions();
    BytesRef scratch = new BytesRef();
    CharsRef scratchUTF16 = new CharsRef();
    
    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
    IndexInput in = null;
    boolean success = false;
    try {

View Full Code Here

    if (vector == null) {
      // null snippet
      return;
    }


    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;
    
    int numDocs = reader.maxDoc();
    
    while ((text = termsEnum.next()) != null) {
      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (!termSet.contains(term)) {
        continue;
      }
      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      if (dpEnum == null) {

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.lucene.util.CharsRef

org.apache.lucene.analysis.charfilter.MappingCharFilter

org.apache.lucene.analysis.charfilter.NormalizeCharMap$Builder

org.apache.lucene.analysis.hebrew.MorphAnalyzer

org.apache.lucene.analysis.hunspell.Dictionary

org.apache.lucene.analysis.hunspell.HunspellStemFilter

org.apache.lucene.analysis.hunspell.Stemmer

org.apache.lucene.analysis.hunspell.TestDictionary

org.apache.lucene.analysis.miscellaneous.TestLimitTokenPositionFilter

org.apache.lucene.analysis.miscellaneous.TestRemoveDuplicatesTokenFilter

org.apache.lucene.analysis.nl.DutchAnalyzer

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.