Package org.apache.lucene.util

Examples of org.apache.lucene.util.CharsRef


    }
  }
  private CharsRef parseSynonym(String line, CharsRef reuse) throws IOException {
    if (reuse == null) {
      reuse = new CharsRef(8);
    }
   
    int start = line.indexOf('\'')+1;
    int end = line.lastIndexOf('\'');
   
View Full Code Here


      }

      final byte[] spare = new byte[5];
     
      Set<CharsRef> keys = workingSet.keySet();
      CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]);
      Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());

      final IntsRef scratchIntsRef = new IntsRef();
     
      //System.out.println("fmap.build");
      for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
        CharsRef input = sortedKeys[keyIdx];
        MapEntry output = workingSet.get(input);

        int numEntries = output.ords.size();
        // output size, assume the worst case
        int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry
View Full Code Here

        this.keepOriginalWord = keepOriginalWord;
    }

    public static SynonymMap buildAcronymsMergingMap() throws IOException {
        SynonymMap.Builder synonymMap = new SynonymMap.Builder(true);
        synonymMap.add(new CharsRef("אף על פי כן"), new CharsRef("אעפ\"כ"), false);
        synonymMap.add(new CharsRef("אף על פי"), new CharsRef("אע\"פ"), false);
        synonymMap.add(new CharsRef("כמו כן"), new CharsRef("כמו\"כ"), false);
        synonymMap.add(new CharsRef("על ידי"), new CharsRef("ע\"י"), false);
        synonymMap.add(new CharsRef("על פי"), new CharsRef("ע\"פ"), false);
        synonymMap.add(new CharsRef("כל כך"), new CharsRef("כ\"כ"), false);
        synonymMap.add(new CharsRef("בדרך כלל"), new CharsRef("בד\"כ"), false);
        synonymMap.add(new CharsRef("תל אביב"), new CharsRef("ת\"א"), false);
        return synonymMap.build();
    }
View Full Code Here

    final BytesRef utf8Key = new BytesRef(key);
    try {

      Automaton lookupAutomaton = toLookupAutomaton(key);

      final CharsRef spare = new CharsRef();

      //System.out.println("  now intersect exactFirst=" + exactFirst);
   
      // Intersect automaton w/ suggest wFST and get all
      // prefix starting nodes & their outputs:
View Full Code Here

      tfit = new UnsortedTermFreqIteratorWrapper(tfit);
    }
    trie = new JaspellTernarySearchTrie();
    trie.setMatchAlmostDiff(editDistance);
    BytesRef spare;
    final CharsRef charsSpare = new CharsRef();

    while ((spare = tfit.next()) != null) {
      final long weight = tfit.weight();
      if (spare.length == 0) {
        continue;
      }
      charsSpare.grow(spare.length);
      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
      trie.put(charsSpare.toString(), Long.valueOf(weight));
    }
  }
View Full Code Here

    int maxCnt = Math.min(num, list.size());
    if (onlyMorePopular) {
      LookupPriorityQueue queue = new LookupPriorityQueue(num);
      for (String s : list) {
        long freq = ((Number)trie.get(s)).longValue();
        queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
      }
      for (LookupResult lr : queue.getResults()) {
        res.add(lr);
      }
    } else {
      for (int i = 0; i < maxCnt; i++) {
        String s = list.get(i);
        long freq = ((Number)trie.get(s)).longValue();
        res.add(new LookupResult(new CharsRef(s), freq));
      }     
    }
    return res;
  }
View Full Code Here

    if (prefixOutput == null) {
      return Collections.<LookupResult>emptyList();
    }
   
    List<LookupResult> results = new ArrayList<LookupResult>(num);
    CharsRef spare = new CharsRef();
    if (exactFirst && arc.isFinal()) {
      spare.grow(scratch.length);
      UnicodeUtil.UTF8toUTF16(scratch, spare);
      results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
      if (--num == 0) {
        return results; // that was quick
      }
    }

    // complete top-N
    MinResult<Long> completions[] = null;
    try {
      completions = Util.shortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
    } catch (IOException bogus) {
      throw new RuntimeException(bogus);
    }
   
    BytesRef suffix = new BytesRef(8);
    for (MinResult<Long> completion : completions) {
      scratch.length = prefixLength;
      // append suffix
      Util.toBytesRef(completion.input, suffix);
      scratch.append(suffix);
      spare.grow(scratch.length);
      UnicodeUtil.UTF8toUTF16(scratch, spare);
      results.add(new LookupResult(spare.toString(), decodeWeight(completion.output)));
    }
    return results;
  }
View Full Code Here

    final BytesRef utf8Key = new BytesRef(key);
    try {

      Automaton lookupAutomaton = toLookupAutomaton(key);

      final CharsRef spare = new CharsRef();

      //System.out.println("  now intersect exactFirst=" + exactFirst);
   
      // Intersect automaton w/ suggest wFST and get all
      // prefix starting nodes & their outputs:
View Full Code Here

  @Override
  public Bits readLiveDocs(Directory dir, SegmentInfoPerCommit info, IOContext context) throws IOException {
    assert info.hasDeletions();
    BytesRef scratch = new BytesRef();
    CharsRef scratchUTF16 = new CharsRef();
   
    String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
    IndexInput in = null;
    boolean success = false;
    try {
View Full Code Here

    if (vector == null) {
      // null snippet
      return;
    }

    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;
   
    int numDocs = reader.maxDoc();
   
    while ((text = termsEnum.next()) != null) {
      UnicodeUtil.UTF8toUTF16(text, spare);
      final String term = spare.toString();
      if (!termSet.contains(term)) {
        continue;
      }
      dpEnum = termsEnum.docsAndPositions(null, dpEnum);
      if (dpEnum == null) {
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.CharsRef

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.