Examples of org.apache.lucene.search.spell.TermFreqPayloadIterator

org.apache.lucene.search.spell.TermFreqPayloadIterator
Interface for enumerating (term, weight, payload) triples; currently only {@link AnalyzingSuggester} and {@link FuzzySuggester} support payloads.

    String prefix = getClass().getSimpleName();
    File directory = Sort.defaultTempDir();
    File tempInput = File.createTempFile(prefix, ".input", directory);
    File tempSorted = File.createTempFile(prefix, ".sorted", directory);


    TermFreqPayloadIterator payloads;
    if (iterator instanceof TermFreqPayloadIterator) {
      payloads = (TermFreqPayloadIterator) iterator;
    } else {
      payloads = null;
    }
    hasPayloads = payloads != null;


    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
    Sort.ByteSequencesReader reader = null;
    BytesRef scratch = new BytesRef();


    TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();


    boolean success = false;
    byte buffer[] = new byte[8];
    try {
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      BytesRef surfaceForm;


      while ((surfaceForm = iterator.next()) != null) {
        Set<IntsRef> paths = toFiniteStrings(surfaceForm, ts2a);
        
        maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, paths.size());


        for (IntsRef path : paths) {


          Util.toBytesRef(path, scratch);
          
          // length of the analyzed text (FST input)
          if (scratch.length > Short.MAX_VALUE-2) {
            throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length + ")");
          }
          short analyzedLength = (short) scratch.length;


          // compute the required length:
          // analyzed sequence + weight (4) + surface + analyzedLength (short)
          int requiredLength = analyzedLength + 4 + surfaceForm.length + 2;


          BytesRef payload;


          if (hasPayloads) {
            if (surfaceForm.length > (Short.MAX_VALUE-2)) {
              throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
            }
            payload = payloads.payload();
            // payload + surfaceLength (short)
            requiredLength += payload.length + 2;
          } else {
            payload = null;
          }

View Full Code Here

    if (searcher != null) {
      searcher.getIndexReader().close();
      searcher = null;
    }


    TermFreqPayloadIterator payloads;
    if (iter instanceof TermFreqPayloadIterator) {
      payloads = (TermFreqPayloadIterator) iter;
    } else {
      payloads = null;
    }
    Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));


    IndexWriter w = null;
    IndexWriter w2 = null;
    AtomicReader r = null;
    boolean success = false;
    try {
      Analyzer gramAnalyzer = new AnalyzerWrapper() {
          @Override
          protected Analyzer getWrappedAnalyzer(String fieldName) {
            return indexAnalyzer;
          }


          @Override
          protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            if (fieldName.equals("textgrams") && minPrefixChars > 0) {
              return new TokenStreamComponents(components.getTokenizer(),
                                               new EdgeNGramTokenFilter(matchVersion,
                                                                        components.getTokenStream(),
                                                                        1, minPrefixChars));
            } else {
              return components;
            }
          }
        };


      w = new IndexWriter(dirTmp,
                          getIndexWriterConfig(matchVersion, gramAnalyzer));
      BytesRef text;
      Document doc = new Document();
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
      ft.setOmitNorms(true);
      Field textField = new Field(TEXT_FIELD_NAME, "", ft);
      doc.add(textField);


      Field textGramField = new Field("textgrams", "", ft);
      doc.add(textGramField);


      Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef());
      doc.add(textDVField);


      // TODO: use threads...?
      Field weightField = new NumericDocValuesField("weight", 0);
      doc.add(weightField);


      Field payloadField;
      if (payloads != null) {
        payloadField = new BinaryDocValuesField("payloads", new BytesRef());
        doc.add(payloadField);
      } else {
        payloadField = null;
      }


      //long t0 = System.nanoTime();
      while ((text = iter.next()) != null) {
        String textString = text.utf8ToString();
        textField.setStringValue(textString);
        textGramField.setStringValue(textString);
        textDVField.setBytesValue(text);
        weightField.setLongValue(iter.weight());
        if (payloads != null) {
          payloadField.setBytesValue(payloads.payload());
        }
        w.addDocument(doc);
      }
      //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");

View Full Code Here

    if (searcher != null) {
      searcher.getIndexReader().close();
      searcher = null;
    }


    TermFreqPayloadIterator payloads;
    if (iter instanceof TermFreqPayloadIterator) {
      payloads = (TermFreqPayloadIterator) iter;
    } else {
      payloads = null;
    }
    Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp"));


    IndexWriter w = null;
    IndexWriter w2 = null;
    AtomicReader r = null;
    boolean success = false;
    try {
      Analyzer gramAnalyzer = new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
          @Override
          protected Analyzer getWrappedAnalyzer(String fieldName) {
            return indexAnalyzer;
          }


          @Override
          protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            if (fieldName.equals("textgrams") && minPrefixChars > 0) {
              return new TokenStreamComponents(components.getTokenizer(),
                                               new EdgeNGramTokenFilter(matchVersion,
                                                                        components.getTokenStream(),
                                                                        1, minPrefixChars));
            } else {
              return components;
            }
          }
        };


      w = new IndexWriter(dirTmp,
                          getIndexWriterConfig(matchVersion, gramAnalyzer));
      BytesRef text;
      Document doc = new Document();
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
      ft.setOmitNorms(true);
      Field textField = new Field(TEXT_FIELD_NAME, "", ft);
      doc.add(textField);


      Field textGramField = new Field("textgrams", "", ft);
      doc.add(textGramField);


      Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef());
      doc.add(textDVField);


      // TODO: use threads...?
      Field weightField = new NumericDocValuesField("weight", 0);
      doc.add(weightField);


      Field payloadField;
      if (payloads != null) {
        payloadField = new BinaryDocValuesField("payloads", new BytesRef());
        doc.add(payloadField);
      } else {
        payloadField = null;
      }


      //long t0 = System.nanoTime();
      while ((text = iter.next()) != null) {
        String textString = text.utf8ToString();
        textField.setStringValue(textString);
        textGramField.setStringValue(textString);
        textDVField.setBytesValue(text);
        weightField.setLongValue(iter.weight());
        if (payloads != null) {
          payloadField.setBytesValue(payloads.payload());
        }
        w.addDocument(doc);
      }
      //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");

View Full Code Here

    String prefix = getClass().getSimpleName();
    File directory = Sort.defaultTempDir();
    File tempInput = File.createTempFile(prefix, ".input", directory);
    File tempSorted = File.createTempFile(prefix, ".sorted", directory);


    TermFreqPayloadIterator payloads;
    if (iterator instanceof TermFreqPayloadIterator) {
      payloads = (TermFreqPayloadIterator) iterator;
    } else {
      payloads = null;
    }
    hasPayloads = payloads != null;


    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
    Sort.ByteSequencesReader reader = null;
    BytesRef scratch = new BytesRef();


    TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();


    boolean success = false;
    byte buffer[] = new byte[8];
    try {
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      BytesRef surfaceForm;


      while ((surfaceForm = iterator.next()) != null) {
        Set<IntsRef> paths = toFiniteStrings(surfaceForm, ts2a);
        
        maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, paths.size());


        for (IntsRef path : paths) {


          Util.toBytesRef(path, scratch);
          
          // length of the analyzed text (FST input)
          if (scratch.length > Short.MAX_VALUE-2) {
            throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length + ")");
          }
          short analyzedLength = (short) scratch.length;


          // compute the required length:
          // analyzed sequence + weight (4) + surface + analyzedLength (short)
          int requiredLength = analyzedLength + 4 + surfaceForm.length + 2;


          BytesRef payload;


          if (hasPayloads) {
            if (surfaceForm.length > (Short.MAX_VALUE-2)) {
              throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
            }
            payload = payloads.payload();
            // payload + surfaceLength (short)
            requiredLength += payload.length + 2;
          } else {
            payload = null;
          }

View Full Code Here

    String prefix = getClass().getSimpleName();
    File directory = Sort.defaultTempDir();
    File tempInput = File.createTempFile(prefix, ".input", directory);
    File tempSorted = File.createTempFile(prefix, ".sorted", directory);


    TermFreqPayloadIterator payloads;
    if (iterator instanceof TermFreqPayloadIterator) {
      payloads = (TermFreqPayloadIterator) iterator;
    } else {
      payloads = null;
    }
    hasPayloads = payloads != null;


    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
    Sort.ByteSequencesReader reader = null;
    BytesRef scratch = new BytesRef();


    TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();


    boolean success = false;
    byte buffer[] = new byte[8];
    try {
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
      BytesRef surfaceForm;


      while ((surfaceForm = iterator.next()) != null) {
        Set<IntsRef> paths = toFiniteStrings(surfaceForm, ts2a);
        
        maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, paths.size());


        for (IntsRef path : paths) {


          Util.toBytesRef(path, scratch);
          
          // length of the analyzed text (FST input)
          if (scratch.length > Short.MAX_VALUE-2) {
            throw new IllegalArgumentException("cannot handle analyzed forms > " + (Short.MAX_VALUE-2) + " in length (got " + scratch.length + ")");
          }
          short analyzedLength = (short) scratch.length;


          // compute the required length:
          // analyzed sequence + weight (4) + surface + analyzedLength (short)
          int requiredLength = analyzedLength + 4 + surfaceForm.length + 2;


          BytesRef payload;


          if (hasPayloads) {
            if (surfaceForm.length > (Short.MAX_VALUE-2)) {
              throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
            }
            payload = payloads.payload();
            // payload + surfaceLength (short)
            requiredLength += payload.length + 2;
          } else {
            payload = null;
          }

View Full Code Here

TOP

Related Classes of org.apache.lucene.search.spell.TermFreqPayloadIterator

org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester

org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.