Package org.apache.lucene.analysis.tokenattributes

Examples of org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute
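
These examples are drawn from the Lucene 4.x codebase, and they all share one consumption pattern: fetch the attribute from the TokenStream, call getBytesRef() once up front to obtain the reusable BytesRef buffer, call fillBytesRef() after each successful incrementToken() to populate it, and deep-copy the bytes whenever they must outlive the next token. A minimal self-contained sketch of that pattern (assuming the Lucene 4.x API these examples target; the field name and input text are placeholders):

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.Version;

    public class TermToBytesRefAttributeExample {
      public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        TokenStream ts = analyzer.tokenStream("body", "quick brown foxes");
        try {
          // getBytesRef() returns the single reusable buffer that
          // fillBytesRef() overwrites for every token.
          TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
          BytesRef bytes = termAtt.getBytesRef();
          ts.reset();
          while (ts.incrementToken()) {
            termAtt.fillBytesRef();
            // Deep-copy if the term must survive the next incrementToken().
            System.out.println(BytesRef.deepCopyOf(bytes).utf8ToString());
          }
          ts.end();
        } finally {
          ts.close();
        }
      }
    }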


    TokenStream source;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
    }
     
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();

    try {
      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
    } catch (IOException e) {
      throw new RuntimeException("error analyzing range part: " + part, e);
    }


      String s = _TestUtil.randomRealisticUnicodeString(random());
      if (other != null && s.equals(other)) {
        continue;
      }
      final TokenStream ts = a.tokenStream("foo", new StringReader(s));
      final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
      final BytesRef termBytes = termAtt.getBytesRef();
      ts.reset();

      int count = 0;
      boolean changed = false;

      while(ts.incrementToken()) {
        termAtt.fillBytesRef();
        if (count == 0 && !termBytes.utf8ToString().equals(s)) {
          // The value was changed during analysis.  Keep iterating so the
          // tokenStream is exhausted.
          changed = true;
        }

      if (!fieldInfos.containsKey(fieldName)) {
        fieldInfos.put(fieldName,
            new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false,
                this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
                null, null, null));
      }
      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      BytesRef ref = termAtt.getBytesRef();
      stream.reset();
     
      while (stream.incrementToken()) {
        termAtt.fillBytesRef();
//        if (DEBUG) System.err.println("token='" + term + "'");
        numTokens++;
        final int posIncr = posIncrAttribute.getPositionIncrement();
        if (posIncr == 0)
          numOverlapTokens++;
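
A loop like the one above sits inside Lucene's MemoryIndex (org.apache.lucene.index.memory.MemoryIndex), which analyzes a field value into an in-memory inverted index. A caller never sees that machinery; a rough sketch of typical use (the analyzer, field name, and text are placeholders):

    MemoryIndex index = new MemoryIndex(true);  // true => store offsets, as above
    index.addField("body", "some short example text", analyzer);
    // Scores the single in-memory document against the query.
    float score = index.search(new TermQuery(new Term("body", "example")));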

    BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false));
    bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));
    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream(fieldName, text);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      Term term = null;
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        term = new Term(fieldName, BytesRef.deepCopyOf(bytes));
        bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
      }
      ts.end();
    }

    TokenStream source = null;
    try {
      source = analyzerIn.tokenStream(field, part);
      source.reset();
     
      TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();

      if (!source.incrementToken())
        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
      termAtt.fillBytesRef();
      if (source.incrementToken())
        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
      source.end();
      return BytesRef.deepCopyOf(bytes);
    } catch (IOException e) {
      throw new RuntimeException("error analyzing range part: " + part, e);
    } finally {
      IOUtils.closeWhileHandlingException(source);
    }

    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream(fieldName, text);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        terms.add(BytesRef.deepCopyOf(bytes));
      }
      ts.end();
    }
    catch (IOException ioe) {
      throw new RuntimeException("Error constructing terms from index: " + ioe);
    }
    finally {
      IOUtils.closeWhileHandlingException(ts);
    }

      String s = _TestUtil.randomRealisticUnicodeString(random());
      if (other != null && s.equals(other)) {
        continue;
      }
      IOException priorException = null;
      TokenStream ts = a.tokenStream("foo", s);
      try {
        final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
        final BytesRef termBytes = termAtt.getBytesRef();
        ts.reset();

        int count = 0;
        boolean changed = false;

        while(ts.incrementToken()) {
          termAtt.fillBytesRef();
          if (count == 0 && !termBytes.utf8ToString().equals(s)) {
            // The value was changed during analysis.  Keep iterating so the
            // tokenStream is exhausted.
            changed = true;
          }

  /** Pulls the graph (including PositionLengthAttribute) from the
   *  provided TokenStream, and creates the corresponding automaton
   *  where arcs are bytes (or Unicode code points,
   *  if unicodeArcs = true) from each term. */
  public Automaton toAutomaton(TokenStream in) throws IOException {
    final Automaton a = new Automaton();
    boolean deterministic = true;

    final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);
    final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class);

    final BytesRef term = termBytesAtt.getBytesRef();

    in.reset();

    // Only temporarily holds states ahead of our current
    // position:

    final RollingBuffer<Position> positions = new Positions();

    int pos = -1;
    Position posData = null;
    int maxOffset = 0;
    while (in.incrementToken()) {
      int posInc = posIncAtt.getPositionIncrement();
      if (!preservePositionIncrements && posInc > 1) {
        posInc = 1;
      }
      assert pos > -1 || posInc > 0;

      if (posInc > 0) {

        // New node:
        pos += posInc;

        posData = positions.get(pos);
        assert posData.leaving == null;

        if (posData.arriving == null) {
          // No token ever arrived to this position
          if (pos == 0) {
            // OK: this is the first token
            posData.leaving = a.getInitialState();
          } else {
            // This means there's a hole (eg, StopFilter
            // does this):
            posData.leaving = new State();
            addHoles(a.getInitialState(), positions, pos);
          }
        } else {
          posData.leaving = new State();
          posData.arriving.addTransition(new Transition(POS_SEP, posData.leaving));
          if (posInc > 1) {
            // A token spanned over a hole; add holes
            // "under" it:
            addHoles(a.getInitialState(), positions, pos);
          }
        }
        positions.freeBefore(pos);
      } else {
        // note: this isn't necessarily true; it's just that we can't be sure
        // the result is deterministic. we could optimize this further
        // (e.g. buffer and sort synonyms at a position), but that's probably
        // overkill. this is cheap and dirty
        deterministic = false;
      }

      final int endPos = pos + posLengthAtt.getPositionLength();

      termBytesAtt.fillBytesRef();
      final BytesRef termUTF8 = changeToken(term);
      int[] termUnicode = null;
      final Position endPosData = positions.get(endPos);
      if (endPosData.arriving == null) {
        endPosData.arriving = new State();
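
The method above is the core of Lucene's TokenStreamToAutomaton. Note that it calls reset() and consumes the stream itself, so a caller hands it a fresh TokenStream and only closes it afterwards. A rough sketch (the analyzer, field name, and text are placeholders):

    TokenStream in = analyzer.tokenStream("field", "wifi network");
    try {
      TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
      tsta.setPreservePositionIncrements(true);  // keep holes left by e.g. StopFilter
      Automaton a = tsta.toAutomaton(in);        // consumes the stream
    } finally {
      in.close();
    }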

    List<SpanQuery> clausesList = new ArrayList<>();

    TokenStream ts = null;
    try {
      ts = analyzer.tokenStream(fieldName, value);
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
      while (ts.incrementToken()) {
        termAtt.fillBytesRef();
        SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
        clausesList.add(stq);
      }
      ts.end();
      SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));

  protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
    List<BytesRef> bytesRefs = new ArrayList<>();

    TokenStream tokenStream = analyzer.tokenStream(field, text);
    try {
      TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);

      BytesRef bytesRef = termAttribute.getBytesRef();

      tokenStream.reset();
   
      while (tokenStream.incrementToken()) {
        termAttribute.fillBytesRef();
        bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
      }

      tokenStream.end();
    } finally {
      IOUtils.closeWhileHandlingException(tokenStream);
    }

    return bytesRefs;
  }
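
A helper like analyze(...) above collapses a whole input string into its analyzed terms in one call. Hypothetical usage (the field name and text are placeholders):

    List<BytesRef> terms = analyze("quick brown foxes", "body", analyzer);
    for (BytesRef term : terms) {
      System.out.println(term.utf8ToString());  // one entry per token the analyzer produced
    }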
