Package org.hibernate.search.test.util

Source Code of org.hibernate.search.test.util.AnalyzerUtils

// $Id: AnalyzerUtils.java 15547 2008-11-11 12:57:47Z hardy.ferentschik $
package org.hibernate.search.test.util;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import junit.framework.Assert;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.slf4j.Logger;

import org.hibernate.search.util.LoggerFactory;

/**
* Helper class to test analyzers. Taken and modified from <i>Lucene in Action</i>.
*
* @author Hardy Ferentschik
*/
public class AnalyzerUtils {

  public static final Logger log = LoggerFactory.make();

  public static Token[] tokensFromAnalysis(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream( field, new StringReader( text ) );
    List<Token> tokenList = new ArrayList<Token>();
    Token reusableToken = new Token();
    while ( true ) {

      Token token = stream.next( reusableToken );
      if ( token == null ) {
        break;
      }

      tokenList.add( ( Token ) token.clone() );
    }

    return tokenList.toArray( new Token[tokenList.size()] );
  }

  public static void displayTokens(Analyzer analyzer, String field, String text) throws IOException {
    Token[] tokens = tokensFromAnalysis( analyzer, field, text );

    for ( Token token : tokens ) {
      log.debug( "[" + getTermText( token ) + "] " );
    }
  }

  public static void displayTokensWithPositions(Analyzer analyzer, String field, String text) throws IOException {
    Token[] tokens = tokensFromAnalysis( analyzer, field, text );

    int position = 0;

    for ( Token token : tokens ) {
      int increment = token.getPositionIncrement();

      if ( increment > 0 ) {
        position = position + increment;
        System.out.println();
        System.out.print( position + ": " );
      }

      log.debug( "[" + getTermText( token ) + "] " );
    }
  }

  public static void displayTokensWithFullDetails(Analyzer analyzer, String field, String text) throws IOException {
    Token[] tokens = tokensFromAnalysis( analyzer, field, text );
    StringBuilder builder = new StringBuilder();
    int position = 0;

    for ( Token token : tokens ) {
      int increment = token.getPositionIncrement();

      if ( increment > 0 ) {
        position = position + increment;
        builder.append( "\n" ).append( position ).append( ": " );
      }

      builder.append( "[" )
          .append( getTermText( token ) )
          .append( ":" )
          .append( token.startOffset() )
          .append( "->" )
          .append(
              token.endOffset()
          )
          .append( ":" )
          .append( token.type() )
          .append( "] " );
      log.debug( builder.toString() );
    }
  }

  public static void assertTokensEqual(Token[] tokens, String[] strings) {
    Assert.assertEquals( strings.length, tokens.length );

    for ( int i = 0; i < tokens.length; i++ ) {
      Assert.assertEquals( "index " + i, strings[i], getTermText( tokens[i] ) );
    }
  }

  public static String getTermText(Token token) {
    return new String( token.termBuffer(), 0, token.termLength() );
  }
}
TOP

Related Classes of org.hibernate.search.test.util.AnalyzerUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.