
Source Code of$Indexer


import com.starlight.IOKit;
import com.starlight.NotNull;
import com.starlight.Nullable;
import com.starlight.ValidationKit;
import com.starlight.listeners.ErrorCountDeliveryErrorHandler;
import com.starlight.listeners.ListenerSupport;
import com.starlight.thread.SharedThreadPool;
import gnu.trove.procedure.TObjectProcedure;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

/**
 * Watches and indexes text files (typically log files) for notifications of changes
 * and tracking of line position. The benefit to callers is that it allows fast
 * retrieval of lines and notification of how many lines are available.
 */
public class LogIndexer<A> implements LogAccess<A> {
  private static final Logger LOG = LoggerFactory.getLogger( LogIndexer.class );

  // Source of the IDs returned by startSearch. Static, so it is shared by all
  // LogIndexer instances.
  private static final AtomicInteger SEARCH_ID_COUNTER = new AtomicInteger( 0 );

  // Compile-time switch for the System.out dumps in printRowIndexMap and
  // increaseRowSkipMod.
  private static final boolean DEBUG_ROW_INDEX_MAP = false;

  // The file being indexed
  private final File file;
  // Caller-supplied object returned by getAttachment and passed to listeners
  private final A attachment;
  // Dispatches indexing notifications to the registered LogIndexListeners
  private final ListenerSupport<LogIndexListener,?> listeners;
  // Number of row index entries allowed before the Indexer thins the map
  private final int max_index_size;
  // Passed to every Searcher created by startSearch
  private final int max_search_hits;

  // Line number -> stream position recorded by the Indexer for that line
  private final TIntLongMap row_index_map = new TIntLongHashMap();
  // NOTE(review): presumably guards row_index_map, but no lock()/unlock() call on it
  // is visible in this copy of the file - confirm against the original source.
  private final Lock row_index_map_lock = new ReentrantLock();

  // Only lines whose number is a multiple of this value are stored in row_index_map
  private volatile int row_skip_mod = 0;
  // The number of rows found in the file
  private volatile int num_lines = 1;
  // Stream position reached by the last indexing pass; FileChangeChecker compares
  // it with the current file length to detect growth or rotation.
  private volatile long last_index_size = 0;

  // True while an Indexer is queued or running; FileChangeChecker only starts a new
  // one when it can flip this from false to true.
  private final AtomicBoolean indexer_scheduled;

  // Handle for the periodic FileChangeChecker task; cancelled in close()
  private final ScheduledFuture file_change_future;

  // NOTE(review): presumably guards searchers, but no lock()/unlock() call on it is
  // visible in this copy of the file - confirm against the original source.
  private final Lock search_lock = new ReentrantLock();
  // Search ID -> Searcher registered by startSearch
  private final TIntObjectMap<Searcher> searchers = new TIntObjectHashMap<>();

  /**
   * Create the indexer and do an initial indexing of the file.
   *
   * @param file              The file to index.
   * @param attachment        Caller-supplied object that is passed to listeners and
   *                          returned by {@link #getAttachment()}. May be null.
   * @param listener          If non-null, this listener will be notified when indexes
   *                          are running.
   * @param max_index_size    The maximum size of the row index. The larger this number
   *                          the more memory will be used with large files. If the
   *                          size is smaller than the number of rows in the file, only
   *                          a certain portion of the row indexes will be kept which
   *                          will require more seeking when looking for a particular
   *                          line.
   * @param max_search_hits   Maximum number of matches allowed for a search.
   * @param all_listeners_removed     Runnable that is called when/if the last listener
   *                          is removed due to a message delivery problem.
   */
  public LogIndexer( @NotNull File file, @Nullable A attachment,
    @Nullable LogIndexListener<A> listener,
    int max_index_size, int max_search_hits,
    @Nullable final Runnable all_listeners_removed ) {

    ValidationKit.checkNonnull( file, "file" );

    this.file = file;
    this.attachment = attachment;
    // NOTE(review): the listener-support construction below is incomplete in this
    // copy: the call that should receive the error handler and the end of the
    // statement are not visible, and the guard inside lastListenerRemoved() ends in
    // an empty statement, so all_listeners_removed is never invoked as written.
    // Confirm against the original source before relying on this block.
    this.listeners = ListenerSupport.forType( LogIndexListener.class )
        new ErrorCountDeliveryErrorHandler<LogIndexListener>( 3, true ) {
          public void lastListenerRemoved() {
            if ( all_listeners_removed != null );
        } )
    this.max_index_size = max_index_size;
    this.max_search_hits = max_search_hits;

    if ( listener != null ) listeners.add( listener );

    // Start an index right away
    // The flag is set before the task is submitted so FileChangeChecker cannot queue
    // a second Indexer while this one is pending.
    indexer_scheduled = new AtomicBoolean( true );
    SharedThreadPool.INSTANCE.execute( new Indexer( 0, 0 ) );

    // Start checking for changes after a couple seconds
    // (presumably initial delay 2, period 1, assuming SharedThreadPool follows the
    // ScheduledExecutorService argument order - confirm)
    file_change_future = SharedThreadPool.INSTANCE.scheduleAtFixedRate(
      new FileChangeChecker(), 2, 1, TimeUnit.SECONDS );

  /**
   * Close the indexer and free all resources. Continuing to use the object after
   * closing it will result in undefined results.
   */
  public void close() {
    // Halt any running searchers
    // NOTE(review): in this copy the callback below only returns true (continue
    // iterating) - no call on the Searcher is visible, and the try/finally has no
    // visible lock acquire/release. Confirm against the original source.
    try {
      searchers.forEachValue( new TObjectProcedure<Searcher>() {
        public boolean execute( Searcher searcher ) {
          return true;
      } );
    finally {

    // Prevent indexer from being restarted
    // (FileChangeChecker only starts an Indexer when it can flip this flag from
    // false to true, so leaving it true blocks further scheduling.)
    indexer_scheduled.set( true );

    // Cancel the scheduled task
    // (false = do not interrupt a check that is already executing)
    file_change_future.cancel( false );

  /**
   * Indicates whether or not the indexer has {@link LogIndexListener LogIndexListeners}.
   */
  @SuppressWarnings( "UnusedDeclaration" )
  public boolean hasLogIndexListeners() {
    return listeners.hasListeners();

  /**
   * Retrieve a number of lines. Any lines that are unavailable will be set to null.
   *
   * @param start     The starting line index (zero-based).
   * @param count     The number of lines to retrieve.
   * @return  The array of lines. Will always be non-null and of length {@code count}.
   * @throws IOException  If the file cannot be read.
   */
  public String[] readLines( final int start, final int count ) throws IOException {
    final String[] to_return = new String[ count ];

    streamLines( start, new TObjectProcedure<String>() {
      int processed = 0;
      public boolean execute( String line ) {
//        if ( processed == 0 ) {
//          System.out.println( "Read line: " + start + "->" + line );
//        }
        to_return[ processed ] = line;

        return processed < count;
    } );

    return to_return;

  public A getAttachment() {
    return attachment;

  /**
   * Returns the current number of lines known in the file.
   */
  public int getLineCount() {
    return num_lines;

  /**
   * Add an index listener.
   *
   * @param listener  The listener to register.
   * @return      The number of lines currently known in the file.
   */
  public int addListener( LogIndexListener listener ) {
    listeners.add( listener );
    return num_lines;

  public void removeListener( LogIndexListener listener ) {
    listeners.remove( listener );

  /**
   * Starts a new search. Searches will continue to run until cancelled, or until
   * communication errors occur to the listener.
   *
   * @param params        Parameters for the search.
   * @param listener      The SearchListener that will be notified of search results.
   * @return              The search ID.
   */
  public int startSearch( SearchParams params, SearchListener listener ) {
    ValidationKit.checkNonnull( params, "params" );
    ValidationKit.checkNonnull( listener, "listener" );

    // NOTE(review): this try/finally has no visible lock acquire/release in this
    // copy; presumably search_lock is meant to guard the searchers map - confirm
    // against the original source.
    try {
      // IDs come from a static counter, so they are shared across indexers
      int id = SEARCH_ID_COUNTER.getAndIncrement();

      Searcher searcher =
        new Searcher( this, id, params, listener, max_search_hits );
      // Register under the ID so stopSearch and close can find it later
      searchers.put( id, searcher );
      // NOTE(review): nothing visible here starts the searcher - confirm whether a
      // statement is missing or the Searcher starts itself.

      return id;
    finally {

  /**
   * Cancels a currently running search.
   *
   * @param search_id   The ID returned by {@code startSearch}.
   */
  public void stopSearch( int search_id ) {
    // NOTE(review): as in startSearch, no lock acquire/release is visible around
    // this try/finally in this copy - confirm against the original source.
    try {
      // Look up the searcher registered by startSearch; unknown IDs yield null
      Searcher searcher = searchers.get( search_id );
      // NOTE(review): the body of this guard is not visible in this copy, so the
      // statement(s) that actually cancel the search are missing here.
      if ( searcher != null ) {
    finally {

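  /**
   * Opens the file in Reader format. This can be overridden to support alternate file
   * formats.
   *
   * @param file  The file to open.
   * @return      A new Reader positioned at the start of the file.
   * @throws IOException  If the file cannot be opened.
   */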
  protected Reader openReaderForFile( File file ) throws IOException {
    return new FileReader( file );

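  /**
   * Opens the file in InputStream format. This can be overridden to support alternate
   * file formats.
   *
   * @param file  The file to open.
   * @return      A new InputStream positioned at the start of the file.
   * @throws IOException  If the file cannot be opened.
   */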
  protected InputStream openStreamForFile( File file ) throws IOException {
    return new FileInputStream( file );

  /**
   * Read lines from a file until instructed to stop.
   *
   * @param start             Index of the first line to read.
   * @param line_processor    Calls {@link gnu.trove.procedure.TObjectProcedure#execute(Object)}
   *                          to process a line. The read will stop when that method
   *                          returns false or the end of file is reached.
   * @return                  The number of lines processed.
   * @throws IOException      If the file cannot be opened or read.
   */
  int streamLines( int start, TObjectProcedure<String> line_processor )
    throws IOException {

    // If it's outside the bounds, process nothing and report zero lines
    if ( start > num_lines ) return 0;

    int lines_processed = 0;

    Reader in = null;
    BufferedReader bin = null;
    try {
      in = openReaderForFile( file );

      // Line number the reader is positioned at; may end up before start when the
      // exact line has no entry in the row index.
      int current_line = start;

      if ( start != 0 ) {
        // Find the start position
        // NOTE(review): no lock acquire/release is visible around this try/finally
        // in this copy; presumably row_index_map_lock is intended - confirm.
        try {
          // We may not have information for the given line, so step back
          // until we do
          // NOTE(review): the statements that decrement current_line and leave the
          // loop at line 0 are not visible in this copy.
          while( !row_index_map.containsKey( current_line ) ) {
            if ( current_line == 0 ) {

          // Skip to the starting point
          if ( current_line > 0 ) {
            long location = row_index_map.get( current_line );
            if ( LOG.isDebugEnabled() ) {
              LOG.debug( "Skipping to location {} for line {} to read {}",
                location, current_line, start );
            // NOTE(review): Reader.skip counts characters and may skip fewer than
            // requested (the return value is ignored here), while the stored
            // locations come from PositionTrackingInputStream - presumably byte
            // offsets. These diverge for multi-byte encodings; confirm.
            in.skip( location );
        finally {

      // Create the BufferedReader, now starting at the correct position
      bin = new BufferedReader( in );

      String line;
      while( ( line = bin.readLine() ) != null ) {
//        System.out.println( "current_line " + current_line + ": " + line +
//          " (start: " + start + ")" );
        // NOTE(review): the body of this guard (presumably advancing past lines
        // that precede start) is not visible in this copy.
        if ( current_line < start ) {

//        System.out.println( "Line " + current_line + ": " + line );
        boolean keep_going = line_processor.execute( line );

        // NOTE(review): no increment of lines_processed or current_line is visible
        // in this copy, so as written the method would always report zero.

        if ( !keep_going ) {
//          System.out.println( "Done at line: " + current_line +
//            " (line: " + line + ")");
          return lines_processed;
//      System.out.println( "Done at line: " + current_line + " (line: " + line + ")");

      // We'll get here if the end of the file was reached before the processor
      // told us to stop
      return lines_processed;
    finally {
      IOKit.close( bin );
      IOKit.close( in );

  /**
   * Class that does indexing, both full and partial.
   */
  private class Indexer implements Runnable {
    // Line number to resume counting from; 0 means a full index
    private final int starting_line;
    // Position in the stream to skip to before scanning; 0 means start of file
    private final long starting_position;
    Indexer( int starting_line, long starting_position ) {
      this.starting_line = starting_line;
      this.starting_position = starting_position;
    public void run() {
      // Rename the executing thread so it is identifiable in thread dumps and logs.
      // Nothing visible here restores the previous name afterwards.
      Thread.currentThread().setName( file.getName() + " indexer" );

      // Second argument tells listeners whether this is a full index
      //noinspection unchecked
      listeners.dispatch().indexingStarting( attachment, starting_line == 0 );

      InputStream root_stream = null;
      PositionTrackingInputStream in = null;
      try {
        root_stream = openStreamForFile( file );
        in = new PositionTrackingInputStream(
          new BufferedInputStream( root_stream ) );

        int line = 0;
        // If we're skipping some data, do it now
        if ( starting_position > 0 ) {
          long actual = in.skip( starting_position );
          // If we weren't able to skip the desired number of bytes, we'll need
          // to do a full index because we don't know what line we're at.
          if ( actual != starting_position ) {
            // If mark is supported, we can just reset to the beginning and
            // go on
            if ( in.markSupported() ) {
              // NOTE(review): relies on reset() without a prior mark() returning
              // to the start of the stream; java.io.InputStream does not guarantee
              // that - confirm PositionTrackingInputStream's behavior.
              in.reset(); // pop to beginning (no mark set)

              // Indicate that we're starting a full index
              //noinspection unchecked
              listeners.dispatch().indexingStarting( attachment, true );
            // If it's not supported, we'll need to close the files and run
            // a new instance.
            else {
              // Close out the files
//              IOKit.close( line_reader );
              IOKit.close( in );
              // Run new instance
              // NOTE(review): the statements that run the new instance and leave
              // this one are not visible in this copy.
              Indexer sub = new Indexer( 0, 0 );
          else line = starting_line;
        // At this point, ready to do the work, so grab a lock...
        // NOTE(review): no lock acquire/release is visible around this try/finally
        // in this copy; presumably row_index_map_lock is intended - confirm.
        try {
          // If this is a full index, clear the existing map
          // NOTE(review): only the row_skip_mod reset is visible in this copy; the
          // statement that clears the map is missing.
          if ( line == 0 ) {
            row_skip_mod = 1;

          // This is not very efficient, but buffering messes up the position
          int bite;
          // NOTE(review): the read expression assigned to bite is missing from this
          // copy, so this line does not compile as shown.
          while( ( bite = ) != -1 ) {
            // If it's not a newline, ignore.
            // WARNING: this doesn't handle different line endings well
            if ( bite != '\n' ) continue;

            // NOTE: increment line right away since we're now at the end
            //       of the preceding line.
            // NOTE(review): the increment this comment describes is not visible in
            // this copy.

            // Record a position only for lines that survive the current thinning
            if ( line % row_skip_mod == 0 ) {
              row_index_map.put( line, in.position() );
//              printRowIndexMap( "after add" );
              // Over budget: double the skip modulus and drop entries that no
              // longer match it
              if ( row_index_map.size() > max_index_size ) {
                row_skip_mod = increaseRowSkipMod( row_skip_mod, line );
                printRowIndexMap( "after grow" );
        finally {
        // Publish the results for readers and for FileChangeChecker
        num_lines = line;
        last_index_size = in.position();

//        System.out.println( "  Found " + num_lines + " lines");
//        System.out.println( "  Map size: " + row_index_map.size() );
//        System.out.println( "Map: " );
//        for( int i = 0; i <= num_lines; i++ ) {
//          if ( !row_index_map.containsKey( i ) ) continue;
//          System.out.println( "  " + i + ": " + row_index_map.get( i ) );
//        }
      // The exception is swallowed: nothing is logged and listeners still receive
      // indexingFinished with whatever num_lines currently holds.
      catch( IOException ex ) {
        // TODO?
      finally {
//        IOKit.close( line_reader );
        IOKit.close( in );
        IOKit.close( root_stream );

        //noinspection unchecked
        listeners.dispatch().indexingFinished( attachment, num_lines );

        // Allow FileChangeChecker to schedule the next indexing pass
        indexer_scheduled.set( false );
    private int increaseRowSkipMod( int old_mod, int current_line ) {
      int new_mod = old_mod * 2;

      int old_size = row_index_map.size();

      // NOTE: skip 0, it's never in the map
      for( int i = 1; i <= current_line; i++ ) {
        // If it matched the old mod and now doesn't match, drop it
        if ( i % old_mod == 0 && i % new_mod != 0 ) {
          row_index_map.remove( i );

      if ( DEBUG_ROW_INDEX_MAP ) {
        System.out.println( " Row skip modified. Map size: " + old_size + "->" +
          row_index_map.size() + "  Mod: " + old_mod + "->" + new_mod );

      return new_mod;

  private void printRowIndexMap( String reason ) {
    if ( !DEBUG_ROW_INDEX_MAP ) return;

    StringBuilder buf = new StringBuilder( "Map " );
    buf.append( reason );
    buf.append( ": " );

    int[] keys = row_index_map.keys();
    Arrays.sort( keys );
    boolean first = true;
    for( int key : keys ) {
      if ( first ) first = false;
      else buf.append( ", " );

      buf.append( key );
      buf.append( "=" );
      buf.append( row_index_map.get( key ) );
    System.out.println( buf.toString() );

  /**
   * Checks the file to see if the size has changed (indicating we need to do additional
   * indexing).
   * <p>
   * NOTE: this can be MUCH more efficient with Java 7's file system watcher mechanism.
   */
  private class FileChangeChecker implements Runnable {
    public void run() {
      long file_length = file.length();
      long last_index_size = LogIndexer.this.last_index_size;
      if ( file_length == last_index_size ) {
//        System.out.println( "File unchanged" );
      // If the file size has decreased, the file has rotated. Do a full index.
      int starting_line;
      long starting_position;
      if ( file_length < last_index_size ) {
        starting_line = 0;
        starting_position = 0;
      else {
        starting_line = num_lines;
        starting_position = last_index_size;

      if ( indexer_scheduled.compareAndSet( false, true ) ) {
          new Indexer( starting_line, starting_position ) );

//  public static void main( String[] args ) {
//    final AtomicReference<LogIndexer> indexer_slot = new AtomicReference<LogIndexer>();
//    LogIndexListener listener = new LogIndexListener() {
//      long start;
//      @Override
//      public void indexingStarting( File file, boolean full ) {
//        System.out.println( "Index starting: " + ( full ? "full" : "partial" ) );
//        start = System.currentTimeMillis();
//      }
//      @Override
//      public void indexingFinished( File file, int total_rows ) {
//        System.out.println( "Index finished: " + total_rows + " (" +
//          ( System.currentTimeMillis() - start ) + " ms)" );
//        LogIndexer indexer = indexer_slot.get();
//        if ( indexer == null ) return;
//        int i = 0;
//        if ( total_rows > 10 ) i = total_rows - 10;
//        for( ; i < total_rows; i++ ) {
//          if ( i != 0 ) System.out.println();
//          int count = Math.min( 5, total_rows - i );
//          try {
//            long start = System.currentTimeMillis();
//            String[] lines = indexer.readLines( i, count );
//            long time = System.currentTimeMillis() - start;
//            System.out.println( "-- Rows " + i + " - " + ( i + count ) +
//              "(lookup: " + time + " ms):" );
//            for( String line : lines ) {
//              System.out.println( "--   " + line );
//            }
//          }
//          catch( IOException ex ) {
//            ex.printStackTrace();
//          }
//        }
//      }
//    };
//    LogIndexer indexer = new LogIndexer( new File( args[ 0 ] ), listener, 25000 );
//    indexer_slot.set( indexer );
//    CountDownLatch exit_latch = new CountDownLatch( 1 );
//    try {
//      exit_latch.await();
//    }
//    catch ( InterruptedException e ) {
//      // ignore
//    }
//  }

Related Classes of$Indexer

Copyright © 2018 All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact