/*
* Copyright (c) 2010-2013 TMate Software Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* For information on how to redistribute this software under
* the terms of a license other than GNU General Public License
* contact TMate Software at support@hg4j.com
*/
package org.tmatesoft.hg.internal;
import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.TIP;
import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE;
import java.io.File;
import java.io.IOException;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.Inflater;
import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.repo.HgInternals;
import org.tmatesoft.hg.repo.HgInvalidControlFileException;
import org.tmatesoft.hg.repo.HgInvalidRevisionException;
import org.tmatesoft.hg.repo.HgInvalidStateException;
import org.tmatesoft.hg.repo.HgRepository;
import org.tmatesoft.hg.repo.HgRuntimeException;
import org.tmatesoft.hg.util.Adaptable;
/**
* Open question: a single RevlogStream per file per repository, with an accessor to a record access session (e.g. with back/forward operations),
* or numerous RevlogStream instances, each with a separate representation of the underlying data (cached, lazy ChunkStream)?
*
* @see http://mercurial.selenic.com/wiki/Revlog
* @see http://mercurial.selenic.com/wiki/RevlogNG
*
* @author Artem Tikhomirov
* @author TMate Software Ltd.
*/
public class RevlogStream {
static final int INLINEDATA = 1 << 16;
/*
* Makes sense for an index with inline data only - actual offset of the record in the .i file
* (offset field value + revision * record size).
*
* Logically long[] (the revlog keeps an 8-byte offset field).
* However, (a) DataAccess currently doesn't operate with long seek/length,
* and, of greater significance, (b) inlined data is used for smaller files only,
* roughly up to 130 KB, so offsets there won't ever break int capacity
*/
private int[] indexRecordOffset;
private int[] baseRevisions;
private boolean inline = false;
private final File indexFile;
private File dataFile;
private final Internals repo;
// Keeps the last complete revision we've read. Note, this cached revision doesn't help
// for subsequent #iterate() calls with the same revision (the Inspector needs more data than
// we currently cache here; perhaps we shall cache everything it wants, to cover the same-revision
// case as well). For now this helps when the second #iterate() call is for a revision greater
// than the one from the first call, and both revisions share the same base revision. That's often
// the case when parents/children are analyzed.
private SoftReference<CachedRevision> lastRevisionRead;
private final ReferenceQueue<CachedRevision> lastRevisionQueue = new ReferenceQueue<CachedRevision>();
//
private final RevlogChangeMonitor changeTracker;
private List<Observer> observers;
private boolean shallDropDerivedCaches = false;
public RevlogStream(Internals hgRepo, File indexFile) {
repo = hgRepo;
this.indexFile = indexFile;
changeTracker = repo.getRevlogTracker(indexFile);
}
public boolean exists() {
return indexFile.exists();
}
/**
* @param shortRead pass <code>true</code> to indicate an intention to read only a few revisions (as opposed to reading most of, or the complete, revlog)
* @return never <code>null</code>, empty {@link DataAccess} if no stream is available
*/
/*package*/ DataAccess getIndexStream(boolean shortRead) {
// shortRead hint helps to avoid mmap files when only
// few bytes are to be read (i.e. #dataLength())
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createReader(indexFile, shortRead);
}
/*package*/ DataAccess getDataStream() {
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createReader(getDataFile(), false);
}
/*package*/ DataSerializer getIndexStreamWriter(Transaction tr) throws HgIOException {
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createWriter(tr, indexFile, true);
}
/*package*/ DataSerializer getDataStreamWriter(Transaction tr) throws HgIOException {
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createWriter(tr, getDataFile(), true);
}
/**
* Constructs the file object that corresponds to the .d revlog counterpart (e.g. <code>00changelog.i</code> maps to <code>00changelog.d</code>).
* Note, it's the caller's responsibility to ensure this file makes sense (i.e. check the {@link #inline} attribute first)
*/
private File getDataFile() {
if (dataFile == null) {
final String indexName = indexFile.getName();
dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
}
return dataFile;
}
// initialize exception with the file where revlog structure information comes from
public HgInvalidControlFileException initWithIndexFile(HgInvalidControlFileException ex) {
return ex.setFile(indexFile);
}
public HgIOException initWithIndexFile(HgIOException ex) {
return ex.setFile(indexFile);
}
// initialize exception with the file where revlog data comes from
public HgInvalidControlFileException initWithDataFile(HgInvalidControlFileException ex) {
// exceptions are usually raised after a read attempt, hence inline shall be initialized,
// although the honest approach is to call #initOutline() first
return ex.setFile(inline ? indexFile : getDataFile());
}
/*package-private*/String getDataFileName() {
// XXX a temporary solution to provide more info to fill in exceptions other than
// HgInvalidControlFileException (those benefit from initWith* methods above)
//
// Besides, since RevlogStream represents both revlogs with user data (those with a working-copy
// counterpart, kept under store/data) and system-only revlogs (like changelog and manifest), there's no
// easy way to supply a human-friendly name of the active file (regardless of whether it's index or data)
return inline ? indexFile.getPath() : getDataFile().getPath();
}
public boolean isInlineData() throws HgInvalidControlFileException {
initOutline();
return inline;
}
public int revisionCount() throws HgInvalidControlFileException {
initOutline();
return baseRevisions.length;
}
/**
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int dataLength(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
// XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
//
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 12); // 6+2+4
int actualLen = daIndex.readInt();
return actualLen;
} catch (IOException ex) {
throw new HgInvalidControlFileException(null, ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
/**
* Read nodeid at given index
*
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public byte[] nodeid(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 32);
byte[] rv = new byte[20];
daIndex.readBytes(rv, 0, 20);
return rv;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
/**
* Get link field from the index record.
*
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int linkRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 20);
int linkRev = daIndex.readInt();
return linkRev;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
/**
* Extract base revision field from the revlog
*
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int baseRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
revisionIndex = checkRevisionIndex(revisionIndex);
return getBaseRevision(revisionIndex);
}
/**
* Read indexes of parent revisions
* @param revisionIndex index of child revision
* @param parents array to hold return value, length >= 2
* @return value of <code>parents</code> parameter for convenience
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int[] parents(int revisionIndex, int[] parents) throws HgInvalidControlFileException, HgInvalidRevisionException {
assert parents.length > 1;
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 24);
int p1 = daIndex.readInt();
int p2 = daIndex.readInt();
// although NO_REVISION == -1, it doesn't hurt to ensure this
parents[0] = p1 == -1 ? NO_REVISION : p1;
parents[1] = p2 == -1 ? NO_REVISION : p2;
return parents;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Parents lookup failed", ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
// Perhaps, RevlogStream should be limited to use of plain int revisions for access,
// while Nodeids should be kept a level up, in Revlog. Better yet, Revlog could keep a
// map of nodeids, and once that's in place, we may get rid of this method.
// Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, this one doesn't fail with an exception if the node is not found.
/**
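* Finds the index of the given revision in this revlog. A minimal usage sketch (the
* <code>revlogStream</code> and <code>someNodeid</code> names are illustrative assumptions, not part of this API):
* <pre>{@code
* int ri = revlogStream.findRevisionIndex(someNodeid);
* if (ri == HgRepository.BAD_REVISION) {
*     // the revision is not present in this revlog
* }
* }</pre>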
* @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found
* @throws HgInvalidControlFileException if attempt to read index file failed
*/
public int findRevisionIndex(Nodeid nodeid) throws HgInvalidControlFileException {
// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
final int indexSize = revisionCount();
DataAccess daIndex = getIndexStream(false);
try {
byte[] nodeidBuf = new byte[20];
for (int i = 0; i < indexSize; i++) {
daIndex.skip(8);
int compressedLen = daIndex.readInt();
daIndex.skip(20);
daIndex.readBytes(nodeidBuf, 0, 20);
if (nodeid.equalsTo(nodeidBuf)) {
return i;
}
daIndex.skip(inline ? 12 + compressedLen : 12);
}
} catch (IOException ex) {
throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevision(nodeid);
} finally {
daIndex.done();
}
return BAD_REVISION;
}
/**
* @return value suitable for the corresponding field in the new revision's header, not physical offset in the file
* (which is different in case of inline revlogs)
*/
public long newEntryOffset() throws HgInvalidControlFileException {
if (revisionCount() == 0) {
return 0;
}
DataAccess daIndex = getIndexStream(true);
int lastRev = revisionCount() - 1;
try {
int recordOffset = getIndexOffsetInt(lastRev);
daIndex.seek(recordOffset);
long value = daIndex.readLong();
value = value >>> 16;
int compressedLen = daIndex.readInt();
return lastRev == 0 ? compressedLen : value + compressedLen;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(lastRev);
} finally {
daIndex.done();
}
}
/**
* Iterates over the given revision range, passing each record to the inspector.
* It should be possible to use the TIP, ALL, or -1, -2, -n notation of Hg.
* ? boolean needsNodeid
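* <p>A minimal usage sketch (the <code>revlogStream</code> variable and the header-only pass are
* illustrative assumptions, not prescribed by this API):
* <pre>{@code
* revlogStream.iterate(0, HgRepository.TIP, false, new RevlogStream.Inspector() {
*     public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision,
*             int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
*         // with needData == false, data comes as null; header fields are still reported
*     }
* });
* }</pre>
* The {@link #iterate(int[], boolean, Inspector)} variant is used the same way for a sparse set of revisions.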
* @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
*/
public void iterate(int start, int end, boolean needData, Inspector inspector) throws HgRuntimeException {
initOutline();
final int indexSize = revisionCount();
if (indexSize == 0) {
return;
}
if (end == TIP) {
end = indexSize - 1;
}
if (start == TIP) {
start = indexSize - 1;
}
HgInternals.checkRevlogRange(start, end, indexSize-1);
// XXX may cache [start .. end] from index with a single read (pre-read)
ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches());
try {
r.start(end - start + 1, getLastRevisionRead());
r.range(start, end);
} catch (IOException ex) {
throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile);
} finally {
CachedRevision cr = r.finish();
setLastRevisionRead(cr);
}
}
/**
* Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only a few selected
* revisions are of interest.
* @param sortedRevisions revisions to walk, in ascending order.
* @param needData pass <code>true</code> if the inspector needs access to revision content, <code>false</code> if header fields are enough
* @param inspector callback to process entries
* @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
*/
public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) throws HgRuntimeException {
final int indexSize = revisionCount();
if (indexSize == 0 || sortedRevisions.length == 0) {
return;
}
if (sortedRevisions[0] >= indexSize) {
throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize - 1), null, sortedRevisions[0]);
}
if (sortedRevisions[sortedRevisions.length - 1] >= indexSize) {
throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize - 1), null, sortedRevisions[sortedRevisions.length - 1]);
}
ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches());
try {
r.start(sortedRevisions.length, getLastRevisionRead());
for (int i = 0; i < sortedRevisions.length; ) {
int x = i;
i++;
while (i < sortedRevisions.length) {
if (sortedRevisions[i] == sortedRevisions[i-1] + 1) {
i++;
} else {
break;
}
}
// commitRevisions[x..i-1] are sequential
if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) {
return;
}
}
} catch (IOException ex) {
final int c = sortedRevisions.length;
throw new HgInvalidControlFileException(String.format("Failed reading %d revisions in [%d; %d]", c, sortedRevisions[0], sortedRevisions[c-1]), ex, indexFile);
} finally {
CachedRevision cr = r.finish();
setLastRevisionRead(cr);
}
}
public void attach(Observer listener) {
assert listener != null;
if (observers == null) {
observers = new ArrayList<Observer>(3);
}
observers.add(listener);
}
public void detach(Observer listener) {
assert listener != null;
if (observers != null) {
observers.remove(listener);
}
}
/*
* Note, this method IS NOT a replacement for Observer. It has to be invoked when the validity of any
* cache built using revision information is in doubt, but it provides a reasonable value only until the
* first initOutline() is invoked, i.e. in the [change..revlog read operation] time frame. If your code
* accesses cached information without any prior explicit read operation, it shall consult this method
* to learn whether the next read operation would in fact bring changed content.
* Observer is needed in addition to this method because any revlog read operation (e.g. Revlog#getLastRevision)
* would clear shallDropDerivedCaches(), and if code relies only on this method to clear its derived caches,
* it would miss the update.
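*
* A minimal client sketch (the derivedCache field is a hypothetical example of such a derived cache):
*
*   if (revlogStream.shallDropDerivedCaches()) {
*       derivedCache = null; // rebuild from fresh revlog content on the next read
*   }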
*/
public boolean shallDropDerivedCaches() {
if (shallDropDerivedCaches) {
return shallDropDerivedCaches;
}
return shallDropDerivedCaches = changeTracker.hasChanged(indexFile);
}
void revisionAdded(int revisionIndex, Nodeid revision, int baseRevisionIndex, long revisionOffset) throws HgInvalidControlFileException {
shallDropDerivedCaches = true;
if (!outlineCached()) {
return;
}
if (baseRevisions.length != revisionIndex) {
throw new HgInvalidControlFileException(String.format("New entry's index shall be %d, not %d", baseRevisions.length, revisionIndex), null, indexFile);
}
if (baseRevisionIndex < 0 || baseRevisionIndex > baseRevisions.length) {
// baseRevisionIndex MAY be equal to baseRevisions.length; that's when the new revision is based on itself
throw new HgInvalidControlFileException(String.format("Base revision index %d doesn't fit [0..%d] range", baseRevisionIndex, baseRevisions.length), null, indexFile);
}
assert revision != null;
assert !revision.isNull();
// The effort below doesn't seem to be of any value, at least in case of a regular commit,
// as the next call to #initOutline would recognize the file change and reload the complete revlog anyway.
// OTOH, there might be a transaction strategy that doesn't update the file until its completion,
// while it's handy to know new revisions meanwhile.
int[] baseRevisionsCopy = new int[baseRevisions.length + 1];
System.arraycopy(baseRevisions, 0, baseRevisionsCopy, 0, baseRevisions.length);
baseRevisionsCopy[baseRevisions.length] = baseRevisionIndex;
baseRevisions = baseRevisionsCopy;
if (inline && indexRecordOffset != null) {
assert indexRecordOffset.length == revisionIndex;
int[] indexRecordOffsetCopy = new int[indexRecordOffset.length + 1];
System.arraycopy(indexRecordOffset, 0, indexRecordOffsetCopy, 0, indexRecordOffset.length);
indexRecordOffsetCopy[indexRecordOffset.length] = offsetFieldToInlineFileOffset(revisionOffset, revisionIndex);
indexRecordOffset = indexRecordOffsetCopy;
}
}
private int getBaseRevision(int revision) {
return baseRevisions[revision];
}
/**
* @param revisionIndex shall be valid index, [0..baseRevisions.length-1].
* It's advised to use {@link #checkRevisionIndex(int)} to ensure argument is correct.
* @return offset of the revision's record in the index (.i) stream
*/
private int getIndexOffsetInt(int revisionIndex) {
return inline ? indexRecordOffset[revisionIndex] : revisionIndex * REVLOGV1_RECORD_SIZE;
}
private int checkRevisionIndex(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
final int last = revisionCount() - 1;
if (revisionIndex == TIP) {
revisionIndex = last;
}
if (revisionIndex < 0 || revisionIndex > last) {
throw new HgInvalidRevisionException(revisionIndex).setRevisionIndex(revisionIndex, 0, last);
}
return revisionIndex;
}
private boolean outlineCached() {
return baseRevisions != null && baseRevisions.length > 0;
}
// Translates the 6-byte offset field value to the physical offset of the record in the .i file, for inline revlogs.
// DOESN'T MAKE SENSE if the revlog keeps its data in a separate .d file.
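// Illustration (made-up numbers): for record index 2 with an offset field value of 100, the record itself
// starts at 100 + 2 * REVLOGV1_RECORD_SIZE = 228 bytes into the .i file, since the two preceding 64-byte
// index records and the 100 bytes of their inlined data chunks come first; the record's own data chunk
// then follows its 64-byte header (see ReaderN1#getStoredData).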
private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException {
int o = Internals.ltoi(offset);
if (o != offset) {
// just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
// with inlined data of size greater than 2 Gb.
throw new HgInvalidStateException("Data too big, offset didn't fit to sizeof(int)");
}
return o + REVLOGV1_RECORD_SIZE * recordIndex;
}
// every access to index revlog goes after this method only.
private void initOutline() throws HgInvalidControlFileException {
// true to send out 'drop-your-caches' event after outline has been built
final boolean notifyReload;
if (outlineCached()) {
if (!changeTracker.hasChanged(indexFile)) {
return;
}
notifyReload = true;
} else {
// no cached outline - initial read, do not send any reload/invalidate notifications
notifyReload = false;
}
changeTracker.touch(indexFile);
DataAccess da = getIndexStream(false);
try {
if (da.isEmpty()) {
// do not fail with exception if stream is empty, it's likely intentional
baseRevisions = new int[0];
// empty revlog, likely to be populated, indicate we start with a single file
inline = true;
return;
}
int versionField = da.readInt();
da.readInt(); // just to skip next 4 bytes of offset + flags
inline = (versionField & INLINEDATA) != 0;
IntVector resBases, resOffsets = null;
int entryCountGuess = Internals.ltoi(da.longLength() / REVLOGV1_RECORD_SIZE);
if (inline) {
entryCountGuess >>>= 2; // pure guess, assume useful data takes 3/4 of total space
resOffsets = new IntVector(entryCountGuess, 5000);
}
resBases = new IntVector(entryCountGuess, 5000);
long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
while(true) {
int compressedLen = da.readInt();
// 8+4 = 12 bytes total read here
@SuppressWarnings("unused")
int actualLen = da.readInt();
int baseRevision = da.readInt();
// 12 + 8 = 20 bytes read here
// int linkRevision = di.readInt();
// int parent1Revision = di.readInt();
// int parent2Revision = di.readInt();
// byte[] nodeid = new byte[32];
resBases.add(baseRevision);
if (inline) {
int o = offsetFieldToInlineFileOffset(offset, resOffsets.size());
resOffsets.add(o);
da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
} else {
da.skip(3*4 + 32);
}
if (da.isEmpty()) {
// fine, done then
baseRevisions = resBases.toArray(true);
if (inline) {
indexRecordOffset = resOffsets.toArray(true);
}
break;
} else {
// start reading next record
long l = da.readLong();
offset = l >>> 16;
}
}
} catch (IOException ex) {
throw new HgInvalidControlFileException("Failed to analyze revlog index", ex, indexFile);
} finally {
da.done();
if (notifyReload && observers != null) {
for (Observer l : observers) {
l.reloaded(this);
}
shallDropDerivedCaches = false;
}
}
}
private CachedRevision getLastRevisionRead() {
return lastRevisionRead == null ? null : lastRevisionRead.get();
}
private void setLastRevisionRead(CachedRevision cr) {
// done() for lastRevisionRead.userData has been called by ReaderN1 once
// it noticed unsuitable DataAccess.
// Now, done() for any CachedRevision cleared by GC:
for (Reference<? extends CachedRevision> r; (r = lastRevisionQueue.poll()) != null;) {
CachedRevision toClean = r.get();
if (toClean != null && toClean.userData != null) {
toClean.userData.done();
}
}
if (cr != null) {
lastRevisionRead = new SoftReference<CachedRevision>(cr, lastRevisionQueue);
} else {
lastRevisionRead = null;
}
}
final static class CachedRevision {
final int revision;
final DataAccess userData;
public CachedRevision(int lastRevisionRead, DataAccess lastUserData) {
revision = lastRevisionRead;
userData = lastUserData;
}
}
/**
* Operation with a single file open/close and multiple diverse reads.
* XXX initOutline might need similar extraction to keep N1 format knowledge
*/
final class ReaderN1 {
private final Inspector inspector;
private final boolean needData;
private final boolean mergePatches;
private DataAccess daIndex = null, daData = null;
private Lifecycle.BasicCallback cb = null;
private Lifecycle lifecycleListener = null;
private int lastRevisionRead = BAD_REVISION;
private DataAccess lastUserData;
//
// next are transient values, for range() use only
private final Inflater inflater = new Inflater();
// can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO [post-1.1] consider using DAP.DEFAULT_FILE_BUFFER
private final ByteBuffer inflaterOutBuffer = ByteBuffer.allocate(inflaterBuffer.length * 2);
private final byte[] nodeidBuf = new byte[20];
// revlog record fields
private long offset;
@SuppressWarnings("unused")
private int flags;
private int compressedLen;
private int actualLen;
private int baseRevision;
private int linkRevision;
private int parent1Revision;
private int parent2Revision;
public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) {
assert insp != null;
needData = dataRequested;
inspector = insp;
mergePatches = usePatchMerge;
}
public void start(int totalWork, CachedRevision cachedRevision) {
daIndex = getIndexStream(totalWork <= 10);
if (needData && !inline) {
daData = getDataStream();
}
lifecycleListener = Adaptable.Factory.getAdapter(inspector, Lifecycle.class, null);
if (lifecycleListener != null) {
cb = new Lifecycle.BasicCallback();
lifecycleListener.start(totalWork, cb, cb);
}
if (needData && cachedRevision != null) {
lastUserData = cachedRevision.userData;
lastRevisionRead = cachedRevision.revision;
assert lastUserData != null;
}
}
// invoked only once per instance
public CachedRevision finish() {
CachedRevision rv = null;
if (lastUserData != null) {
if (lastUserData instanceof ByteArrayDataAccess) {
// it's safe to cache only in-memory revision texts,
// if lastUserData is merely a filter over file stream,
// we'd need to keep file open, and this is bad.
// XXX perhaps, wrap any DataAccess.byteArray into
// ByteArrayDataAccess?
rv = new CachedRevision(lastRevisionRead, lastUserData);
} else {
lastUserData.done();
}
lastUserData = null;
}
if (lifecycleListener != null) {
lifecycleListener.finish(cb);
lifecycleListener = null;
cb = null;
}
daIndex.done();
if (daData != null) {
daData.done();
daData = null;
}
return rv;
}
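// RevlogNG index record layout, 64 bytes per record (the same fields are parsed in #initOutline):
//   +0  : 6-byte offset of the revision's data within the data stream, plus 2-byte flags (read together as one long)
//   +8  : 4-byte length of the stored (possibly compressed) data chunk
//   +12 : 4-byte length of the uncompressed revision content
//   +16 : 4-byte base revision index
//   +20 : 4-byte link (changelog) revision index
//   +24 : 4-byte first parent revision index
//   +28 : 4-byte second parent revision index
//   +32 : 32 bytes reserved for nodeid, of which the first 20 are used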
private void readHeaderRecord(int i) throws IOException {
if (inline && needData) {
// inspector reading data (through FilterDataAccess) may have affected index position
daIndex.seek(getIndexOffsetInt(i));
}
long l = daIndex.readLong(); // 0
offset = i == 0 ? 0 : (l >>> 16);
flags = (int) (l & 0x0FFFF);
compressedLen = daIndex.readInt(); // +8
actualLen = daIndex.readInt(); // +12
baseRevision = daIndex.readInt(); // +16
linkRevision = daIndex.readInt(); // +20
parent1Revision = daIndex.readInt();
parent2Revision = daIndex.readInt();
// Hg has 32 bytes here, uses 20 for nodeid, and keeps the last 12 bytes empty
daIndex.readBytes(nodeidBuf, 0, 20); // +32
daIndex.skip(12);
}
private boolean isPatch(int i) {
return baseRevision != i; // the only way I found to tell if it's a patch
}
private DataAccess getStoredData(int i) throws IOException {
DataAccess userDataAccess = null;
DataAccess streamDataAccess;
long streamOffset;
if (inline) {
streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE;
streamDataAccess = daIndex;
// don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case
daIndex.longSeek(streamOffset);
} else {
streamOffset = offset;
streamDataAccess = daData;
daData.longSeek(streamOffset);
}
if (streamDataAccess.isEmpty() || compressedLen == 0) {
userDataAccess = new DataAccess(); // empty
} else {
final byte firstByte = streamDataAccess.readByte();
if (firstByte == 0x78 /* 'x' */) {
inflater.reset();
userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer, inflaterOutBuffer);
} else if (firstByte == 0x75 /* 'u' */) {
userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
} else {
// XXX Python impl in fact throws an exception when the first byte is neither 'x', 'u' nor '\0', but I don't see a reason not to return the data as is
//
// although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to
// initial offset before first attempt to read a byte
userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
}
}
return userDataAccess;
}
// may be invoked few times per instance life
public boolean range(int start, int end) throws IOException, HgRuntimeException {
int i;
// when data is needed and the base revision of 'start' precedes it, content has to be rebuilt from that base;
// otherwise we may begin right at 'start' (i.e. with i >= start)
if (needData && (i = getBaseRevision(start)) < start) {
// if lastRevisionRead is in [baseRevision(start), start), we can reuse lastUserData
// doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) {
i = lastRevisionRead + 1; // start with first not-yet-read revision
} else {
if (lastUserData != null) {
lastUserData.done();
lastUserData = null;
}
}
} else {
// don't need to clean lastUserData as it's always null when !needData
i = start;
}
daIndex.seek(getIndexOffsetInt(i));
//
// reuse instance, do not normalize it as patches from the stream are unlikely to need it
final Patch patch = new Patch(false);
//
if (needData && mergePatches && start-i > 2) {
// i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort
Patch ultimatePatch = new Patch(true);
for ( ; i < start; i++) {
readHeaderRecord(i);
DataAccess userDataAccess = getStoredData(i);
if (lastUserData == null) {
assert !isPatch(i);
lastUserData = userDataAccess;
} else {
assert isPatch(i); // i < start and i == getBaseRevision()
patch.read(userDataAccess);
userDataAccess.done();
// I assume empty patches are applied ok
ultimatePatch = ultimatePatch.apply(patch);
patch.clear();
}
}
lastUserData.reset();
byte[] userData = ultimatePatch.apply(lastUserData, actualLen);
ultimatePatch.clear();
lastUserData.done();
lastUserData = new ByteArrayDataAccess(userData);
}
//
for (; i <= end; i++ ) {
readHeaderRecord(i);
DataAccess userDataAccess = null;
if (needData) {
userDataAccess = getStoredData(i);
// userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch
if (isPatch(i)) {
// this is a patch
if (userDataAccess.isEmpty()) {
// Issue 22, empty patch to an empty base revision
// Issue 24, empty patch to non-empty base revision
// an empty patch modifies nothing, use content of the previous revision (which shall be present - this is a patch)
//
assert lastUserData.length() == actualLen; // with no patch, data size shall be the same
userDataAccess = lastUserData;
} else {
patch.read(userDataAccess);
userDataAccess.done();
//
// it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
// however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
// Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
lastUserData.reset();
// final long startMeasuring = System.currentTimeMillis(); // TIMING
byte[] userData = patch.apply(lastUserData, actualLen);
// applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING
patch.clear(); // do not keep any reference, allow byte[] data to be gc'd
userDataAccess = new ByteArrayDataAccess(userData);
}
}
} else {
if (inline) {
daIndex.skip(compressedLen);
}
}
if (i >= start) {
// final long startMeasuring = System.currentTimeMillis(); // TIMING
inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
// inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING
}
if (cb != null) {
if (cb.isStopped()) {
return false;
}
}
if (userDataAccess != null) {
userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use.
}
if (lastUserData != null && lastUserData != userDataAccess /* empty patch case, reuse of recent data in actual revision */) {
// release lastUserData only if we didn't reuse it in actual revision due to empty patch:
// empty patch means we have previous revision and didn't alter it with a patch, hence use lastUserData for userDataAccess above
lastUserData.done();
}
lastUserData = userDataAccess;
}
lastRevisionRead = end;
return true;
}
}
public interface Inspector {
/**
* XXX boolean retVal to indicate whether to continue?
*
* Implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropriate moment
*
* @param revisionIndex absolute index of revision in revlog being iterated
* @param actualLen length of the user data at this revision
* @param baseRevision index of the last revision known to hold complete content (others hold patches);
* if baseRevision != revisionIndex, data for this revision is a result of a sequence of patches
* @param linkRevision index of corresponding changeset revision
* @param parent1Revision index of first parent revision in this revlog, or {@link HgRepository#NO_REVISION}
* @param parent2Revision index of second parent revision in this revlog, or {@link HgRepository#NO_REVISION}
* @param nodeid 20-byte buffer, shared between invocations
* @param data access to revision content of actualLen size, or <code>null</code> if no data has been requested with
* {@link RevlogStream#iterate(int[], boolean, Inspector)}
*/
void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data) throws HgRuntimeException;
}
public interface Observer {
// notify observer of invalidate/reload event in the stream
public void reloaded(RevlogStream src);
}
}