/*
* Copyright (c) 2010-2013 TMate Software Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* For information on how to redistribute this software under
* the terms of a license other than GNU General Public License
* contact TMate Software at support@hg4j.com
*/
package org.tmatesoft.hg.internal;
import static org.tmatesoft.hg.repo.HgRepository.BAD_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.NO_REVISION;
import static org.tmatesoft.hg.repo.HgRepository.TIP;
import static org.tmatesoft.hg.internal.Internals.REVLOGV1_RECORD_SIZE;
import java.io.File;
import java.io.IOException;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.Inflater;
import org.tmatesoft.hg.core.HgIOException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.repo.HgInternals;
import org.tmatesoft.hg.repo.HgInvalidControlFileException;
import org.tmatesoft.hg.repo.HgInvalidRevisionException;
import org.tmatesoft.hg.repo.HgInvalidStateException;
import org.tmatesoft.hg.repo.HgRepository;
import org.tmatesoft.hg.repo.HgRuntimeException;
import org.tmatesoft.hg.util.Adaptable;
/**
* Open question: a single RevlogStream per file per repository, with an accessor to a record access session (e.g. with back/forward operations),
* or numerous RevlogStream instances, each with a separate representation of the underlying data (cached, lazy ChunkStream)?
*
* @see http://mercurial.selenic.com/wiki/Revlog
* @see http://mercurial.selenic.com/wiki/RevlogNG
*
* @author Artem Tikhomirov
* @author TMate Software Ltd.
*/
public class RevlogStream {
static final int INLINEDATA = 1 << 16;
/*
* Makes sense for an index with inline data only - actual offset of the record in the .i file
* (offset field value + revision * record size).
*
* Logically long[] (the revlog keeps an 8-byte offset field).
* However, (a) DataAccess currently doesn't operate with long seek/length,
* and, of greater significance, (b) inlined data is used for smaller files only,
* roughly up to 130 KB, so offsets there won't ever break int capacity
*/
private int[] indexRecordOffset;
private int[] baseRevisions;
private boolean inline = false;
private final File indexFile;
private File dataFile;
private final Internals repo;
// Keeps the last complete revision we've read. Note, this cached revision doesn't help
// for subsequent #iterate() calls with the same revision (the Inspector needs more data than
// we currently cache here; perhaps we shall cache everything it wants, to cover the same-revision
// case as well). For now this helps when the second #iterate() call is for a revision greater
// than the one from the first call, and both revisions share the same base revision. That's often
// the case when parents/children are analyzed.
private SoftReference<CachedRevision> lastRevisionRead;
private final ReferenceQueue<CachedRevision> lastRevisionQueue = new ReferenceQueue<CachedRevision>();
//
private final RevlogChangeMonitor changeTracker;
private List<Observer> observers;
private boolean shallDropDerivedCaches = false;
public RevlogStream(Internals hgRepo, File indexFile) {
repo = hgRepo;
this.indexFile = indexFile;
changeTracker = repo.getRevlogTracker(indexFile);
}
public boolean exists() {
return indexFile.exists();
}
/**
* @param shortRead pass <code>true</code> to indicate an intention to read only a few revisions (as opposed to reading most of, or the complete, revlog)
* @return never <code>null</code>, empty {@link DataAccess} if no stream is available
*/
/*package*/ DataAccess getIndexStream(boolean shortRead) {
// shortRead hint helps to avoid mmap files when only
// few bytes are to be read (i.e. #dataLength())
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createReader(indexFile, shortRead);
}
/*package*/ DataAccess getDataStream() {
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createReader(getDataFile(), false);
}
/*package*/ DataSerializer getIndexStreamWriter(Transaction tr) throws HgIOException {
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createWriter(tr, indexFile, true);
}
/*package*/ DataSerializer getDataStreamWriter(Transaction tr) throws HgIOException {
DataAccessProvider dataAccess = repo.getDataAccess();
return dataAccess.createWriter(tr, getDataFile(), true);
}
/**
* Constructs the file object that corresponds to the .d revlog counterpart (e.g. <code>00changelog.i</code> maps to <code>00changelog.d</code>).
* Note, it's the caller's responsibility to ensure this file makes sense (i.e. check the {@link #inline} attribute first)
*/
private File getDataFile() {
if (dataFile == null) {
final String indexName = indexFile.getName();
dataFile = new File(indexFile.getParentFile(), indexName.substring(0, indexName.length() - 1) + "d");
}
return dataFile;
}
// initialize exception with the file where revlog structure information comes from
public HgInvalidControlFileException initWithIndexFile(HgInvalidControlFileException ex) {
return ex.setFile(indexFile);
}
public HgIOException initWithIndexFile(HgIOException ex) {
return ex.setFile(indexFile);
}
// initialize exception with the file where revlog data comes from
public HgInvalidControlFileException initWithDataFile(HgInvalidControlFileException ex) {
// exceptions are usually raised after a read attempt, hence inline shall be initialized,
// although the honest approach is to call #initOutline() first
return ex.setFile(inline ? indexFile : getDataFile());
}
/*package-private*/String getDataFileName() {
// XXX a temporary solution to provide more info to fill in exceptions other than
// HgInvalidControlFileException (those benefit from initWith* methods above)
//
// Besides, since RevlogStream represents both revlogs with user data (those with a working-copy
// counterpart, kept under store/data) and system-only revlogs (like changelog and manifest), there's no
// easy way to supply a human-friendly name of the active file (regardless of whether it's index or data)
return inline ? indexFile.getPath() : getDataFile().getPath();
}
public boolean isInlineData() throws HgInvalidControlFileException {
initOutline();
return inline;
}
public int revisionCount() throws HgInvalidControlFileException {
initOutline();
return baseRevisions.length;
}
/**
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int dataLength(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
// XXX in fact, use of iterate() instead of this implementation may be quite reasonable.
//
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 12); // 6+2+4
int actualLen = daIndex.readInt();
return actualLen;
} catch (IOException ex) {
throw new HgInvalidControlFileException(null, ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
/**
* Read nodeid at given index
*
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public byte[] nodeid(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 32);
byte[] rv = new byte[20];
daIndex.readBytes(rv, 0, 20);
return rv;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
/**
* Get link field from the index record.
*
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int linkRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 20);
int linkRev = daIndex.readInt();
return linkRev;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
/**
* Extract base revision field from the revlog
*
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int baseRevision(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
revisionIndex = checkRevisionIndex(revisionIndex);
return getBaseRevision(revisionIndex);
}
/**
* Read indexes of parent revisions
* @param revisionIndex index of child revision
* @param parents array to hold return value, length >= 2
* @return value of <code>parents</code> parameter for convenience
* @throws HgInvalidControlFileException if attempt to read index file failed
* @throws HgInvalidRevisionException if revisionIndex argument doesn't represent a valid record in the revlog
*/
public int[] parents(int revisionIndex, int[] parents) throws HgInvalidControlFileException, HgInvalidRevisionException {
assert parents.length > 1;
revisionIndex = checkRevisionIndex(revisionIndex);
DataAccess daIndex = getIndexStream(true);
try {
int recordOffset = getIndexOffsetInt(revisionIndex);
daIndex.seek(recordOffset + 24);
int p1 = daIndex.readInt();
int p2 = daIndex.readInt();
// although NO_REVISION == -1, it doesn't hurt to ensure this
parents[0] = p1 == -1 ? NO_REVISION : p1;
parents[1] = p2 == -1 ? NO_REVISION : p2;
return parents;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Parents lookup failed", ex, indexFile).setRevisionIndex(revisionIndex);
} finally {
daIndex.done();
}
}
// Perhaps, RevlogStream should be limited to use of plain int revisions for access,
// while Nodeids should be kept a level up, in Revlog. Better yet, Revlog could keep a
// map of nodeids, and once that's in place, we may get rid of this method.
// Unlike its counterpart, {@link Revlog#getLocalRevisionNumber()}, this one doesn't fail with an exception if the node is not found.
/**
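* Finds the index of the given revision in this revlog. A minimal usage sketch (the
* <code>revlogStream</code> and <code>someNodeid</code> names are illustrative assumptions, not part of this API):
* <pre>{@code
* int ri = revlogStream.findRevisionIndex(someNodeid);
* if (ri == HgRepository.BAD_REVISION) {
*     // the revision is not present in this revlog
* }
* }</pre>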
* @return integer in [0..revisionCount()) or {@link HgRepository#BAD_REVISION} if not found
* @throws HgInvalidControlFileException if attempt to read index file failed
*/
public int findRevisionIndex(Nodeid nodeid) throws HgInvalidControlFileException {
// XXX this one may be implemented with iterate() once there's mechanism to stop iterations
final int indexSize = revisionCount();
DataAccess daIndex = getIndexStream(false);
try {
byte[] nodeidBuf = new byte[20];
for (int i = 0; i < indexSize; i++) {
daIndex.skip(8);
int compressedLen = daIndex.readInt();
daIndex.skip(20);
daIndex.readBytes(nodeidBuf, 0, 20);
if (nodeid.equalsTo(nodeidBuf)) {
return i;
}
daIndex.skip(inline ? 12 + compressedLen : 12);
}
} catch (IOException ex) {
throw new HgInvalidControlFileException("Revision lookup failed", ex, indexFile).setRevision(nodeid);
} finally {
daIndex.done();
}
return BAD_REVISION;
}
/**
* @return value suitable for the corresponding field in the new revision's header, not physical offset in the file
* (which is different in case of inline revlogs)
*/
public long newEntryOffset() throws HgInvalidControlFileException {
if (revisionCount() == 0) {
return 0;
}
DataAccess daIndex = getIndexStream(true);
int lastRev = revisionCount() - 1;
try {
int recordOffset = getIndexOffsetInt(lastRev);
daIndex.seek(recordOffset);
long value = daIndex.readLong();
value = value >>> 16;
int compressedLen = daIndex.readInt();
return lastRev == 0 ? compressedLen : value + compressedLen;
} catch (IOException ex) {
throw new HgInvalidControlFileException("Linked revision lookup failed", ex, indexFile).setRevisionIndex(lastRev);
} finally {
daIndex.done();
}
}
/**
* Iterates over the given revision range, passing each record to the inspector.
* It should be possible to use the TIP, ALL, or -1, -2, -n notation of Hg.
* ? boolean needsNodeid
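* <p>A minimal usage sketch (the <code>revlogStream</code> variable and the header-only pass are
* illustrative assumptions, not prescribed by this API):
* <pre>{@code
* revlogStream.iterate(0, HgRepository.TIP, false, new RevlogStream.Inspector() {
*     public void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision,
*             int parent1Revision, int parent2Revision, byte[] nodeid, DataAccess data) {
*         // with needData == false, data comes as null; header fields are still reported
*     }
* });
* }</pre>
* The {@link #iterate(int[], boolean, Inspector)} variant is used the same way for a sparse set of revisions.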
* @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
*/
public void iterate(int start, int end, boolean needData, Inspector inspector) throws HgRuntimeException {
initOutline();
final int indexSize = revisionCount();
if (indexSize == 0) {
return;
}
if (end == TIP) {
end = indexSize - 1;
}
if (start == TIP) {
start = indexSize - 1;
}
HgInternals.checkRevlogRange(start, end, indexSize-1);
// XXX may cache [start .. end] from index with a single read (pre-read)
ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches());
try {
r.start(end - start + 1, getLastRevisionRead());
r.range(start, end);
} catch (IOException ex) {
throw new HgInvalidControlFileException(String.format("Failed reading [%d..%d]", start, end), ex, indexFile);
} finally {
CachedRevision cr = r.finish();
setLastRevisionRead(cr);
}
}
/**
* Effective alternative to {@link #iterate(int, int, boolean, Inspector) batch read}, when only a few selected
* revisions are of interest.
* @param sortedRevisions revisions to walk, in ascending order.
* @param needData pass <code>true</code> if the inspector needs access to revision content, <code>false</code> if header fields are enough
* @param inspector callback to process entries
* @throws HgRuntimeException subclass thereof to indicate issues with the library. <em>Runtime exception</em>
*/
public void iterate(int[] sortedRevisions, boolean needData, Inspector inspector) throws HgRuntimeException {
final int indexSize = revisionCount();
if (indexSize == 0 || sortedRevisions.length == 0) {
return;
}
if (sortedRevisions[0] >= indexSize) {
throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize - 1), null, sortedRevisions[0]);
}
if (sortedRevisions[sortedRevisions.length - 1] >= indexSize) {
throw new HgInvalidRevisionException(String.format("Can't iterate [%d, %d] in range [0..%d]", sortedRevisions[0], sortedRevisions[sortedRevisions.length - 1], indexSize - 1), null, sortedRevisions[sortedRevisions.length - 1]);
}
ReaderN1 r = new ReaderN1(needData, inspector, repo.shallMergePatches());
try {
r.start(sortedRevisions.length, getLastRevisionRead());
for (int i = 0; i < sortedRevisions.length; ) {
int x = i;
i++;
while (i < sortedRevisions.length) {
if (sortedRevisions[i] == sortedRevisions[i-1] + 1) {
i++;
} else {
break;
}
}
// commitRevisions[x..i-1] are sequential
if (!r.range(sortedRevisions[x], sortedRevisions[i-1])) {
return;
}
}
} catch (IOException ex) {
final int c = sortedRevisions.length;
throw new HgInvalidControlFileException(String.format("Failed reading %d revisions in [%d; %d]", c, sortedRevisions[0], sortedRevisions[c-1]), ex, indexFile);
} finally {
CachedRevision cr = r.finish();
setLastRevisionRead(cr);
}
}
public void attach(Observer listener) {
assert listener != null;
if (observers == null) {
observers = new ArrayList<Observer>(3);
}
observers.add(listener);
}
public void detach(Observer listener) {
assert listener != null;
if (observers != null) {
observers.remove(listener);
}
}
/*
* Note, this method IS NOT a replacement for Observer. It has to be invoked when the validity of any
* cache built using revision information is in doubt, but it provides a reasonable value only until the
* first initOutline() is invoked, i.e. in the [change..revlog read operation] time frame. If your code
* accesses cached information without any prior explicit read operation, it shall consult this method
* to learn whether the next read operation would in fact bring changed content.
* Observer is needed in addition to this method because any revlog read operation (e.g. Revlog#getLastRevision)
* would clear shallDropDerivedCaches(), and if code relies only on this method to clear its derived caches,
* it would miss the update.
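*
* A minimal client sketch (the derivedCache field is a hypothetical example of such a derived cache):
*
*   if (revlogStream.shallDropDerivedCaches()) {
*       derivedCache = null; // rebuild from fresh revlog content on the next read
*   }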
*/
public boolean shallDropDerivedCaches() {
if (shallDropDerivedCaches) {
return shallDropDerivedCaches;
}
return shallDropDerivedCaches = changeTracker.hasChanged(indexFile);
}
void revisionAdded(int revisionIndex, Nodeid revision, int baseRevisionIndex, long revisionOffset) throws HgInvalidControlFileException {
shallDropDerivedCaches = true;
if (!outlineCached()) {
return;
}
if (baseRevisions.length != revisionIndex) {
throw new HgInvalidControlFileException(String.format("New entry's index shall be %d, not %d", baseRevisions.length, revisionIndex), null, indexFile);
}
if (baseRevisionIndex < 0 || baseRevisionIndex > baseRevisions.length) {
// baseRevisionIndex MAY be equal to baseRevisions.length; that's when the new revision is based on itself
throw new HgInvalidControlFileException(String.format("Base revision index %d doesn't fit [0..%d] range", baseRevisionIndex, baseRevisions.length), null, indexFile);
}
assert revision != null;
assert !revision.isNull();
// The effort below doesn't seem to be of any value, at least in case of a regular commit,
// as the next call to #initOutline would recognize the file change and reload the complete revlog anyway.
// OTOH, there might be a transaction strategy that doesn't update the file until its completion,
// while it's handy to know new revisions meanwhile.
int[] baseRevisionsCopy = new int[baseRevisions.length + 1];
System.arraycopy(baseRevisions, 0, baseRevisionsCopy, 0, baseRevisions.length);
baseRevisionsCopy[baseRevisions.length] = baseRevisionIndex;
baseRevisions = baseRevisionsCopy;
if (inline && indexRecordOffset != null) {
assert indexRecordOffset.length == revisionIndex;
int[] indexRecordOffsetCopy = new int[indexRecordOffset.length + 1];
System.arraycopy(indexRecordOffset, 0, indexRecordOffsetCopy, 0, indexRecordOffset.length);
indexRecordOffsetCopy[indexRecordOffset.length] = offsetFieldToInlineFileOffset(revisionOffset, revisionIndex);
indexRecordOffset = indexRecordOffsetCopy;
}
}
private int getBaseRevision(int revision) {
return baseRevisions[revision];
}
/**
* @param revisionIndex shall be valid index, [0..baseRevisions.length-1].
* It's advised to use {@link #checkRevisionIndex(int)} to ensure argument is correct.
* @return offset of the revision's record in the index (.i) stream
*/
private int getIndexOffsetInt(int revisionIndex) {
return inline ? indexRecordOffset[revisionIndex] : revisionIndex * REVLOGV1_RECORD_SIZE;
}
private int checkRevisionIndex(int revisionIndex) throws HgInvalidControlFileException, HgInvalidRevisionException {
final int last = revisionCount() - 1;
if (revisionIndex == TIP) {
revisionIndex = last;
}
if (revisionIndex < 0 || revisionIndex > last) {
throw new HgInvalidRevisionException(revisionIndex).setRevisionIndex(revisionIndex, 0, last);
}
return revisionIndex;
}
private boolean outlineCached() {
return baseRevisions != null && baseRevisions.length > 0;
}
// Translates the 6-byte offset field value to the physical offset of the record in the .i file, for inline revlogs.
// DOESN'T MAKE SENSE if the revlog keeps its data in a separate .d file.
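// Illustration (made-up numbers): for record index 2 with an offset field value of 100, the record itself
// starts at 100 + 2 * REVLOGV1_RECORD_SIZE = 228 bytes into the .i file, since the two preceding 64-byte
// index records and the 100 bytes of their inlined data chunks come first; the record's own data chunk
// then follows its 64-byte header (see ReaderN1#getStoredData).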
private static int offsetFieldToInlineFileOffset(long offset, int recordIndex) throws HgInvalidStateException {
int o = Internals.ltoi(offset);
if (o != offset) {
// just in case, can't happen, ever, unless HG (or some other bad tool) produces index file
// with inlined data of size greater than 2 Gb.
throw new HgInvalidStateException("Data too big, offset didn't fit to sizeof(int)");
}
return o + REVLOGV1_RECORD_SIZE * recordIndex;
}
// every access to index revlog goes after this method only.
private void initOutline() throws HgInvalidControlFileException {
// true to send out 'drop-your-caches' event after outline has been built
final boolean notifyReload;
if (outlineCached()) {
if (!changeTracker.hasChanged(indexFile)) {
return;
}
notifyReload = true;
} else {
// no cached outline - initial read, do not send any reload/invalidate notifications
notifyReload = false;
}
changeTracker.touch(indexFile);
DataAccess da = getIndexStream(false);
try {
if (da.isEmpty()) {
// do not fail with exception if stream is empty, it's likely intentional
baseRevisions = new int[0];
// empty revlog, likely to be populated, indicate we start with a single file
inline = true;
return;
}
int versionField = da.readInt();
da.readInt(); // just to skip next 4 bytes of offset + flags
inline = (versionField & INLINEDATA) != 0;
IntVector resBases, resOffsets = null;
int entryCountGuess = Internals.ltoi(da.longLength() / REVLOGV1_RECORD_SIZE);
if (inline) {
entryCountGuess >>>= 2; // pure guess, assume useful data takes 3/4 of total space
resOffsets = new IntVector(entryCountGuess, 5000);
}
resBases = new IntVector(entryCountGuess, 5000);
long offset = 0; // first offset is always 0, thus Hg uses it for other purposes
while(true) {
int compressedLen = da.readInt();
// 8+4 = 12 bytes total read here
@SuppressWarnings("unused")
int actualLen = da.readInt();
int baseRevision = da.readInt();
// 12 + 8 = 20 bytes read here
// int linkRevision = di.readInt();
// int parent1Revision = di.readInt();
// int parent2Revision = di.readInt();
// byte[] nodeid = new byte[32];
resBases.add(baseRevision);
if (inline) {
int o = offsetFieldToInlineFileOffset(offset, resOffsets.size());
resOffsets.add(o);
da.skip(3*4 + 32 + compressedLen); // Check: 44 (skip) + 20 (read) = 64 (total RevlogNG record size)
} else {
da.skip(3*4 + 32);
}
if (da.isEmpty()) {
// fine, done then
baseRevisions = resBases.toArray(true);
if (inline) {
indexRecordOffset = resOffsets.toArray(true);
}
break;
} else {
// start reading next record
long l = da.readLong();
offset = l >>> 16;
}
}
} catch (IOException ex) {
throw new HgInvalidControlFileException("Failed to analyze revlog index", ex, indexFile);
} finally {
da.done();
if (notifyReload && observers != null) {
for (Observer l : observers) {
l.reloaded(this);
}
shallDropDerivedCaches = false;
}
}
}
private CachedRevision getLastRevisionRead() {
return lastRevisionRead == null ? null : lastRevisionRead.get();
}
private void setLastRevisionRead(CachedRevision cr) {
// done() for lastRevisionRead.userData has been called by ReaderN1 once
// it noticed unsuitable DataAccess.
// Now, done() for any CachedRevision cleared by GC:
for (Reference<? extends CachedRevision> r; (r = lastRevisionQueue.poll()) != null;) {
CachedRevision toClean = r.get();
if (toClean != null && toClean.userData != null) {
toClean.userData.done();
}
}
if (cr != null) {
lastRevisionRead = new SoftReference<CachedRevision>(cr, lastRevisionQueue);
} else {
lastRevisionRead = null;
}
}
final static class CachedRevision {
final int revision;
final DataAccess userData;
public CachedRevision(int lastRevisionRead, DataAccess lastUserData) {
revision = lastRevisionRead;
userData = lastUserData;
}
}
/**
* Operation with a single file open/close and multiple diverse reads.
* XXX initOutline might need similar extraction to keep N1 format knowledge
*/
final class ReaderN1 {
private final Inspector inspector;
private final boolean needData;
private final boolean mergePatches;
private DataAccess daIndex = null, daData = null;
private Lifecycle.BasicCallback cb = null;
private Lifecycle lifecycleListener = null;
private int lastRevisionRead = BAD_REVISION;
private DataAccess lastUserData;
//
// next are transient values, for range() use only
private final Inflater inflater = new Inflater();
// can share buffer between instances of InflaterDataAccess as I never read any two of them in parallel
private final byte[] inflaterBuffer = new byte[10 * 1024]; // TODO [post-1.1] consider using DAP.DEFAULT_FILE_BUFFER
private final ByteBuffer inflaterOutBuffer = ByteBuffer.allocate(inflaterBuffer.length * 2);
private final byte[] nodeidBuf = new byte[20];
// revlog record fields
private long offset;
@SuppressWarnings("unused")
private int flags;
private int compressedLen;
private int actualLen;
private int baseRevision;
private int linkRevision;
private int parent1Revision;
private int parent2Revision;
public ReaderN1(boolean dataRequested, Inspector insp, boolean usePatchMerge) {
assert insp != null;
needData = dataRequested;
inspector = insp;
mergePatches = usePatchMerge;
}
public void start(int totalWork, CachedRevision cachedRevision) {
daIndex = getIndexStream(totalWork <= 10);
if (needData && !inline) {
daData = getDataStream();
}
lifecycleListener = Adaptable.Factory.getAdapter(inspector, Lifecycle.class, null);
if (lifecycleListener != null) {
cb = new Lifecycle.BasicCallback();
lifecycleListener.start(totalWork, cb, cb);
}
if (needData && cachedRevision != null) {
lastUserData = cachedRevision.userData;
lastRevisionRead = cachedRevision.revision;
assert lastUserData != null;
}
}
// invoked only once per instance
public CachedRevision finish() {
CachedRevision rv = null;
if (lastUserData != null) {
if (lastUserData instanceof ByteArrayDataAccess) {
// it's safe to cache only in-memory revision texts,
// if lastUserData is merely a filter over file stream,
// we'd need to keep file open, and this is bad.
// XXX perhaps, wrap any DataAccess.byteArray into
// ByteArrayDataAccess?
rv = new CachedRevision(lastRevisionRead, lastUserData);
} else {
lastUserData.done();
}
lastUserData = null;
}
if (lifecycleListener != null) {
lifecycleListener.finish(cb);
lifecycleListener = null;
cb = null;
}
daIndex.done();
if (daData != null) {
daData.done();
daData = null;
}
return rv;
}
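// RevlogNG index record layout, 64 bytes per record (the same fields are parsed in #initOutline):
//   +0  : 6-byte offset of the revision's data within the data stream, plus 2-byte flags (read together as one long)
//   +8  : 4-byte length of the stored (possibly compressed) data chunk
//   +12 : 4-byte length of the uncompressed revision content
//   +16 : 4-byte base revision index
//   +20 : 4-byte link (changelog) revision index
//   +24 : 4-byte first parent revision index
//   +28 : 4-byte second parent revision index
//   +32 : 32 bytes reserved for nodeid, of which the first 20 are used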
private void readHeaderRecord(int i) throws IOException {
if (inline && needData) {
// inspector reading data (through FilterDataAccess) may have affected index position
daIndex.seek(getIndexOffsetInt(i));
}
long l = daIndex.readLong(); // 0
offset = i == 0 ? 0 : (l >>> 16);
flags = (int) (l & 0x0FFFF);
compressedLen = daIndex.readInt(); // +8
actualLen = daIndex.readInt(); // +12
baseRevision = daIndex.readInt(); // +16
linkRevision = daIndex.readInt(); // +20
parent1Revision = daIndex.readInt();
parent2Revision = daIndex.readInt();
// Hg has 32 bytes here, uses 20 for nodeid, and keeps the last 12 bytes empty
daIndex.readBytes(nodeidBuf, 0, 20); // +32
daIndex.skip(12);
}
private boolean isPatch(int i) {
return baseRevision != i; // the only way I found to tell if it's a patch
}
private DataAccess getStoredData(int i) throws IOException {
DataAccess userDataAccess = null;
DataAccess streamDataAccess;
long streamOffset;
if (inline) {
streamOffset = getIndexOffsetInt(i) + REVLOGV1_RECORD_SIZE;
streamDataAccess = daIndex;
// don't need to do seek as it's actual position in the index stream, but it's safe to seek, just in case
daIndex.longSeek(streamOffset);
} else {
streamOffset = offset;
streamDataAccess = daData;
daData.longSeek(streamOffset);
}
if (streamDataAccess.isEmpty() || compressedLen == 0) {
userDataAccess = new DataAccess(); // empty
} else {
final byte firstByte = streamDataAccess.readByte();
if (firstByte == 0x78 /* 'x' */) {
inflater.reset();
userDataAccess = new InflaterDataAccess(streamDataAccess, streamOffset, compressedLen, isPatch(i) ? -1 : actualLen, inflater, inflaterBuffer, inflaterOutBuffer);
} else if (firstByte == 0x75 /* 'u' */) {
userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset+1, compressedLen-1);
} else {
// XXX Python impl in fact throws an exception when the first byte is neither 'x', 'u' nor '\0', but I don't see a reason not to return the data as is
//
// although firstByte is already read from the streamDataAccess, FilterDataAccess#readByte would seek to
// initial offset before first attempt to read a byte
userDataAccess = new FilterDataAccess(streamDataAccess, streamOffset, compressedLen);
}
}
return userDataAccess;
}
// may be invoked few times per instance life
public boolean range(int start, int end) throws IOException, HgRuntimeException {
int i;
// when data is needed and the base revision of 'start' precedes it, content has to be rebuilt from that base;
// otherwise we may begin right at 'start' (i.e. with i >= start)
if (needData && (i = getBaseRevision(start)) < start) {
// if lastRevisionRead is in [baseRevision(start), start), we can reuse lastUserData
// doesn't make sense to reuse if lastRevisionRead == start (too much to change in the cycle below).
if (lastRevisionRead != BAD_REVISION && i <= lastRevisionRead && lastRevisionRead < start) {
i = lastRevisionRead + 1; // start with first not-yet-read revision
} else {
if (lastUserData != null) {
lastUserData.done();
lastUserData = null;
}
}
} else {
// don't need to clean lastUserData as it's always null when !needData
i = start;
}
daIndex.seek(getIndexOffsetInt(i));
//
// reuse instance, do not normalize it as patches from the stream are unlikely to need it
final Patch patch = new Patch(false);
//
if (needData && mergePatches && start-i > 2) {
// i+1 == start just reads lastUserData, i+2 == start applies one patch - not worth dedicated effort
Patch ultimatePatch = new Patch(true);
for ( ; i < start; i++) {
readHeaderRecord(i);
DataAccess userDataAccess = getStoredData(i);
if (lastUserData == null) {
assert !isPatch(i);
lastUserData = userDataAccess;
} else {
assert isPatch(i); // i < start and i == getBaseRevision()
patch.read(userDataAccess);
userDataAccess.done();
// I assume empty patches are applied ok
ultimatePatch = ultimatePatch.apply(patch);
patch.clear();
}
}
lastUserData.reset();
byte[] userData = ultimatePatch.apply(lastUserData, actualLen);
ultimatePatch.clear();
lastUserData.done();
lastUserData = new ByteArrayDataAccess(userData);
}
//
for (; i <= end; i++ ) {
readHeaderRecord(i);
DataAccess userDataAccess = null;
if (needData) {
userDataAccess = getStoredData(i);
// userDataAccess is revision content, either complete revision, patch of a previous content, or an empty patch
if (isPatch(i)) {
// this is a patch
if (userDataAccess.isEmpty()) {
// Issue 22, empty patch to an empty base revision
// Issue 24, empty patch to non-empty base revision
// an empty patch modifies nothing, use content of the previous revision (which shall be present - this is a patch)
//
assert lastUserData.length() == actualLen; // with no patch, data size shall be the same
userDataAccess = lastUserData;
} else {
patch.read(userDataAccess);
userDataAccess.done();
//
// it shall be reset at the end of prev iteration, when it got assigned from userDataAccess
// however, actual userDataAccess and lastUserData may share Inflater object, which needs to be reset
// Alternatively, userDataAccess.done() above may be responsible to reset Inflater (if it's InflaterDataAccess)
lastUserData.reset();
// final long startMeasuring = System.currentTimeMillis(); // TIMING
byte[] userData = patch.apply(lastUserData, actualLen);
// applyTime += (System.currentTimeMillis() - startMeasuring); // TIMING
patch.clear(); // do not keep any reference, allow byte[] data to be gc'd
userDataAccess = new ByteArrayDataAccess(userData);
}
}
} else {
if (inline) {
daIndex.skip(compressedLen);
}
}
if (i >= start) {
// final long startMeasuring = System.currentTimeMillis(); // TIMING
inspector.next(i, actualLen, baseRevision, linkRevision, parent1Revision, parent2Revision, nodeidBuf, userDataAccess);
// inspectorTime += (System.currentTimeMillis() - startMeasuring); // TIMING
}
if (cb != null) {
if (cb.isStopped()) {
return false;
}
}
if (userDataAccess != null) {
userDataAccess.reset(); // not sure this is necessary here, as lastUserData would get reset anyway before next use.
}
if (lastUserData != null && lastUserData != userDataAccess /* empty patch case, reuse of recent data in actual revision */) {
// release lastUserData only if we didn't reuse it in actual revision due to empty patch:
// empty patch means we have previous revision and didn't alter it with a patch, hence use lastUserData for userDataAccess above
lastUserData.done();
}
lastUserData = userDataAccess;
}
lastRevisionRead = end;
return true;
}
}
public interface Inspector {
/**
* XXX boolean retVal to indicate whether to continue?
*
* Implementers shall not invoke DataAccess.done(), it's accomplished by #iterate at appropriate moment
*
* @param revisionIndex absolute index of revision in revlog being iterated
* @param actualLen length of the user data at this revision
* @param baseRevision index of the last revision known to hold complete content (others hold patches);
* if baseRevision != revisionIndex, data for this revision is a result of a sequence of patches
* @param linkRevision index of corresponding changeset revision
* @param parent1Revision index of first parent revision in this revlog, or {@link HgRepository#NO_REVISION}
* @param parent2Revision index of second parent revision in this revlog, or {@link HgRepository#NO_REVISION}
* @param nodeid 20-byte buffer, shared between invocations
* @param data access to revision content of actualLen size, or <code>null</code> if no data has been requested with
* {@link RevlogStream#iterate(int[], boolean, Inspector)}
*/
void next(int revisionIndex, int actualLen, int baseRevision, int linkRevision, int parent1Revision, int parent2Revision, byte[/*20*/] nodeid, DataAccess data) throws HgRuntimeException;
}
public interface Observer {
// notify observer of invalidate/reload event in the stream
public void reloaded(RevlogStream src);
}
}