/*
* Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.revwalk;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.util.RawParseUtils;
/**
* Specialized subclass of RevWalk to include trees, blobs and tags.
* <p>
* Unlike RevWalk this subclass is able to remember starting roots that include
* annotated tags, or arbitrary trees or blobs. Once commit generation is
* complete and all commits have been popped by the application, individual
* annotated tag, tree and blob objects can be popped through the additional
* method {@link #nextObject()}.
* <p>
* Tree and blob objects reachable from interesting commits are automatically
* scheduled for inclusion in the results of {@link #nextObject()}, returning
* each object exactly once. Objects are sorted and returned according to the
* the commits that reference them and the order they appear within a tree.
* Ordering can be affected by changing the {@link RevSort} used to order the
* commits that are returned first.
*/
public class ObjectWalk extends RevWalk {
private static final int ID_SZ = 20;
private static final int TYPE_SHIFT = 12;
private static final int TYPE_TREE = 0040000 >>> TYPE_SHIFT;
private static final int TYPE_SYMLINK = 0120000 >>> TYPE_SHIFT;
private static final int TYPE_FILE = 0100000 >>> TYPE_SHIFT;
private static final int TYPE_GITLINK = 0160000 >>> TYPE_SHIFT;
/**
* Indicates a non-RevCommit is in {@link #pendingObjects}.
* <p>
* We can safely reuse {@link RevWalk#REWRITE} here for the same value as it
* is only set on RevCommit and {@link #pendingObjects} never has RevCommit
* instances inserted into it.
*/
private static final int IN_PENDING = RevWalk.REWRITE;
private List<RevObject> rootObjects;
private BlockObjQueue pendingObjects;
private RevCommit firstCommit;
private RevCommit lastCommit;
private TreeVisit freeVisit;
private TreeVisit currVisit;
private byte[] pathBuf;
private int pathLen;
private boolean boundary;
/**
* Create a new revision and object walker for a given repository.
*
* @param repo
* the repository the walker will obtain data from.
*/
public ObjectWalk(final Repository repo) {
this(repo.newObjectReader());
}
/**
* Create a new revision and object walker for a given repository.
*
* @param or
* the reader the walker will obtain data from. The reader should
* be released by the caller when the walker is no longer
* required.
*/
public ObjectWalk(ObjectReader or) {
super(or);
rootObjects = new ArrayList<RevObject>();
pendingObjects = new BlockObjQueue();
pathBuf = new byte[256];
}
/**
* Mark an object or commit to start graph traversal from.
* <p>
* Callers are encouraged to use {@link RevWalk#parseAny(AnyObjectId)}
* instead of {@link RevWalk#lookupAny(AnyObjectId, int)}, as this method
* requires the object to be parsed before it can be added as a root for the
* traversal.
* <p>
* The method will automatically parse an unparsed object, but error
* handling may be more difficult for the application to explain why a
* RevObject is not actually valid. The object pool of this walker would
* also be 'poisoned' by the invalid RevObject.
* <p>
* This method will automatically call {@link RevWalk#markStart(RevCommit)}
* if passed RevCommit instance, or a RevTag that directly (or indirectly)
* references a RevCommit.
*
* @param o
* the object to start traversing from. The object passed must be
* from this same revision walker.
* @throws MissingObjectException
* the object supplied is not available from the object
* database. This usually indicates the supplied object is
* invalid, but the reference was constructed during an earlier
* invocation to {@link RevWalk#lookupAny(AnyObjectId, int)}.
* @throws IncorrectObjectTypeException
* the object was not parsed yet and it was discovered during
* parsing that it is not actually the type of the instance
* passed in. This usually indicates the caller used the wrong
* type in a {@link RevWalk#lookupAny(AnyObjectId, int)} call.
* @throws IOException
* a pack file or loose object could not be read.
*/
public void markStart(RevObject o) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
while (o instanceof RevTag) {
addObject(o);
o = ((RevTag) o).getObject();
parseHeaders(o);
}
if (o instanceof RevCommit)
super.markStart((RevCommit) o);
else
addObject(o);
}
/**
* Mark an object to not produce in the output.
* <p>
* Uninteresting objects denote not just themselves but also their entire
* reachable chain, back until the merge base of an uninteresting commit and
* an otherwise interesting commit.
* <p>
* Callers are encouraged to use {@link RevWalk#parseAny(AnyObjectId)}
* instead of {@link RevWalk#lookupAny(AnyObjectId, int)}, as this method
* requires the object to be parsed before it can be added as a root for the
* traversal.
* <p>
* The method will automatically parse an unparsed object, but error
* handling may be more difficult for the application to explain why a
* RevObject is not actually valid. The object pool of this walker would
* also be 'poisoned' by the invalid RevObject.
* <p>
* This method will automatically call {@link RevWalk#markStart(RevCommit)}
* if passed RevCommit instance, or a RevTag that directly (or indirectly)
* references a RevCommit.
*
* @param o
* the object to start traversing from. The object passed must be
* @throws MissingObjectException
* the object supplied is not available from the object
* database. This usually indicates the supplied object is
* invalid, but the reference was constructed during an earlier
* invocation to {@link RevWalk#lookupAny(AnyObjectId, int)}.
* @throws IncorrectObjectTypeException
* the object was not parsed yet and it was discovered during
* parsing that it is not actually the type of the instance
* passed in. This usually indicates the caller used the wrong
* type in a {@link RevWalk#lookupAny(AnyObjectId, int)} call.
* @throws IOException
* a pack file or loose object could not be read.
*/
public void markUninteresting(RevObject o) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
while (o instanceof RevTag) {
o.flags |= UNINTERESTING;
if (boundary)
addObject(o);
o = ((RevTag) o).getObject();
parseHeaders(o);
}
if (o instanceof RevCommit)
markUninteresting((RevCommit) o);
else if (o instanceof RevTree)
markTreeUninteresting((RevTree) o);
else
o.flags |= UNINTERESTING;
if (o.getType() != OBJ_COMMIT && boundary)
addObject(o);
}
@Override
public void markUninteresting(RevCommit c) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
super.markUninteresting(c);
try {
markTreeUninteresting(c.getTree());
} catch (MissingObjectException e) {
// we don't care if the tree of the commit does not exist locally
}
}
@Override
public void sort(RevSort s) {
super.sort(s);
boundary = hasRevSort(RevSort.BOUNDARY);
}
@Override
public void sort(RevSort s, boolean use) {
super.sort(s, use);
boundary = hasRevSort(RevSort.BOUNDARY);
}
@Override
public RevCommit next() throws MissingObjectException,
IncorrectObjectTypeException, IOException {
for (;;) {
final RevCommit r = super.next();
if (r == null) {
if (firstCommit != null)
reader.walkAdviceBeginTrees(this, firstCommit, lastCommit);
return null;
}
if ((r.flags & UNINTERESTING) != 0) {
markTreeUninteresting(r.getTree());
if (boundary)
return r;
continue;
}
if (firstCommit == null)
firstCommit = r;
lastCommit = r;
pendingObjects.add(r.getTree());
return r;
}
}
/**
* Pop the next most recent object.
*
* @return next most recent object; null if traversal is over.
* @throws MissingObjectException
* one or or more of the next objects are not available from the
* object database, but were thought to be candidates for
* traversal. This usually indicates a broken link.
* @throws IncorrectObjectTypeException
* one or or more of the objects in a tree do not match the type
* indicated.
* @throws IOException
* a pack file or loose object could not be read.
*/
public RevObject nextObject() throws MissingObjectException,
IncorrectObjectTypeException, IOException {
pathLen = 0;
TreeVisit tv = currVisit;
while (tv != null) {
byte[] buf = tv.buf;
for (int ptr = tv.ptr; ptr < buf.length;) {
int startPtr = ptr;
ptr = findObjectId(buf, ptr);
idBuffer.fromRaw(buf, ptr);
ptr += ID_SZ;
RevObject obj = objects.get(idBuffer);
if (obj != null && (obj.flags & SEEN) != 0)
continue;
int mode = parseMode(buf, startPtr, ptr, tv);
int flags;
switch (mode >>> TYPE_SHIFT) {
case TYPE_FILE:
case TYPE_SYMLINK:
if (obj == null) {
obj = new RevBlob(idBuffer);
obj.flags = SEEN;
objects.add(obj);
return obj;
}
if (!(obj instanceof RevBlob))
throw new IncorrectObjectTypeException(obj, OBJ_BLOB);
obj.flags = flags = obj.flags | SEEN;
if ((flags & UNINTERESTING) == 0)
return obj;
if (boundary)
return obj;
continue;
case TYPE_TREE:
if (obj == null) {
obj = new RevTree(idBuffer);
obj.flags = SEEN;
objects.add(obj);
return enterTree(obj);
}
if (!(obj instanceof RevTree))
throw new IncorrectObjectTypeException(obj, OBJ_TREE);
obj.flags = flags = obj.flags | SEEN;
if ((flags & UNINTERESTING) == 0)
return enterTree(obj);
if (boundary)
return enterTree(obj);
continue;
case TYPE_GITLINK:
continue;
default:
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().corruptObjectInvalidMode3,
String.format("%o", Integer.valueOf(mode)), //$NON-NLS-1$
idBuffer.name(),
RawParseUtils.decode(buf, tv.namePtr, tv.nameEnd),
tv.obj));
}
}
currVisit = tv.parent;
releaseTreeVisit(tv);
tv = currVisit;
}
for (;;) {
RevObject o = pendingObjects.next();
if (o == null) {
reader.walkAdviceEnd();
return null;
}
int flags = o.flags;
if ((flags & SEEN) != 0)
continue;
flags |= SEEN;
o.flags = flags;
if ((flags & UNINTERESTING) == 0 | boundary) {
if (o instanceof RevTree) {
tv = newTreeVisit(o);
tv.parent = null;
currVisit = tv;
}
return o;
}
}
}
private RevObject enterTree(RevObject obj) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
TreeVisit tv = newTreeVisit(obj);
tv.parent = currVisit;
currVisit = tv;
return obj;
}
private static int findObjectId(byte[] buf, int ptr) {
// Skip over the mode and name until the NUL before the ObjectId
// can be located. Skip the NUL as the function returns.
for (;;) {
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
if (buf[++ptr] == 0) return ++ptr;
}
}
private static int parseMode(byte[] buf, int startPtr, int recEndPtr, TreeVisit tv) {
int mode = buf[startPtr] - '0';
for (;;) {
byte c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
c = buf[++startPtr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
}
tv.ptr = recEndPtr;
tv.namePtr = startPtr + 1;
tv.nameEnd = recEndPtr - (ID_SZ + 1);
return mode;
}
/**
* Verify all interesting objects are available, and reachable.
* <p>
* Callers should populate starting points and ending points with
* {@link #markStart(RevObject)} and {@link #markUninteresting(RevObject)}
* and then use this method to verify all objects between those two points
* exist in the repository and are readable.
* <p>
* This method returns successfully if everything is connected; it throws an
* exception if there is a connectivity problem. The exception message
* provides some detail about the connectivity failure.
*
* @throws MissingObjectException
* one or or more of the next objects are not available from the
* object database, but were thought to be candidates for
* traversal. This usually indicates a broken link.
* @throws IncorrectObjectTypeException
* one or or more of the objects in a tree do not match the type
* indicated.
* @throws IOException
* a pack file or loose object could not be read.
*/
public void checkConnectivity() throws MissingObjectException,
IncorrectObjectTypeException, IOException {
for (;;) {
final RevCommit c = next();
if (c == null)
break;
}
for (;;) {
final RevObject o = nextObject();
if (o == null)
break;
if (o instanceof RevBlob && !reader.has(o))
throw new MissingObjectException(o, OBJ_BLOB);
}
}
/**
* Get the current object's complete path.
* <p>
* This method is not very efficient and is primarily meant for debugging
* and final output generation. Applications should try to avoid calling it,
* and if invoked do so only once per interesting entry, where the name is
* absolutely required for correct function.
*
* @return complete path of the current entry, from the root of the
* repository. If the current entry is in a subtree there will be at
* least one '/' in the returned string. Null if the current entry
* has no path, such as for annotated tags or root level trees.
*/
public String getPathString() {
if (pathLen == 0) {
pathLen = updatePathBuf(currVisit);
if (pathLen == 0)
return null;
}
return RawParseUtils.decode(pathBuf, 0, pathLen);
}
/**
* Get the current object's path hash code.
* <p>
* This method computes a hash code on the fly for this path, the hash is
* suitable to cluster objects that may have similar paths together.
*
* @return path hash code; any integer may be returned.
*/
public int getPathHashCode() {
TreeVisit tv = currVisit;
if (tv == null)
return 0;
int nameEnd = tv.nameEnd;
if (nameEnd == 0) {
// When nameEnd == 0 the subtree is itself the current path
// being visited. The name hash must be obtained from its
// parent tree. If there is no parent, this is a root tree with
// a hash code of 0.
tv = tv.parent;
if (tv == null)
return 0;
nameEnd = tv.nameEnd;
}
byte[] buf;
int ptr;
if (16 <= (nameEnd - tv.namePtr)) {
buf = tv.buf;
ptr = nameEnd - 16;
} else {
nameEnd = pathLen;
if (nameEnd == 0) {
nameEnd = updatePathBuf(currVisit);
pathLen = nameEnd;
}
buf = pathBuf;
ptr = Math.max(0, nameEnd - 16);
}
int hash = 0;
for (; ptr < nameEnd; ptr++) {
byte c = buf[ptr];
if (c != ' ')
hash = (hash >>> 2) + (c << 24);
}
return hash;
}
/** @return the internal buffer holding the current path. */
public byte[] getPathBuffer() {
if (pathLen == 0)
pathLen = updatePathBuf(currVisit);
return pathBuf;
}
/** @return length of the path in {@link #getPathBuffer()}. */
public int getPathLength() {
if (pathLen == 0)
pathLen = updatePathBuf(currVisit);
return pathLen;
}
private int updatePathBuf(TreeVisit tv) {
if (tv == null)
return 0;
// If nameEnd == 0 this tree has not yet contributed an entry.
// Update only for the parent, which if null will be empty.
int nameEnd = tv.nameEnd;
if (nameEnd == 0)
return updatePathBuf(tv.parent);
int ptr = tv.pathLen;
if (ptr == 0) {
ptr = updatePathBuf(tv.parent);
if (ptr == pathBuf.length)
growPathBuf(ptr);
if (ptr != 0)
pathBuf[ptr++] = '/';
tv.pathLen = ptr;
}
int namePtr = tv.namePtr;
int nameLen = nameEnd - namePtr;
int end = ptr + nameLen;
while (pathBuf.length < end)
growPathBuf(ptr);
System.arraycopy(tv.buf, namePtr, pathBuf, ptr, nameLen);
return end;
}
private void growPathBuf(int ptr) {
byte[] newBuf = new byte[pathBuf.length << 1];
System.arraycopy(pathBuf, 0, newBuf, 0, ptr);
pathBuf = newBuf;
}
@Override
public void dispose() {
super.dispose();
pendingObjects = new BlockObjQueue();
firstCommit = null;
lastCommit = null;
currVisit = null;
freeVisit = null;
}
@Override
protected void reset(final int retainFlags) {
super.reset(retainFlags);
for (RevObject obj : rootObjects)
obj.flags &= ~IN_PENDING;
rootObjects = new ArrayList<RevObject>();
pendingObjects = new BlockObjQueue();
firstCommit = null;
lastCommit = null;
currVisit = null;
freeVisit = null;
}
private void addObject(final RevObject o) {
if ((o.flags & IN_PENDING) == 0) {
o.flags |= IN_PENDING;
rootObjects.add(o);
pendingObjects.add(o);
}
}
private void markTreeUninteresting(final RevTree tree)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
if ((tree.flags & UNINTERESTING) != 0)
return;
tree.flags |= UNINTERESTING;
byte[] raw = reader.open(tree, OBJ_TREE).getCachedBytes();
for (int ptr = 0; ptr < raw.length;) {
byte c = raw[ptr];
int mode = c - '0';
for (;;) {
c = raw[++ptr];
if (' ' == c)
break;
mode <<= 3;
mode += c - '0';
}
while (raw[++ptr] != 0) {
// Skip entry name.
}
ptr++; // Skip NUL after entry name.
switch (mode >>> TYPE_SHIFT) {
case TYPE_FILE:
case TYPE_SYMLINK:
idBuffer.fromRaw(raw, ptr);
lookupBlob(idBuffer).flags |= UNINTERESTING;
break;
case TYPE_TREE:
idBuffer.fromRaw(raw, ptr);
markTreeUninteresting(lookupTree(idBuffer));
break;
case TYPE_GITLINK:
break;
default:
idBuffer.fromRaw(raw, ptr);
throw new CorruptObjectException(MessageFormat.format(
JGitText.get().corruptObjectInvalidMode3,
String.format("%o", Integer.valueOf(mode)), //$NON-NLS-1$
idBuffer.name(), "", tree)); //$NON-NLS-1$
}
ptr += ID_SZ;
}
}
private TreeVisit newTreeVisit(RevObject obj) throws LargeObjectException,
MissingObjectException, IncorrectObjectTypeException, IOException {
TreeVisit tv = freeVisit;
if (tv != null) {
freeVisit = tv.parent;
tv.ptr = 0;
tv.namePtr = 0;
tv.nameEnd = 0;
tv.pathLen = 0;
} else {
tv = new TreeVisit();
}
tv.obj = obj;
tv.buf = reader.open(obj, OBJ_TREE).getCachedBytes();
return tv;
}
private void releaseTreeVisit(TreeVisit tv) {
tv.buf = null;
tv.parent = freeVisit;
freeVisit = tv;
}
private static class TreeVisit {
/** Parent tree visit that entered this tree, null if root tree. */
TreeVisit parent;
/** The RevTree currently being iterated through. */
RevObject obj;
/** Canonical encoding of the tree named by {@link #obj}. */
byte[] buf;
/** Index of next entry to parse in {@link #buf}. */
int ptr;
/** Start of the current name entry in {@link #buf}. */
int namePtr;
/** One past end of name, {@code nameEnd - namePtr} is the length. */
int nameEnd;
/** Number of bytes in the path leading up to this tree. */
int pathLen;
}
}