/*
* Copyright (C) 2008-2010, Google Inc.
* Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.pack;
import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_DELTA;
import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSubclassMap;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.ThreadSafeProgressMonitor;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.storage.file.PackIndexWriter;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.TemporaryBuffer;
/**
* <p>
* PackWriter class is responsible for generating pack files from a specified
* set of objects from a repository. This implementation produces pack files in
* format version 2.
* </p>
* <p>
* Source of objects may be specified in two ways:
* <ul>
* <li>(usually) by providing sets of interesting and uninteresting objects in
* repository - all interesting objects and their ancestors except uninteresting
* objects and their ancestors will be included in pack, or</li>
* <li>by providing iterator of {@link RevObject} specifying exact list and
* order of objects in pack</li>
* </ul>
* Typical usage consists of creating instance intended for some pack,
* configuring options, preparing the list of objects by calling
* {@link #preparePack(Iterator)} or
* {@link #preparePack(ProgressMonitor, Collection, Collection)}, and finally
* producing the stream with {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)}.
* </p>
* <p>
* This class provides a set of configurable options and {@link ProgressMonitor}
* support, as operations may take a long time for big repositories. Existing
* deltas and objects are reused where the reader supports it, and a delta
* search is additionally performed when delta compression is enabled in the
* {@link PackConfig}.
* </p>
* <p>
* This class is not thread safe, it is intended to be used in one thread, with
* one instance per created pack. Subsequent calls to writePack result in
* undefined behavior.
* </p>
*/
public class PackWriter {
// Pack stream format version handed to the pack file header by writePack.
private static final int PACK_VERSION_GENERATED = 2;
// Objects to write, bucketed by object type code (the array index). Generic
// array creation is inherently unchecked, hence the suppression.
@SuppressWarnings("unchecked")
private final List<ObjectToPack> objectsLists[] = new List[Constants.OBJ_TAG + 1];
{
// Index 0 is not a packable type; it holds an immutable empty list so that
// nothing can ever be stored under it and iteration over all buckets is safe.
objectsLists[0] = Collections.<ObjectToPack> emptyList();
objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_TREE] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_BLOB] = new ArrayList<ObjectToPack>();
objectsLists[Constants.OBJ_TAG] = new ArrayList<ObjectToPack>();
}
// Every object that will be written, indexed by object id.
private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
// edge objects for thin packs
private final ObjectIdSubclassMap<ObjectToPack> edgeObjects = new ObjectIdSubclassMap<ObjectToPack>();
// Created lazily by deflater(); ended and cleared by release().
private Deflater myDeflater;
// Source of object data; released by writePack and release().
private final ObjectReader reader;
/** {@link #reader} recast to the reuse interface, if it supports it. */
private final ObjectReuseAsIs reuseSupport;
private final PackConfig config;
// Cache of sortByName(); null until the sorted list is first requested.
private List<ObjectToPack> sortedByName;
// Pack checksum captured by writeChecksum(); passed to the index writer.
private byte packcsum[];
// Initialized from config in the constructor; adjustable via its setter.
private boolean deltaBaseAsOffset;
// Initialized from config in the constructor.
private boolean reuseDeltas;
// Set by setThin, and also forced on when an uninteresting tree or blob is
// added as an edge object.
private boolean thin;
private boolean ignoreMissingUninteresting = true;
/**
 * Create a writer for the given repository.
 * <p>
 * A new object reader is obtained from the repository. The objects to pack
 * are supplied afterwards through {@link #preparePack(Iterator)} or
 * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
 *
 * @param repo
 *            repository where objects are stored.
 */
public PackWriter(final Repository repo) {
    this(repo, repo.newObjectReader());
}
/**
 * Create a writer that loads objects through the given reader, using a
 * default {@link PackConfig}.
 * <p>
 * The objects to pack are supplied afterwards through
 * {@link #preparePack(Iterator)} or
 * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
 *
 * @param reader
 *            reader to read from the repository with.
 */
public PackWriter(final ObjectReader reader) {
    this(new PackConfig(), reader);
}
/**
 * Create a writer for the given repository, reading through the given
 * reader.
 * <p>
 * The pack configuration is built from the repository. The objects to pack
 * are supplied afterwards through {@link #preparePack(Iterator)} or
 * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
 *
 * @param repo
 *            repository where objects are stored.
 * @param reader
 *            reader to read from the repository with.
 */
public PackWriter(final Repository repo, final ObjectReader reader) {
    this(new PackConfig(repo), reader);
}
/**
 * Create a writer with an explicit configuration.
 * <p>
 * If the reader also implements {@link ObjectReuseAsIs} it is used for
 * representation reuse; otherwise reuse is unavailable. The delta-base
 * format and delta-reuse settings start out with the values from
 * {@code config}. The objects to pack are supplied afterwards through
 * {@link #preparePack(Iterator)} or
 * {@link #preparePack(ProgressMonitor, Collection, Collection)}.
 *
 * @param config
 *            configuration for the pack writer.
 * @param reader
 *            reader to read from the repository with.
 */
public PackWriter(final PackConfig config, final ObjectReader reader) {
    this.config = config;
    this.reader = reader;
    this.reuseSupport = reader instanceof ObjectReuseAsIs
            ? (ObjectReuseAsIs) reader
            : null;

    this.deltaBaseAsOffset = config.isDeltaBaseAsOffset();
    this.reuseDeltas = config.isReuseDeltas();
}
/**
 * Tell whether delta bases are written as pack offsets (newer style,
 * smaller packs) or as object ids (legacy style, compatible with old
 * readers).
 *
 * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET}
 *
 * @return true if delta base is stored as an offset; false if it is stored
 *         as an object id.
 */
public boolean isDeltaBaseAsOffset() {
    return this.deltaBaseAsOffset;
}
/**
 * Choose the delta base format. A delta base can be written as an offset
 * within the pack file (newer approach, reduces file size) or as an object
 * id (legacy approach, compatible with old readers).
 *
 * Default setting: {@value PackConfig#DEFAULT_DELTA_BASE_AS_OFFSET}
 *
 * @param deltaBaseAsOffset
 *            true if the delta base may be stored as an offset.
 */
public void setDeltaBaseAsOffset(boolean deltaBaseAsOffset) {
    this.deltaBaseAsOffset = deltaBaseAsOffset;
}
/**
 * @return true if this writer is producing a thin pack.
 */
public boolean isThin() {
    return this.thin;
}
/**
 * Enable or disable thin pack output.
 *
 * @param packthin
 *            true to produce a thin pack, in which an object may be stored
 *            as a delta against a base that is not itself in the pack but
 *            is an uninteresting/boundary object assumed to exist on the
 *            other side; this kind of pack is used only for transport.
 *            false to produce a self-contained pack.
 */
public void setThin(final boolean packthin) {
    this.thin = packthin;
}
/**
 * @return true if uninteresting objects that cannot be found locally are
 *         silently skipped; false if a {@link MissingObjectException} is
 *         thrown out of
 *         {@link #preparePack(ProgressMonitor, Collection, Collection)}
 *         when an uninteresting object is not in the source repository.
 *         The default is true.
 */
public boolean isIgnoreMissingUninteresting() {
    return this.ignoreMissingUninteresting;
}
/**
 * @param ignore
 *            true if uninteresting objects that do not exist should be
 *            skipped while building the set of objects to pack; false if
 *            such objects may cause a {@link MissingObjectException}.
 */
public void setIgnoreMissingUninteresting(final boolean ignore) {
    this.ignoreMissingUninteresting = ignore;
}
/**
 * Report how many objects this writer has queued for the pack.
 *
 * @return number of objects in pack.
 */
public int getObjectsNumber() {
    final int count = objectsMap.size();
    return count;
}
/**
 * Prepare the list of objects to be written to the pack stream from an
 * explicit iterator.
 * <p>
 * The iterator <b>exactly</b> determines which objects are included and the
 * order in which they appear within each object type; objects do not need to
 * be grouped by type on input. The caller is responsible for supplying a
 * sensible git ordering (by recency and path) and for returning each object
 * at most once.
 * </p>
 * <p>
 * An object carrying the {@link RevFlag#UNINTERESTING} flag is never written
 * to the pack. If it is a tree or a blob it is instead recorded as an edge
 * object (assumed known to the remote side) and thin pack mode is switched
 * on, allowing packed objects to use it as a delta base. This type of pack
 * is used only for transport.
 * </p>
 *
 * @param objectsSource
 *            iterator of objects to store in the pack; allowed types are
 *            {@link Constants#OBJ_COMMIT}, {@link Constants#OBJ_TREE},
 *            {@link Constants#OBJ_BLOB} and {@link Constants#OBJ_TAG}.
 * @throws IOException
 *             when some I/O problem occurs while reading objects.
 */
public void preparePack(final Iterator<RevObject> objectsSource)
        throws IOException {
    for (; objectsSource.hasNext();) {
        final RevObject candidate = objectsSource.next();
        addObject(candidate);
    }
}
/**
 * Prepare the list of objects to be written to the pack stream by walking
 * the object graph.
 * <p>
 * The objects to pack are everything reachable from the interesting
 * objects, excluding the uninteresting objects and whatever is reachable
 * from them. An {@link ObjectWalk} is configured from the two collections
 * and then drained: commits first, then the remaining objects together with
 * their path hash.
 * </p>
 *
 * @param countingMonitor
 *            progress during object enumeration; may be null, in which case
 *            no progress is reported.
 * @param interestingObjects
 *            collection of objects to be marked as interesting (start
 *            points of graph traversal).
 * @param uninterestingObjects
 *            collection of objects to be marked as uninteresting (end
 *            points of graph traversal); may be null.
 * @throws IOException
 *             when some I/O problem occurs while reading objects.
 */
public void preparePack(ProgressMonitor countingMonitor,
        final Collection<? extends ObjectId> interestingObjects,
        final Collection<? extends ObjectId> uninterestingObjects)
        throws IOException {
    final ProgressMonitor progress = countingMonitor != null
            ? countingMonitor
            : NullProgressMonitor.INSTANCE;
    findObjectsToPack(progress,
            setUpWalker(interestingObjects, uninterestingObjects));
}
/**
 * Determine if the pack file will contain the requested object.
 *
 * @param id
 *            the object to test the existence of.
 * @return true if the object has been queued for the output pack file.
 */
public boolean willInclude(final AnyObjectId id) {
    final ObjectToPack queued = objectsMap.get(id);
    return queued != null;
}
/**
 * Compute the digest over the ids of all objects in this pack, taken in
 * sorted order, as used to name a pack file in a repository.
 *
 * @return ObjectId representing the name of the pack that was created.
 */
public ObjectId computeName() {
    final MessageDigest digest = Constants.newMessageDigest();
    final byte[] rawId = new byte[Constants.OBJECT_ID_LENGTH];
    final List<ObjectToPack> ordered = sortByName();
    for (final ObjectToPack entry : ordered) {
        entry.copyRawTo(rawId, 0);
        digest.update(rawId, 0, Constants.OBJECT_ID_LENGTH);
    }
    final byte[] name = digest.digest();
    return ObjectId.fromRaw(name);
}
/**
 * Create an index file to match the pack file just written.
 * <p>
 * This method can only be invoked after {@link #preparePack(Iterator)} or
 * {@link #preparePack(ProgressMonitor, Collection, Collection)} has been
 * invoked and completed successfully. Writing a corresponding index is an
 * optional feature that not all pack users may require.
 * <p>
 * The index format comes from the configured index version; a version of
 * zero or less selects the oldest format able to describe the objects.
 *
 * @param indexStream
 *            output for the index data. Caller is responsible for closing
 *            this stream.
 * @throws IOException
 *             the index data could not be written to the supplied stream.
 */
public void writeIndex(final OutputStream indexStream) throws IOException {
    final List<ObjectToPack> entries = sortByName();
    final int version = config.getIndexVersion();
    final PackIndexWriter writer = version > 0
            ? PackIndexWriter.createVersion(indexStream, version)
            : PackIndexWriter.createOldestPossible(indexStream, entries);
    writer.write(entries, packcsum);
}
/**
 * Obtain all queued objects sorted by their natural ordering.
 * <p>
 * The list is built on first use and cached; later calls return the same
 * instance.
 *
 * @return the cached, sorted list of objects to pack.
 */
private List<ObjectToPack> sortByName() {
    if (sortedByName != null)
        return sortedByName;

    sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
    for (final List<ObjectToPack> bucket : objectsLists)
        sortedByName.addAll(bucket);
    Collections.sort(sortedByName);
    return sortedByName;
}
/**
 * Write the prepared pack to the supplied stream.
 * <p>
 * Reusable representations are selected first (when reuse is enabled and
 * the reader supports it), then a delta search is run if delta compression
 * is configured, and finally the header, the objects and the trailing
 * checksum are written. The reader is released once the checksum has been
 * written.
 * </p>
 *
 * @param compressMonitor
 *            progress monitor to report object compression work; may be
 *            null.
 * @param writeMonitor
 *            progress monitor to report the number of objects written; may
 *            be null.
 * @param packStream
 *            output stream of pack data. The stream should be buffered by
 *            the caller. The caller is responsible for closing the stream.
 * @throws IOException
 *             an error occurred reading a local object's data to include in
 *             the pack, or writing compressed object data to the output
 *             stream.
 */
public void writePack(ProgressMonitor compressMonitor,
        ProgressMonitor writeMonitor, OutputStream packStream)
        throws IOException {
    final ProgressMonitor compressPm = compressMonitor != null
            ? compressMonitor
            : NullProgressMonitor.INSTANCE;
    final ProgressMonitor writePm = writeMonitor != null
            ? writeMonitor
            : NullProgressMonitor.INSTANCE;

    final boolean reuseWanted = reuseDeltas || config.isReuseObjects();
    if (reuseWanted && reuseSupport != null)
        searchForReuse();
    if (config.isDeltaCompress())
        searchForDeltas(compressPm);

    final PackOutputStream out = new PackOutputStream(writePm, packStream,
            this);
    final int total = getObjectsNumber();
    writePm.beginTask(JGitText.get().writingObjects, total);
    out.writeFileHeader(PACK_VERSION_GENERATED, total);
    writeObjects(writePm, out);
    writeChecksum(out);
    reader.release();
    writePm.endTask();
}
/**
 * Release all resources used by this writer: the object reader and, if one
 * was created, the deflater.
 */
public void release() {
    reader.release();
    if (myDeflater == null)
        return;
    myDeflater.end();
    myDeflater = null;
}
/**
 * Ask the reuse-capable reader to offer stored representations for every
 * queued object, type bucket by type bucket.
 *
 * @throws IOException
 *             the reader failed while selecting a representation.
 */
private void searchForReuse() throws IOException {
    for (final List<ObjectToPack> bucket : objectsLists) {
        for (final ObjectToPack candidate : bucket) {
            reuseSupport.selectObjectRepresentation(this, candidate);
        }
    }
}
/**
 * Collect the delta candidates, order them and run the delta search.
 * <p>
 * Only trees and blobs are considered; commits and annotated tags tend to
 * have too many differences to benefit from delta compression. Edge objects
 * are appended as potential bases: they are not sent, but the search needs
 * them.
 *
 * @param monitor
 *            progress monitor for the compression task.
 * @throws MissingObjectException
 *             a required object is missing.
 * @throws IncorrectObjectTypeException
 *             an object is not of the recorded type.
 * @throws IOException
 *             an object could not be read.
 */
private void searchForDeltas(ProgressMonitor monitor)
        throws MissingObjectException, IncorrectObjectTypeException,
        IOException {
    final int capacity = objectsLists[Constants.OBJ_TREE].size()
            + objectsLists[Constants.OBJ_BLOB].size()
            + edgeObjects.size();
    final ObjectToPack[] candidates = new ObjectToPack[capacity];

    int used = findObjectsNeedingDelta(candidates, 0, Constants.OBJ_TREE);
    used = findObjectsNeedingDelta(candidates, used, Constants.OBJ_BLOB);
    if (used == 0)
        return;

    for (final ObjectToPack edge : edgeObjects) {
        boolean usable;
        try {
            usable = loadSize(edge);
        } catch (IOException notAvailable) {
            // An edge object is never written, so if it is unavailable
            // the only cost is a possibly larger data stream.
            if (ignoreMissingUninteresting)
                continue;
            throw notAvailable;
        }
        if (usable)
            candidates[used++] = edge;
    }

    monitor.beginTask(JGitText.get().compressingObjects, used);

    // Order by type, then by path hash so like files sit next to each
    // other, then by size descending so bigger files come first. This
    // applies "Linus' Law": newer files tend to be the bigger ones, because
    // source files grow and hardly ever shrink.
    final Comparator<ObjectToPack> searchOrder = new Comparator<ObjectToPack>() {
        public int compare(ObjectToPack a, ObjectToPack b) {
            final int byType = a.getType() - b.getType();
            if (byType != 0)
                return byType;

            final int byPathHigh = (a.getPathHash() >>> 1)
                    - (b.getPathHash() >>> 1);
            if (byPathHigh != 0)
                return byPathHigh;

            final int byPathLow = (a.getPathHash() & 1)
                    - (b.getPathHash() & 1);
            if (byPathLow != 0)
                return byPathLow;

            return b.getWeight() - a.getWeight();
        }
    };
    Arrays.sort(candidates, 0, used, searchOrder);
    searchForDeltas(monitor, candidates, used);
    monitor.endTask();
}
/**
 * Append to {@code list} every object of the given type that is eligible
 * for the delta search.
 * <p>
 * Objects flagged do-not-delta, objects that already reuse a delta, and
 * objects rejected by {@link #loadSize(ObjectToPack)} are skipped.
 *
 * @param list
 *            destination array.
 * @param cnt
 *            index of the first free slot in {@code list}.
 * @param type
 *            object type bucket to scan.
 * @return index of the first free slot after appending.
 * @throws MissingObjectException
 *             an object is missing.
 * @throws IncorrectObjectTypeException
 *             an object is not of the recorded type.
 * @throws IOException
 *             an object size could not be read.
 */
private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type)
        throws MissingObjectException, IncorrectObjectTypeException,
        IOException {
    int next = cnt;
    for (final ObjectToPack candidate : objectsLists[type]) {
        final boolean excluded = candidate.isDoNotDelta() // disabled for this path
                || candidate.isDeltaRepresentation(); // already reusing a delta
        if (excluded)
            continue;
        if (loadSize(candidate)) {
            list[next] = candidate;
            next++;
        }
    }
    return next;
}
/**
 * Look up the size of an object and decide whether it can take part in the
 * delta search; if so, the size is recorded as the object's weight.
 *
 * @param e
 *            the object to measure.
 * @return true if the size is above {@code DeltaIndex.BLKSZ} and below both
 *         the big file threshold and {@code Integer.MAX_VALUE}; false
 *         otherwise, in which case the weight is left untouched.
 * @throws MissingObjectException
 *             the object is missing.
 * @throws IncorrectObjectTypeException
 *             the object is not of the recorded type.
 * @throws IOException
 *             the size could not be read.
 */
private boolean loadSize(ObjectToPack e) throws MissingObjectException,
        IncorrectObjectTypeException, IOException {
    final long size = reader.getObjectSize(e, e.getType());

    // Too big for us to handle.
    final boolean tooBig = config.getBigFileThreshold() <= size
            || Integer.MAX_VALUE <= size;
    if (tooBig)
        return false;

    // Too tiny for the delta compression to work.
    final boolean tooSmall = size <= DeltaIndex.BLKSZ;
    if (tooSmall)
        return false;

    e.setWeight((int) size);
    return true;
}
/**
 * Run the delta search over the first {@code cnt} entries of {@code list},
 * either inline on the calling thread or split into batches that run on an
 * executor.
 * <p>
 * The search runs inline when the effective thread count is 1 or less, or
 * when there are too few objects (at most twice the delta search window).
 * Otherwise the list is cut into batches, each wrapped in a
 * {@code DeltaTask}, and the batches are run on the configured executor, or
 * on a temporary fixed thread pool if none is configured. The first failure
 * recorded by any task is rethrown to the caller.
 *
 * @param monitor
 *            progress monitor for the search.
 * @param list
 *            sorted candidate objects.
 * @param cnt
 *            number of valid entries at the start of {@code list}.
 * @throws MissingObjectException
 *             declared by the search.
 * @throws IncorrectObjectTypeException
 *             declared by the search.
 * @throws LargeObjectException
 *             declared by the search.
 * @throws IOException
 *             a task failed with an I/O error, failed with some other
 *             checked exception (wrapped), or the wait for the tasks was
 *             interrupted.
 */
private void searchForDeltas(final ProgressMonitor monitor,
final ObjectToPack[] list, final int cnt)
throws MissingObjectException, IncorrectObjectTypeException,
LargeObjectException, IOException {
int threads = config.getThreads();
// A configured value of 0 means one thread per available processor.
if (threads == 0)
threads = Runtime.getRuntime().availableProcessors();
// Single threaded, or not enough work to be worth splitting: search inline.
if (threads <= 1 || cnt <= 2 * config.getDeltaSearchWindowSize()) {
DeltaCache dc = new DeltaCache(config);
DeltaWindow dw = new DeltaWindow(config, dc, reader);
dw.search(monitor, list, 0, cnt);
return;
}
// The cache and the progress monitor are shared by all tasks, so the
// thread safe variants are used from here on.
final DeltaCache dc = new ThreadSafeDeltaCache(config);
final ProgressMonitor pm = new ThreadSafeProgressMonitor(monitor);
// Guess at the size of batch we want. Because we don't really
// have a way for a thread to steal work from another thread if
// it ends early, we over partition slightly so the work units
// are a bit smaller.
//
int estSize = cnt / (threads * 2);
// Never let a batch be smaller than twice the search window.
if (estSize < 2 * config.getDeltaSearchWindowSize())
estSize = 2 * config.getDeltaSearchWindowSize();
final List<DeltaTask> myTasks = new ArrayList<DeltaTask>(threads * 2);
// The loop counter is advanced at the bottom of the body by the size of
// the batch that was just scheduled.
for (int i = 0; i < cnt;) {
final int start = i;
final int batchSize;
if (cnt - i < estSize) {
// If we don't have enough to fill the remaining block,
// schedule what is left over as a single block.
//
batchSize = cnt - i;
} else {
// Try to split the block at the end of a path.
//
int end = start + estSize;
// Extend the batch while the neighbours on either side of the cut
// share a path hash, so they stay in the same batch.
while (end < cnt) {
ObjectToPack a = list[end - 1];
ObjectToPack b = list[end];
if (a.getPathHash() == b.getPathHash())
end++;
else
break;
}
batchSize = end - start;
}
i += batchSize;
myTasks.add(new DeltaTask(config, reader, dc, pm, batchSize, start, list));
}
final Executor executor = config.getExecutor();
// Failures are collected from worker threads, hence the synchronized list.
final List<Throwable> errors = Collections
.synchronizedList(new ArrayList<Throwable>());
if (executor instanceof ExecutorService) {
// Caller supplied us a service, use it directly.
//
runTasks((ExecutorService) executor, myTasks, errors);
} else if (executor == null) {
// Caller didn't give us a way to run the tasks, spawn up a
// temporary thread pool and make sure it tears down cleanly.
//
ExecutorService pool = Executors.newFixedThreadPool(threads);
try {
runTasks(pool, myTasks, errors);
} finally {
pool.shutdown();
// Keep waiting in 60 second slices until the pool has terminated.
for (;;) {
try {
if (pool.awaitTermination(60, TimeUnit.SECONDS))
break;
} catch (InterruptedException e) {
// NOTE(review): thrown from a finally block, so this replaces any
// exception already propagating out of runTasks; the interrupt
// status is not restored and the interruption is not attached as
// the cause. Confirm this is intended.
throw new IOException(
JGitText.get().packingCancelledDuringObjectsWriting);
}
}
}
} else {
// The caller gave us an executor, but it might not do
// asynchronous execution. Wrap everything and hope it
// can schedule these for us.
//
// The latch counts completed tasks, whether they succeeded or failed.
final CountDownLatch done = new CountDownLatch(myTasks.size());
for (final DeltaTask task : myTasks) {
executor.execute(new Runnable() {
public void run() {
try {
task.call();
} catch (Throwable failure) {
errors.add(failure);
} finally {
done.countDown();
}
}
});
}
try {
done.await();
} catch (InterruptedException ie) {
// We can't abort the other tasks as we have no handle.
// Cross our fingers and just break out anyway.
//
throw new IOException(
JGitText.get().packingCancelledDuringObjectsWriting);
}
}
// If any task threw an error, try to report it back as
// though we weren't using a threaded search algorithm.
//
if (!errors.isEmpty()) {
// Only the first recorded failure is reported; any others are dropped.
Throwable err = errors.get(0);
if (err instanceof Error)
throw (Error) err;
if (err instanceof RuntimeException)
throw (RuntimeException) err;
if (err instanceof IOException)
throw (IOException) err;
// Any other checked failure is wrapped, keeping it as the cause.
IOException fail = new IOException(err.getMessage());
fail.initCause(err);
throw fail;
}
}
/**
 * Submit every task to the pool and wait for all of them to finish.
 * <p>
 * A task that fails has its cause appended to {@code errors}; waiting
 * continues with the remaining tasks. If the waiting thread is interrupted,
 * all submitted tasks are cancelled and an {@link IOException} is thrown.
 *
 * @param pool
 *            service to run the tasks on.
 * @param tasks
 *            the tasks to run.
 * @param errors
 *            collector for task failures.
 * @throws IOException
 *             the wait was interrupted.
 */
private void runTasks(ExecutorService pool, List<DeltaTask> tasks,
        List<Throwable> errors) throws IOException {
    final List<Future<?>> pending = new ArrayList<Future<?>>(tasks.size());
    for (final DeltaTask work : tasks) {
        final Future<?> handle = pool.submit(work);
        pending.add(handle);
    }

    try {
        final Iterator<Future<?>> waiting = pending.iterator();
        while (waiting.hasNext()) {
            final Future<?> handle = waiting.next();
            try {
                handle.get();
            } catch (ExecutionException failed) {
                errors.add(failed.getCause());
            }
        }
    } catch (InterruptedException ie) {
        for (final Future<?> handle : pending) {
            handle.cancel(true);
        }
        throw new IOException(
                JGitText.get().packingCancelledDuringObjectsWriting);
    }
}
/**
 * Write every queued object that has not been written yet, walking the type
 * buckets in array order.
 * <p>
 * Cancellation is checked before each object, including ones that turn out
 * to be already written.
 *
 * @param writeMonitor
 *            monitor polled for cancellation.
 * @param out
 *            the pack output stream.
 * @throws IOException
 *             writing failed, or the monitor reported cancellation.
 */
private void writeObjects(ProgressMonitor writeMonitor, PackOutputStream out)
        throws IOException {
    for (final List<ObjectToPack> bucket : objectsLists) {
        for (final ObjectToPack entry : bucket) {
            if (writeMonitor.isCancelled()) {
                throw new IOException(
                        JGitText.get().packingCancelledDuringObjectsWriting);
            }
            if (entry.isWritten())
                continue;
            writeObject(out, entry);
        }
    }
}
/**
 * Write a single object to the pack, writing its delta base first if
 * needed.
 * <p>
 * While the object is marked for as-is reuse, a raw copy of its stored
 * representation is attempted. If that representation has disappeared before
 * any bytes were emitted, another representation is selected and the attempt
 * repeats. If reuse is not (or no longer) selected, the object is deflated
 * here, as a delta or as a whole object.
 *
 * @param out
 *            the pack output stream.
 * @param otp
 *            the object to write.
 * @throws IOException
 *             the object could not be read or written; a
 *             {@link CorruptObjectException} is thrown if a reused
 *             representation vanished after output for it had started.
 */
private void writeObject(PackOutputStream out, final ObjectToPack otp)
throws IOException {
if (otp.isWritten())
return; // We shouldn't be here.
// Flag the object as being written; writeBaseFirst checks this flag on a
// base to detect a delta cycle.
otp.markWantWrite();
if (otp.isDeltaRepresentation())
writeBaseFirst(out, otp);
// The CRC and the offset are taken only after the base has been written,
// so they describe this object alone.
out.resetCRC32();
otp.setOffset(out.length());
while (otp.isReuseAsIs()) {
try {
reuseSupport.copyObjectAsIs(out, otp);
out.endObject();
otp.setCRC(out.getCRC32());
return;
} catch (StoredObjectRepresentationNotAvailableException gone) {
// An unchanged stream length means nothing was emitted for this
// object yet, so it is safe to pick another representation and retry.
if (otp.getOffset() == out.length()) {
redoSearchForReuse(otp);
continue;
} else {
// Object writing already started, we cannot recover.
//
CorruptObjectException coe;
coe = new CorruptObjectException(otp, "");
coe.initCause(gone);
throw coe;
}
}
}
// If we reached here, reuse wasn't possible.
//
if (otp.isDeltaRepresentation())
writeDeltaObjectDeflate(out, otp);
else
writeWholeObjectDeflate(out, otp);
out.endObject();
otp.setCRC(out.getCRC32());
}
/**
 * Make sure the delta base of {@code otp} precedes it in the pack.
 * <p>
 * If the base is part of this pack and not yet written, it is written now.
 * If the base is itself in the middle of being written (a delta cycle), a
 * new representation is selected for {@code otp} with delta reuse
 * temporarily disabled. If the base is not in the pack and the pack is not
 * thin, the delta is dropped so the object is written whole.
 *
 * @param out
 *            the pack output stream.
 * @param otp
 *            the delta object about to be written.
 * @throws IOException
 *             the base could not be written, or reselection failed.
 */
private void writeBaseFirst(PackOutputStream out, final ObjectToPack otp)
        throws IOException {
    final ObjectToPack baseInPack = otp.getDeltaBase();
    if (baseInPack != null) {
        if (!baseInPack.isWritten()) {
            if (baseInPack.wantWrite()) {
                // There is a cycle. Our caller is trying to write the
                // object we want as a base, and called us. Turn off
                // delta reuse so we can find another form.
                //
                // Restore the previous setting rather than forcing it on:
                // delta reuse may have been disabled by configuration, and
                // it must not stay off if the reselection throws.
                final boolean savedReuseDeltas = reuseDeltas;
                reuseDeltas = false;
                try {
                    redoSearchForReuse(otp);
                } finally {
                    reuseDeltas = savedReuseDeltas;
                }
            } else {
                writeObject(out, baseInPack);
            }
        }
    } else if (!thin) {
        // This should never occur, the base isn't in the pack and
        // the pack isn't allowed to reference base outside objects.
        // Write the object as a whole form, even if that is slow.
        //
        otp.clearDeltaBase();
        otp.clearReuseAsIs();
    }
}
/**
 * Discard the current representation choice for an object and ask the
 * reuse-capable reader to offer its representations again.
 *
 * @param otp
 *            the object to reselect a representation for.
 * @throws IOException
 *             the reader failed during selection.
 * @throws MissingObjectException
 *             the object is no longer available.
 */
private void redoSearchForReuse(final ObjectToPack otp) throws IOException,
        MissingObjectException {
    // Forget the delta base and the as-is flag before reselecting.
    otp.clearDeltaBase();
    otp.clearReuseAsIs();

    reuseSupport.selectObjectRepresentation(this, otp);
}
/**
 * Write an object in whole (non-delta) form: the object header followed by
 * the deflated content.
 *
 * @param out
 *            the pack output stream.
 * @param otp
 *            the object to write.
 * @throws IOException
 *             the object could not be read or written.
 */
private void writeWholeObjectDeflate(PackOutputStream out,
        final ObjectToPack otp) throws IOException {
    final Deflater compressor = deflater();
    final ObjectLoader loader = reader.open(otp, otp.getType());
    out.writeHeader(otp, loader.getSize());

    // The deflater is shared between objects; reset it before each use.
    compressor.reset();
    final DeflaterOutputStream compressed = new DeflaterOutputStream(out,
            compressor);
    loader.copyTo(compressed);
    compressed.finish();
}
/**
 * Write an object in delta form.
 * <p>
 * If a cached delta is still available its bytes are written directly after
 * the header. Otherwise the delta is recomputed and deflated into the
 * stream.
 *
 * @param out
 *            the pack output stream.
 * @param otp
 *            the delta object to write.
 * @throws IOException
 *             the object or its base could not be read, or writing failed.
 */
private void writeDeltaObjectDeflate(PackOutputStream out,
        final ObjectToPack otp) throws IOException {
    final DeltaCache.Ref cached = otp.popCachedDelta();
    final byte[] cachedBytes = cached != null ? cached.get() : null;
    if (cachedBytes != null) {
        out.writeHeader(otp, otp.getCachedSize());
        out.write(cachedBytes);
        return;
    }

    final TemporaryBuffer.Heap rawDelta = delta(otp);
    out.writeHeader(otp, rawDelta.length());

    final Deflater compressor = deflater();
    compressor.reset();
    final DeflaterOutputStream compressed = new DeflaterOutputStream(out,
            compressor);
    rawDelta.writeTo(compressed, null);
    compressed.finish();
}
/**
 * Compute the delta of an object against its recorded delta base.
 *
 * @param otp
 *            the object to encode; its delta base id must be set.
 * @return buffer holding the encoded delta.
 * @throws IOException
 *             the object or its base could not be loaded.
 */
private TemporaryBuffer.Heap delta(final ObjectToPack otp)
        throws IOException {
    final byte[] baseBytes = buffer(otp.getDeltaBaseId());
    final DeltaIndex baseIndex = new DeltaIndex(baseBytes);
    final byte[] targetBytes = buffer(otp);

    // We never would have proposed this pair if the delta would be
    // larger than the unpacked version of the object. So using it
    // as our buffer limit is valid: we will never reach it.
    //
    final TemporaryBuffer.Heap encoded = new TemporaryBuffer.Heap(
            targetBytes.length);
    baseIndex.encode(encoded, targetBytes);
    return encoded;
}
/**
 * Load an object's content as a byte array using this writer's
 * configuration and reader.
 *
 * @param objId
 *            the object to load.
 * @return the object content.
 * @throws IOException
 *             the object could not be loaded.
 */
private byte[] buffer(AnyObjectId objId) throws IOException {
    final byte[] content = buffer(config, reader, objId);
    return content;
}
/**
 * Load an object's content as a byte array.
 * <p>
 * Objects the loader does not consider large are returned from the loader's
 * cached bytes. A large object is streamed into a freshly allocated array,
 * unless it is at or above the big file threshold, does not fit in an int,
 * or the allocation fails.
 *
 * @param config
 *            configuration supplying the big file threshold.
 * @param or
 *            reader to open the object with.
 * @param objId
 *            the object to load.
 * @return the object content.
 * @throws IOException
 *             the object could not be read; a {@link LargeObjectException}
 *             is thrown when the object is too big to buffer.
 */
static byte[] buffer(PackConfig config, ObjectReader or, AnyObjectId objId)
        throws IOException {
    final ObjectLoader loader = or.open(objId);
    if (loader.isLarge()) {
        // PackWriter should have already pruned objects that
        // are above the big file threshold, so our chances of
        // the object being below it are very good. We really
        // shouldn't be here, unless the implementation is odd.
        // If it really is too big to work with, abort out now.
        //
        final long size = loader.getSize();
        if (config.getBigFileThreshold() <= size || Integer.MAX_VALUE < size)
            throw new LargeObjectException(objId.copy());

        // Its considered to be large by the loader, but we really
        // want it in byte array format. Try to make it happen.
        //
        byte[] content;
        try {
            content = new byte[(int) size];
        } catch (OutOfMemoryError noMemory) {
            final LargeObjectException tooLarge = new LargeObjectException(
                    objId.copy());
            tooLarge.initCause(noMemory);
            throw tooLarge;
        }

        final InputStream stream = loader.openStream();
        try {
            IO.readFully(stream, content, 0, content.length);
        } finally {
            stream.close();
        }
        return content;
    }
    return loader.getCachedBytes();
}
/**
 * Obtain the shared deflater, creating it with the configured compression
 * level on first use.
 *
 * @return the deflater instance owned by this writer.
 */
private Deflater deflater() {
    if (myDeflater != null)
        return myDeflater;
    myDeflater = new Deflater(config.getCompressionLevel());
    return myDeflater;
}
/**
 * Capture the digest of everything written so far, remember it as the pack
 * checksum and append it to the stream.
 *
 * @param out
 *            the pack output stream.
 * @throws IOException
 *             the checksum could not be written.
 */
private void writeChecksum(PackOutputStream out) throws IOException {
    final byte[] digest = out.getDigest();
    packcsum = digest;
    out.write(digest);
}
/**
 * Build an {@link ObjectWalk} over the reader, seeded with the interesting
 * objects as start points and the uninteresting objects as end points.
 * <p>
 * The walk does not retain bodies and sorts by commit time descending; for
 * thin packs boundary sorting is added as well. An uninteresting object
 * that is missing is skipped when missing uninteresting objects are
 * ignored; otherwise the {@link MissingObjectException} propagates.
 *
 * @param interestingObjects
 *            start points of the traversal.
 * @param uninterestingObjects
 *            end points of the traversal; may be null.
 * @return the configured walker.
 * @throws MissingObjectException
 *             a required object is missing.
 * @throws IOException
 *             an object could not be read.
 * @throws IncorrectObjectTypeException
 *             an object is not of the expected type.
 */
private ObjectWalk setUpWalker(
        final Collection<? extends ObjectId> interestingObjects,
        final Collection<? extends ObjectId> uninterestingObjects)
        throws MissingObjectException, IOException,
        IncorrectObjectTypeException {
    final ObjectWalk ow = new ObjectWalk(reader);
    ow.setRetainBody(false);
    ow.sort(RevSort.COMMIT_TIME_DESC);
    if (thin)
        ow.sort(RevSort.BOUNDARY, true);

    for (final ObjectId want : interestingObjects) {
        final RevObject start = ow.parseAny(want);
        ow.markStart(start);
    }

    if (uninterestingObjects == null)
        return ow;

    for (final ObjectId have : uninterestingObjects) {
        RevObject end;
        try {
            end = ow.parseAny(have);
        } catch (MissingObjectException missing) {
            if (!ignoreMissingUninteresting)
                throw missing;
            continue;
        }
        ow.markUninteresting(end);
    }
    return ow;
}
/**
 * Drain the walker and queue everything it yields.
 * <p>
 * Commits come first and are added with a path hash of 0; the remaining
 * objects are added with the walker's current path hash code. The monitor
 * is advanced by one for every object returned.
 *
 * @param countingMonitor
 *            progress monitor for the counting task.
 * @param walker
 *            the configured object walk.
 * @throws MissingObjectException
 *             a required object is missing.
 * @throws IncorrectObjectTypeException
 *             an object is not of a packable type.
 * @throws IOException
 *             an object could not be read.
 */
private void findObjectsToPack(final ProgressMonitor countingMonitor,
        final ObjectWalk walker) throws MissingObjectException,
        IncorrectObjectTypeException, IOException {
    countingMonitor.beginTask(JGitText.get().countingObjects,
            ProgressMonitor.UNKNOWN);

    for (RevObject commit = walker.next(); commit != null; commit = walker
            .next()) {
        addObject(commit, 0);
        countingMonitor.update(1);
    }

    for (RevObject other = walker.nextObject(); other != null; other = walker
            .nextObject()) {
        addObject(other, walker.getPathHashCode());
        countingMonitor.update(1);
    }

    countingMonitor.endTask();
}
/**
 * Include one object in the output file, with no path hash.
 * <p>
 * Objects are written in the order they are added. If the same object is
 * added twice, it may be written twice, creating a larger than necessary
 * file.
 *
 * @param object
 *            the object to add.
 * @throws IncorrectObjectTypeException
 *             the object is an unsupported type.
 */
public void addObject(final RevObject object)
        throws IncorrectObjectTypeException {
    final int noPathHash = 0;
    addObject(object, noPathHash);
}
/**
 * Queue one object for the pack, or record it as an edge object.
 * <p>
 * An object flagged {@link RevFlag#UNINTERESTING} is never queued. If it is
 * a tree or a blob it is remembered as an edge object (with delta search
 * disabled for it) and thin pack mode is switched on; other uninteresting
 * objects are dropped. Any other object is queued in the bucket for its
 * type and registered in the id map.
 *
 * @param object
 *            the object to add.
 * @param pathHashCode
 *            hash of the path the object was found at, or 0 if unknown.
 * @throws IncorrectObjectTypeException
 *             the object is not a commit, tree, blob or tag.
 */
private void addObject(final RevObject object, final int pathHashCode)
        throws IncorrectObjectTypeException {
    if (object.has(RevFlag.UNINTERESTING)) {
        switch (object.getType()) {
        case Constants.OBJ_TREE:
        case Constants.OBJ_BLOB:
            final ObjectToPack edge = new ObjectToPack(object);
            edge.setPathHash(pathHashCode);
            edge.setDoNotDelta(true);
            edgeObjects.add(edge);
            thin = true;
            break;
        }
        return;
    }

    final ObjectToPack otp;
    if (reuseSupport != null)
        otp = reuseSupport.newObjectToPack(object);
    else
        otp = new ObjectToPack(object);
    otp.setPathHash(pathHashCode);

    // Validate the type explicitly instead of relying on an out-of-bounds
    // index or the unmodifiable placeholder list to raise an exception.
    final int type = object.getType();
    switch (type) {
    case Constants.OBJ_COMMIT:
    case Constants.OBJ_TREE:
    case Constants.OBJ_BLOB:
    case Constants.OBJ_TAG:
        objectsLists[type].add(otp);
        break;
    default:
        throw new IncorrectObjectTypeException(object,
                JGitText.get().incorrectObjectType_COMMITnorTREEnorBLOBnorTAG);
    }
    objectsMap.add(otp);
}
/**
 * Select an object representation for this writer.
 * <p>
 * An {@link ObjectReader} implementation should invoke this method once for
 * each representation available for an object, to allow the writer to find
 * the most suitable one for the output.
 * <p>
 * If a packed representation has already been chosen, {@code next} is
 * ignored unless it is packed, is not a step back from delta to whole form,
 * and is strictly lighter. A delta is only reused when delta reuse is on
 * and its base is in the pack, or is an edge object of a thin pack; a whole
 * packed form is only reused when object reuse is configured. In every
 * other case the object is marked as not reused.
 *
 * @param otp
 *            the object being packed.
 * @param next
 *            the next available representation from the repository.
 */
public void select(ObjectToPack otp, StoredObjectRepresentation next) {
    final int format = next.getFormat();
    final int weight;
    if (!otp.isReuseAsIs()) {
        weight = next.getWeight();
    } else {
        // We've already chosen to reuse a packed form, if next
        // cannot beat that break out early.
        //
        if (PACK_WHOLE < format)
            return; // next isn't packed
        if (PACK_DELTA < format && otp.isDeltaRepresentation())
            return; // next isn't a delta, but we are
        weight = next.getWeight();
        if (otp.getWeight() <= weight)
            return; // next would be bigger
    }

    boolean reuse = true;
    if (format == PACK_DELTA && reuseDeltas) {
        final ObjectId baseId = next.getDeltaBase();
        final ObjectToPack baseInPack = objectsMap.get(baseId);
        if (baseInPack != null)
            otp.setDeltaBase(baseInPack);
        else if (thin && edgeObjects.contains(baseId))
            otp.setDeltaBase(baseId);
        else
            reuse = false;
    } else if (format == PACK_WHOLE && config.isReuseObjects()) {
        otp.clearDeltaBase();
    } else {
        reuse = false;
    }

    if (reuse) {
        otp.setReuseAsIs();
        otp.setWeight(weight);
    } else {
        otp.clearDeltaBase();
        otp.clearReuseAsIs();
    }

    otp.select(next);
}
}