Source Code of org.eclipse.jgit.diff.DiffFormatter

/*
 * Copyright (C) 2009, Google Inc.
 * Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


package org.eclipse.jgit.diff;


import static org.eclipse.jgit.lib.Constants.encode;
import static org.eclipse.jgit.lib.Constants.encodeASCII;
import static org.eclipse.jgit.lib.FileMode.GITLINK;


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;


import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.CoreConfig;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.patch.FileHeader;
import org.eclipse.jgit.patch.HunkHeader;
import org.eclipse.jgit.patch.FileHeader.PatchType;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.QuotedString;
import org.eclipse.jgit.util.io.DisabledOutputStream;


/**
 * Format an {@link EditList} as a Git style unified patch script.
 */
public class DiffFormatter {
  private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n");


  private final OutputStream out;


  private Repository db;


  private int context;


  private int abbreviationLength;


  private RawText.Factory rawTextFactory = RawText.FACTORY;


  private long bigFileThreshold = 50 * 1024 * 1024;


  /**
   * Create a new formatter with a default level of context.
   *
   * @param out
   *            the stream the formatter will write line data to. This stream
   *            should have buffering arranged by the caller, as many small
   *            writes are performed to it.
   */
  public DiffFormatter(OutputStream out) {
    this.out = out;
    setContext(3);
    setAbbreviationLength(8);
  }


  /** @return the stream we are outputting data to. */
  protected OutputStream getOutputStream() {
    return out;
  }


  /**
   * Set the repository the formatter can load object contents from.
   *
   * @param repository
   *            source repository holding referenced objects.
   */
  public void setRepository(Repository repository) {
    db = repository;


    CoreConfig cfg = db.getConfig().get(CoreConfig.KEY);
    bigFileThreshold = cfg.getStreamFileThreshold();
  }


  /**
   * Change the number of lines of context to display.
   *
   * @param lineCount
   *            number of lines of context to see before the first
   *            modification and after the last modification within a hunk of
   *            the modified file.
   */
  public void setContext(final int lineCount) {
    if (lineCount < 0)
      throw new IllegalArgumentException(
          JGitText.get().contextMustBeNonNegative);
    context = lineCount;
  }


  /**
   * Change the number of digits to show in an ObjectId.
   *
   * @param count
   *            number of digits to show in an ObjectId.
   */
  public void setAbbreviationLength(final int count) {
    if (count < 0)
      throw new IllegalArgumentException(
          JGitText.get().abbreviationLengthMustBeNonNegative);
    abbreviationLength = count;
  }


  /**
   * Set the helper that constructs difference output.
   *
   * @param type
   *            the factory to create different output. Different types of
   *            factories can produce different whitespace behavior, for
   *            example.
   * @see RawText#FACTORY
   * @see RawTextIgnoreAllWhitespace#FACTORY
   * @see RawTextIgnoreLeadingWhitespace#FACTORY
   * @see RawTextIgnoreTrailingWhitespace#FACTORY
   * @see RawTextIgnoreWhitespaceChange#FACTORY
   */
  public void setRawTextFactory(RawText.Factory type) {
    rawTextFactory = type;
  }


  /**
   * Set the maximum file size that should be considered for diff output.
   * <p>
   * Text files that are larger than this size will not have a difference
   * generated during output.
   *
   * @param bigFileThreshold
   *            the limit, in bytes.
   */
  public void setBigFileThreshold(long bigFileThreshold) {
    this.bigFileThreshold = bigFileThreshold;
  }


  /**
   * Flush the underlying output stream of this formatter.
   *
   * @throws IOException
   *             the stream's own flush method threw an exception.
   */
  public void flush() throws IOException {
    out.flush();
  }


  /**
   * Format a patch script from a list of difference entries.
   *
   * @param entries
   *            entries describing the affected files.
   * @throws IOException
   *             a file's content cannot be read, or the output stream cannot
   *             be written to.
   */
  public void format(List<? extends DiffEntry> entries) throws IOException {
    for (DiffEntry ent : entries)
      format(ent);
  }


  /**
   * Format a patch script for one file entry.
   *
   * @param ent
   *            the entry to be formatted.
   * @throws IOException
   *             a file's content cannot be read, or the output stream cannot
   *             be written to.
   */
  public void format(DiffEntry ent) throws IOException {
    writeDiffHeader(out, ent);


    if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
      writeGitLinkDiffText(out, ent);
    } else {
      byte[] aRaw = open(ent.getOldMode(), ent.getOldId());
      byte[] bRaw = open(ent.getNewMode(), ent.getNewId());


      if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
        out.write(encodeASCII("Binary files differ\n"));


      } else {
        RawText a = rawTextFactory.create(aRaw);
        RawText b = rawTextFactory.create(bRaw);
        formatEdits(a, b, new MyersDiff(a, b).getEdits());
      }
    }
  }


  private void writeGitLinkDiffText(OutputStream o, DiffEntry ent)
      throws IOException {
    if (ent.getOldMode() == GITLINK) {
      o.write(encodeASCII("-Subproject commit " + ent.getOldId().name()
          + "\n"));
    }
    if (ent.getNewMode() == GITLINK) {
      o.write(encodeASCII("+Subproject commit " + ent.getNewId().name()
          + "\n"));
    }
  }


  private void writeDiffHeader(OutputStream o, DiffEntry ent)
      throws IOException {
    String oldName = quotePath("a/" + ent.getOldPath());
    String newName = quotePath("b/" + ent.getNewPath());
    o.write(encode("diff --git " + oldName + " " + newName + "\n"));


    switch (ent.getChangeType()) {
    case ADD:
      o.write(encodeASCII("new file mode "));
      ent.getNewMode().copyTo(o);
      o.write('\n');
      break;


    case DELETE:
      o.write(encodeASCII("deleted file mode "));
      ent.getOldMode().copyTo(o);
      o.write('\n');
      break;


    case RENAME:
      o.write(encodeASCII("similarity index " + ent.getScore() + "%"));
      o.write('\n');


      o.write(encode("rename from " + quotePath(ent.getOldPath())));
      o.write('\n');


      o.write(encode("rename to " + quotePath(ent.getNewPath())));
      o.write('\n');
      break;


    case COPY:
      o.write(encodeASCII("similarity index " + ent.getScore() + "%"));
      o.write('\n');


      o.write(encode("copy from " + quotePath(ent.getOldPath())));
      o.write('\n');


      o.write(encode("copy to " + quotePath(ent.getNewPath())));
      o.write('\n');


      if (!ent.getOldMode().equals(ent.getNewMode())) {
        o.write(encodeASCII("new file mode "));
        ent.getNewMode().copyTo(o);
        o.write('\n');
      }
      break;
    case MODIFY:
      int score = ent.getScore();
      if (0 < score && score <= 100) {
        o.write(encodeASCII("dissimilarity index " + (100 - score)
            + "%"));
        o.write('\n');
      }
      break;
    }


    switch (ent.getChangeType()) {
    case RENAME:
    case MODIFY:
      if (!ent.getOldMode().equals(ent.getNewMode())) {
        o.write(encodeASCII("old mode "));
        ent.getOldMode().copyTo(o);
        o.write('\n');


        o.write(encodeASCII("new mode "));
        ent.getNewMode().copyTo(o);
        o.write('\n');
      }
    }


    o.write(encodeASCII("index " //
        + format(ent.getOldId()) //
        + ".." //
        + format(ent.getNewId())));
    if (ent.getOldMode().equals(ent.getNewMode())) {
      o.write(' ');
      ent.getNewMode().copyTo(o);
    }
    o.write('\n');
    o.write(encode("--- " + oldName + '\n'));
    o.write(encode("+++ " + newName + '\n'));
  }


  private String format(AbbreviatedObjectId oldId) {
    if (oldId.isComplete() && db != null)
      oldId = oldId.toObjectId().abbreviate(db, abbreviationLength);
    return oldId.name();
  }


  private static String quotePath(String name) {
    String q = QuotedString.GIT_PATH.quote(name);
    return ('"' + name + '"').equals(q) ? name : q;
  }


  private byte[] open(FileMode mode, AbbreviatedObjectId id)
      throws IOException {
    if (mode == FileMode.MISSING)
      return new byte[] {};


    if (mode.getObjectType() != Constants.OBJ_BLOB)
      return new byte[] {};


    if (db == null)
      throw new IllegalStateException(JGitText.get().repositoryIsRequired);


    if (id.isComplete()) {
      ObjectLoader ldr = db.open(id.toObjectId());
      if (!ldr.isLarge())
        return ldr.getCachedBytes();


      long sz = ldr.getSize();
      if (sz < bigFileThreshold && sz < Integer.MAX_VALUE) {
        byte[] buf;
        try {
          buf = new byte[(int) sz];
        } catch (OutOfMemoryError noMemory) {
          LargeObjectException e;


          e = new LargeObjectException(id.toObjectId());
          e.initCause(noMemory);
          throw e;
        }
        InputStream in = ldr.openStream();
        try {
          IO.readFully(in, buf, 0, buf.length);
        } finally {
          in.close();
        }
        return buf;
      }
    }


    return new byte[] {};
  }


  /**
   * Format a patch script, reusing a previously parsed FileHeader.
   * <p>
   * This formatter is primarily useful for editing an existing patch script
   * to increase or reduce the number of lines of context within the script.
   * All header lines are reused as-is from the supplied FileHeader.
   *
   * @param head
   *            existing file header containing the header lines to copy.
   * @param a
   *            text source for the pre-image version of the content. This
   *            must match the content of {@link FileHeader#getOldId()}.
   * @param b
   *            text source for the post-image version of the content. This
   *            must match the content of {@link FileHeader#getNewId()}.
   * @throws IOException
   *             writing to the supplied stream failed.
   */
  public void format(final FileHeader head, final RawText a, final RawText b)
      throws IOException {
    // Reuse the existing FileHeader as-is by blindly copying its
    // header lines, but avoiding its hunks. Instead we recreate
    // the hunks from the text instances we have been supplied.
    //
    final int start = head.getStartOffset();
    int end = head.getEndOffset();
    if (!head.getHunks().isEmpty())
      end = head.getHunks().get(0).getStartOffset();
    out.write(head.getBuffer(), start, end - start);


    formatEdits(a, b, head.toEditList());
  }


  /**
   * Formats a list of edits in unified diff format
   *
   * @param a
   *            the text A which was compared
   * @param b
   *            the text B which was compared
   * @param edits
   *            some differences which have been calculated between A and B
   * @throws IOException
   */
  public void formatEdits(final RawText a, final RawText b,
      final EditList edits) throws IOException {
    for (int curIdx = 0; curIdx < edits.size();) {
      Edit curEdit = edits.get(curIdx);
      final int endIdx = findCombinedEnd(edits, curIdx);
      final Edit endEdit = edits.get(endIdx);


      int aCur = Math.max(0, curEdit.getBeginA() - context);
      int bCur = Math.max(0, curEdit.getBeginB() - context);
      final int aEnd = Math.min(a.size(), endEdit.getEndA() + context);
      final int bEnd = Math.min(b.size(), endEdit.getEndB() + context);


      writeHunkHeader(aCur, aEnd, bCur, bEnd);


      while (aCur < aEnd || bCur < bEnd) {
        if (aCur < curEdit.getBeginA() || endIdx + 1 < curIdx) {
          writeContextLine(a, aCur);
          if (isEndOfLineMissing(a, aCur))
            out.write(noNewLine);
          aCur++;
          bCur++;
        } else if (aCur < curEdit.getEndA()) {
          writeRemovedLine(a, aCur);
          if (isEndOfLineMissing(a, aCur))
            out.write(noNewLine);
          aCur++;
        } else if (bCur < curEdit.getEndB()) {
          writeAddedLine(b, bCur);
          if (isEndOfLineMissing(b, bCur))
            out.write(noNewLine);
          bCur++;
        }


        if (end(curEdit, aCur, bCur) && ++curIdx < edits.size())
          curEdit = edits.get(curIdx);
      }
    }
  }


  /**
   * Output a line of context (unmodified line).
   *
   * @param text
   *            RawText for accessing raw data
   * @param line
   *            the line number within text
   * @throws IOException
   */
  protected void writeContextLine(final RawText text, final int line)
      throws IOException {
    writeLine(' ', text, line);
  }


  private boolean isEndOfLineMissing(final RawText text, final int line) {
    return line + 1 == text.size() && text.isMissingNewlineAtEnd();
  }


  /**
   * Output an added line.
   *
   * @param text
   *            RawText for accessing raw data
   * @param line
   *            the line number within text
   * @throws IOException
   */
  protected void writeAddedLine(final RawText text, final int line)
      throws IOException {
    writeLine('+', text, line);
  }


  /**
   * Output a removed line
   *
   * @param text
   *            RawText for accessing raw data
   * @param line
   *            the line number within text
   * @throws IOException
   */
  protected void writeRemovedLine(final RawText text, final int line)
      throws IOException {
    writeLine('-', text, line);
  }


  /**
   * Output a hunk header
   *
   * @param aStartLine
   *            within first source
   * @param aEndLine
   *            within first source
   * @param bStartLine
   *            within second source
   * @param bEndLine
   *            within second source
   * @throws IOException
   */
  protected void writeHunkHeader(int aStartLine, int aEndLine,
      int bStartLine, int bEndLine) throws IOException {
    out.write('@');
    out.write('@');
    writeRange('-', aStartLine + 1, aEndLine - aStartLine);
    writeRange('+', bStartLine + 1, bEndLine - bStartLine);
    out.write(' ');
    out.write('@');
    out.write('@');
    out.write('\n');
  }


  private void writeRange(final char prefix, final int begin, final int cnt)
      throws IOException {
    out.write(' ');
    out.write(prefix);
    switch (cnt) {
    case 0:
      // If the range is empty, its beginning number must be the
      // line just before the range, or 0 if the range is at the
      // start of the file stream. Here, begin is always 1 based,
      // so an empty file would produce "0,0".
      //
      out.write(encodeASCII(begin - 1));
      out.write(',');
      out.write('0');
      break;


    case 1:
      // If the range is exactly one line, produce only the number.
      //
      out.write(encodeASCII(begin));
      break;


    default:
      out.write(encodeASCII(begin));
      out.write(',');
      out.write(encodeASCII(cnt));
      break;
    }
  }


  /**
   * Write a standard patch script line.
   *
   * @param prefix
   *            prefix before the line, typically '-', '+', ' '.
   * @param text
   *            the text object to obtain the line from.
   * @param cur
   *            line number to output.
   * @throws IOException
   *             the stream threw an exception while writing to it.
   */
  protected void writeLine(final char prefix, final RawText text,
      final int cur) throws IOException {
    out.write(prefix);
    text.writeLine(out, cur);
    out.write('\n');
  }


  /**
   * Creates a {@link FileHeader} representing the given {@link DiffEntry}
   * <p>
   * This method does not use the OutputStream associated with this
   * DiffFormatter instance. It is therefore safe to instantiate this
   * DiffFormatter instance with a {@link DisabledOutputStream} if this method
   * is the only one that will be used.
   *
   * @param ent
   *            the DiffEntry to create the FileHeader for
   * @return a FileHeader representing the DiffEntry. The FileHeader's buffer
   *         will contain only the header of the diff output. It will also
   *         contain one {@link HunkHeader}.
   * @throws IOException
   *             the stream threw an exception while writing to it, or one of
   *             the blobs referenced by the DiffEntry could not be read.
   * @throws CorruptObjectException
   *             one of the blobs referenced by the DiffEntry is corrupt.
   * @throws MissingObjectException
   *             one of the blobs referenced by the DiffEntry is missing.
   */
  public FileHeader createFileHeader(DiffEntry ent) throws IOException,
      CorruptObjectException, MissingObjectException {
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    final EditList editList;
    final FileHeader.PatchType type;


    writeDiffHeader(buf, ent);


    if (ent.getOldMode() == GITLINK || ent.getNewMode() == GITLINK) {
      writeGitLinkDiffText(buf, ent);
      editList = new EditList();
      type = PatchType.UNIFIED;
    } else {
      byte[] aRaw = open(ent.getOldMode(), ent.getOldId());
      byte[] bRaw = open(ent.getNewMode(), ent.getNewId());


      if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
        buf.write(encodeASCII("Binary files differ\n"));
        editList = new EditList();
        type = PatchType.BINARY;
      } else {
        RawText a = rawTextFactory.create(aRaw);
        RawText b = rawTextFactory.create(bRaw);
        editList = new MyersDiff(a, b).getEdits();
        type = PatchType.UNIFIED;
      }
    }


    return new FileHeader(buf.toByteArray(), editList, type);
  }


  private int findCombinedEnd(final List<Edit> edits, final int i) {
    int end = i + 1;
    while (end < edits.size()
        && (combineA(edits, end) || combineB(edits, end)))
      end++;
    return end - 1;
  }


  private boolean combineA(final List<Edit> e, final int i) {
    return e.get(i).getBeginA() - e.get(i - 1).getEndA() <= 2 * context;
  }


  private boolean combineB(final List<Edit> e, final int i) {
    return e.get(i).getBeginB() - e.get(i - 1).getEndB() <= 2 * context;
  }


  private static boolean end(final Edit edit, final int a, final int b) {
    return edit.getEndA() <= a && edit.getEndB() <= b;
  }
}
Source Code of org.eclipse.jgit.diff.DiffFormatter

Related Classes of org.eclipse.jgit.diff.DiffFormatter