Package org.eclipse.jgit.diff

Source Code of org.eclipse.jgit.diff.RawText$Factory

/*
* Copyright (C) 2009, Google Inc.
* Copyright (C) 2008-2009, Johannes E. Schindelin <johannes.schindelin@gmx.de>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
*   notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
*   copyright notice, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided
*   with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
*   names of its contributors may be used to endorse or promote
*   products derived from this software without specific prior
*   written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.diff;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;

import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.IntList;
import org.eclipse.jgit.util.RawParseUtils;

/**
* A Sequence supporting UNIX formatted text in byte[] format.
* <p>
* Elements of the sequence are the lines of the file, as delimited by the UNIX
* newline character ('\n'). The file content is treated as 8 bit binary text,
* with no assumptions or requirements on character encoding.
* <p>
* Note that the first line of the file is element 0, as defined by the Sequence
* interface API. Traditionally in a text editor a patch file the first line is
* line number 1. Callers may need to subtract 1 prior to invoking methods if
* they are converting from "line number" to "element index".
*/
public class RawText implements Sequence {
  /** Creates a RawText instance. */
  public static interface Factory {
    /**
     * Construct a RawText instance for the content.
     *
     * @param input
     *            the content array.
     * @return a RawText instance wrapping this content.
     */
    RawText create(byte[] input);
  }

  /** Creates RawText that does not treat whitespace specially. */
  public static final Factory FACTORY = new Factory() {
    public RawText create(byte[] input) {
      return new RawText(input);
    }
  };

  /** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */
  private static final int FIRST_FEW_BYTES = 8000;

  /** The file content for this sequence. */
  protected final byte[] content;

  /** Map of line number to starting position within {@link #content}. */
  protected final IntList lines;

  /** Hash code for each line, for fast equality elimination. */
  protected final IntList hashes;

  /**
   * Create a new sequence from an existing content byte array.
   * <p>
   * The entire array (indexes 0 through length-1) is used as the content.
   *
   * @param input
   *            the content array. The array is never modified, so passing
   *            through cached arrays is safe.
   */
  public RawText(final byte[] input) {
    content = input;
    lines = RawParseUtils.lineMap(content, 0, content.length);
    hashes = computeHashes();
  }

  /**
   * Create a new sequence from a file.
   * <p>
   * The entire file contents are used.
   *
   * @param file
   *            the text file.
   * @throws IOException if Exceptions occur while reading the file
   */
  public RawText(File file) throws IOException {
    this(IO.readFully(file));
  }

  public int size() {
    // The line map is always 2 entries larger than the number of lines in
    // the file. Index 0 is padded out/unused. The last index is the total
    // length of the buffer, and acts as a sentinel.
    //
    return lines.size() - 2;
  }

  public boolean equals(final int i, final Sequence other, final int j) {
    return equals(this, i + 1, (RawText) other, j + 1);
  }

  private static boolean equals(final RawText a, final int ai,
      final RawText b, final int bi) {
    if (a.hashes.get(ai) != b.hashes.get(bi))
      return false;

    int as = a.lines.get(ai);
    int bs = b.lines.get(bi);
    final int ae = a.lines.get(ai + 1);
    final int be = b.lines.get(bi + 1);

    if (ae - as != be - bs)
      return false;

    while (as < ae) {
      if (a.content[as++] != b.content[bs++])
        return false;
    }
    return true;
  }

  /**
   * Write a specific line to the output stream, without its trailing LF.
   * <p>
   * The specified line is copied as-is, with no character encoding
   * translation performed.
   * <p>
   * If the specified line ends with an LF ('\n'), the LF is <b>not</b>
   * copied. It is up to the caller to write the LF, if desired, between
   * output lines.
   *
   * @param out
   *            stream to copy the line data onto.
   * @param i
   *            index of the line to extract. Note this is 0-based, so line
   *            number 1 is actually index 0.
   * @throws IOException
   *             the stream write operation failed.
   */
  public void writeLine(final OutputStream out, final int i)
      throws IOException {
    final int start = lines.get(i + 1);
    int end = lines.get(i + 2);
    if (content[end - 1] == '\n')
      end--;
    out.write(content, start, end - start);
  }

  /**
   * Determine if the file ends with a LF ('\n').
   *
   * @return true if the last line has an LF; false otherwise.
   */
  public boolean isMissingNewlineAtEnd() {
    final int end = lines.get(lines.size() - 1);
    if (end == 0)
      return true;
    return content[end - 1] != '\n';
  }

  private IntList computeHashes() {
    final IntList r = new IntList(lines.size());
    r.add(0);
    for (int lno = 1; lno < lines.size() - 1; lno++) {
      final int ptr = lines.get(lno);
      final int end = lines.get(lno + 1);
      r.add(hashLine(content, ptr, end));
    }
    r.add(0);
    return r;
  }

  /**
   * Compute a hash code for a single line.
   *
   * @param raw
   *            the raw file content.
   * @param ptr
   *            first byte of the content line to hash.
   * @param end
   *            1 past the last byte of the content line.
   * @return hash code for the region <code>[ptr, end)</code> of raw.
   */
  protected int hashLine(final byte[] raw, int ptr, final int end) {
    int hash = 5381;
    for (; ptr < end; ptr++)
      hash = (hash << 5) ^ (raw[ptr] & 0xff);
    return hash;
  }

  /**
   * Determine heuristically whether a byte array represents binary (as
   * opposed to text) content.
   *
   * @param raw
   *            the raw file content.
   * @return true if raw is likely to be a binary file, false otherwise
   */
  public static boolean isBinary(byte[] raw) {
    // Same heuristic as C Git
    int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length;
    for (int ptr = 0; ptr < size; ptr++)
      if (raw[ptr] == '\0')
        return true;

    return false;
  }
}
TOP

Related Classes of org.eclipse.jgit.diff.RawText$Factory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.