Package org.eclipse.jgit.lib

Source Code of org.eclipse.jgit.lib.ObjectChecker$Normalizer

/*
* Copyright (C) 2008-2010, Google Inc.
* Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
*   notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
*   copyright notice, this list of conditions and the following
*   disclaimer in the documentation and/or other materials provided
*   with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
*   names of its contributors may be used to endorse or promote
*   products derived from this software without specific prior
*   written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package org.eclipse.jgit.lib;

import static org.eclipse.jgit.util.RawParseUtils.match;
import static org.eclipse.jgit.util.RawParseUtils.nextLF;
import static org.eclipse.jgit.util.RawParseUtils.parseBase10;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.MessageFormat;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.util.MutableInteger;
import org.eclipse.jgit.util.RawParseUtils;

/**
* Verifies that an object is formatted correctly.
* <p>
* Verifications made by this class only check that the fields of an object are
* formatted correctly. The ObjectId checksum of the object is not verified, and
* connectivity links between objects are also not verified. It's assumed that
* the caller can provide both of these validations on its own.
* <p>
* Instances of this class are not thread safe, but they may be reused to
* perform multiple object validations.
*/
public class ObjectChecker {
  /** Bytes of the {@code "tree "} header; the first header matched by {@link #checkCommit(byte[])}. */
  public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$

  /** Bytes of the {@code "parent "} header; matched zero or more times by {@link #checkCommit(byte[])}. */
  public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$

  /** Bytes of the {@code "author "} header; required by {@link #checkCommit(byte[])}. */
  public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$

  /** Bytes of the {@code "committer "} header; required by {@link #checkCommit(byte[])}. */
  public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$

  /** Bytes of the {@code "encoding "} header. */
  public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$

  /** Bytes of the {@code "object "} header; the first header matched by {@link #checkTag(byte[])}. */
  public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$

  /** Bytes of the {@code "type "} header; required by {@link #checkTag(byte[])}. */
  public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$

  /** Bytes of the {@code "tag "} header; required by {@link #checkTag(byte[])}. */
  public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$

  /** Bytes of the {@code "tagger "} header; optional in {@link #checkTag(byte[])}. */
  public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$

  /** Scratch id that {@code id(byte[], int)} parses candidate hex ids into. */
  private final MutableObjectId tempId = new MutableObjectId();

  /** Scratch out-parameter passed to {@code parseBase10} by {@code personIdent}. */
  private final MutableInteger ptrout = new MutableInteger();

  /** When true, a tree entry mode may begin with {@code '0'}. */
  private boolean allowZeroMode;
  /** When true, Windows name restrictions are applied to tree entry names. */
  private boolean windows;
  /** When true, Mac OS X name checking is applied to tree entry names. */
  private boolean macosx;

  /**
   * Enable accepting leading zero mode in tree entries.
   * <p>
   * Some broken Git libraries generated leading zeros in the mode part of
   * tree entries. This is technically incorrect but gracefully allowed by
   * git-core. JGit rejects such trees by default, but may need to accept
   * them on broken histories.
   *
   * @param allow allow leading zero mode.
   * @return {@code this}.
   * @since 3.4
   */
  public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
    allowZeroMode = allow;
    return this;
  }

  /**
   * Restrict trees to only names legal on Windows platforms.
   * <p>
   * Also rejects any mixed case forms of reserved names ({@code .git}).
   *
   * @param win true if Windows name checking should be performed.
   * @return {@code this}.
   * @since 3.4
   */
  public ObjectChecker setSafeForWindows(boolean win) {
    windows = win;
    return this;
  }

  /**
   * Restrict trees to only names legal on Mac OS X platforms.
   * <p>
   * Rejects any mixed case forms of reserved names ({@code .git})
   * for users working on HFS+ in case-insensitive (default) mode.
   *
   * @param mac true if Mac OS X name checking should be performed.
   * @return {@code this}.
   * @since 3.4
   */
  public ObjectChecker setSafeForMacOS(boolean mac) {
    macosx = mac;
    return this;
  }

  /**
   * Check an object for parsing errors.
   *
   * @param objType
   *            type of the object. Must be a valid object type code in
   *            {@link Constants}.
   * @param raw
   *            the raw data which comprises the object. This should be in the
   *            canonical format (that is the format used to generate the
   *            ObjectId of the object). The array is never modified.
   * @throws CorruptObjectException
   *             if an error is identified.
   */
  public void check(final int objType, final byte[] raw)
      throws CorruptObjectException {
    switch (objType) {
    case Constants.OBJ_COMMIT:
      checkCommit(raw);
      break;
    case Constants.OBJ_TAG:
      checkTag(raw);
      break;
    case Constants.OBJ_TREE:
      checkTree(raw);
      break;
    case Constants.OBJ_BLOB:
      checkBlob(raw);
      break;
    default:
      throw new CorruptObjectException(MessageFormat.format(
          JGitText.get().corruptObjectInvalidType2,
          Integer.valueOf(objType)));
    }
  }

  private int id(final byte[] raw, final int ptr) {
    try {
      tempId.fromString(raw, ptr);
      return ptr + Constants.OBJECT_ID_STRING_LENGTH;
    } catch (IllegalArgumentException e) {
      return -1;
    }
  }

  private int personIdent(final byte[] raw, int ptr) {
    final int emailB = nextLF(raw, ptr, '<');
    if (emailB == ptr || raw[emailB - 1] != '<')
      return -1;

    final int emailE = nextLF(raw, emailB, '>');
    if (emailE == emailB || raw[emailE - 1] != '>')
      return -1;
    if (emailE == raw.length || raw[emailE] != ' ')
      return -1;

    parseBase10(raw, emailE + 1, ptrout); // when
    ptr = ptrout.value;
    if (emailE + 1 == ptr)
      return -1;
    if (ptr == raw.length || raw[ptr] != ' ')
      return -1;

    parseBase10(raw, ptr + 1, ptrout); // tz offset
    if (ptr + 1 == ptrout.value)
      return -1;
    return ptrout.value;
  }

  /**
   * Check a commit for errors.
   *
   * @param raw
   *            the commit data. The array is never modified.
   * @throws CorruptObjectException
   *             if any error was detected.
   */
  public void checkCommit(final byte[] raw) throws CorruptObjectException {
    int ptr = 0;

    if ((ptr = match(raw, ptr, tree)) < 0)
      throw new CorruptObjectException("no tree header");
    if ((ptr = id(raw, ptr)) < 0 || raw[ptr++] != '\n')
      throw new CorruptObjectException("invalid tree");

    while (match(raw, ptr, parent) >= 0) {
      ptr += parent.length;
      if ((ptr = id(raw, ptr)) < 0 || raw[ptr++] != '\n')
        throw new CorruptObjectException("invalid parent");
    }

    if ((ptr = match(raw, ptr, author)) < 0)
      throw new CorruptObjectException("no author");
    if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n')
      throw new CorruptObjectException("invalid author");

    if ((ptr = match(raw, ptr, committer)) < 0)
      throw new CorruptObjectException("no committer");
    if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n')
      throw new CorruptObjectException("invalid committer");
  }

  /**
   * Check an annotated tag for errors.
   *
   * @param raw
   *            the tag data. The array is never modified.
   * @throws CorruptObjectException
   *             if any error was detected.
   */
  public void checkTag(final byte[] raw) throws CorruptObjectException {
    int ptr = 0;

    if ((ptr = match(raw, ptr, object)) < 0)
      throw new CorruptObjectException("no object header");
    if ((ptr = id(raw, ptr)) < 0 || raw[ptr++] != '\n')
      throw new CorruptObjectException("invalid object");

    if ((ptr = match(raw, ptr, type)) < 0)
      throw new CorruptObjectException("no type header");
    ptr = nextLF(raw, ptr);

    if ((ptr = match(raw, ptr, tag)) < 0)
      throw new CorruptObjectException("no tag header");
    ptr = nextLF(raw, ptr);

    if ((ptr = match(raw, ptr, tagger)) > 0) {
      if ((ptr = personIdent(raw, ptr)) < 0 || raw[ptr++] != '\n')
        throw new CorruptObjectException("invalid tagger");
    }
  }

  private static int lastPathChar(final int mode) {
    return FileMode.TREE.equals(mode) ? '/' : '\0';
  }

  private static int pathCompare(final byte[] raw, int aPos, final int aEnd,
      final int aMode, int bPos, final int bEnd, final int bMode) {
    while (aPos < aEnd && bPos < bEnd) {
      final int cmp = (raw[aPos++] & 0xff) - (raw[bPos++] & 0xff);
      if (cmp != 0)
        return cmp;
    }

    if (aPos < aEnd)
      return (raw[aPos] & 0xff) - lastPathChar(bMode);
    if (bPos < bEnd)
      return lastPathChar(aMode) - (raw[bPos] & 0xff);
    return 0;
  }

  private static boolean duplicateName(final byte[] raw,
      final int thisNamePos, final int thisNameEnd) {
    final int sz = raw.length;
    int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
    for (;;) {
      int nextMode = 0;
      for (;;) {
        if (nextPtr >= sz)
          return false;
        final byte c = raw[nextPtr++];
        if (' ' == c)
          break;
        nextMode <<= 3;
        nextMode += c - '0';
      }

      final int nextNamePos = nextPtr;
      for (;;) {
        if (nextPtr == sz)
          return false;
        final byte c = raw[nextPtr++];
        if (c == 0)
          break;
      }
      if (nextNamePos + 1 == nextPtr)
        return false;

      final int cmp = pathCompare(raw, thisNamePos, thisNameEnd,
          FileMode.TREE.getBits(), nextNamePos, nextPtr - 1, nextMode);
      if (cmp < 0)
        return false;
      else if (cmp == 0)
        return true;

      nextPtr += Constants.OBJECT_ID_LENGTH;
    }
  }

  /**
   * Check a canonical formatted tree for errors.
   * <p>
   * Each entry is read as octal mode digits, a space, a name terminated by
   * a NUL byte, and {@code Constants.OBJECT_ID_LENGTH} bytes of object id.
   * Every entry must have a recognized mode and a valid name; names must
   * not repeat and must appear in the order defined by {@code pathCompare}.
   *
   * @param raw
   *            the raw tree data. The array is never modified.
   * @throws CorruptObjectException
   *             if any error was detected.
   */
  public void checkTree(final byte[] raw) throws CorruptObjectException {
    final int sz = raw.length;
    int ptr = 0;
    // Name bounds and mode of the previous entry. lastNameB stays 0 until
    // the first entry has been read, which the sort check below relies on.
    int lastNameB = 0, lastNameE = 0, lastMode = 0;
    // Only allocated when Windows or Mac OS X checking is enabled; in that
    // case duplicates are detected on normalized names, not raw bytes.
    Set<String> normalized = windows || macosx
        ? new HashSet<String>()
        : null;

    while (ptr < sz) {
      // Parse the octal mode up to the separating space.
      int thisMode = 0;
      for (;;) {
        if (ptr == sz)
          throw new CorruptObjectException("truncated in mode");
        final byte c = raw[ptr++];
        if (' ' == c)
          break;
        if (c < '0' || c > '7')
          throw new CorruptObjectException("invalid mode character");
        // thisMode is still 0 while only zeros have been consumed, so
        // this rejects a leading '0' unless explicitly allowed.
        if (thisMode == 0 && c == '0' && !allowZeroMode)
          throw new CorruptObjectException("mode starts with '0'");
        thisMode <<= 3;
        thisMode += c - '0';
      }

      // Note: the mode is appended to the message in decimal, not octal.
      if (FileMode.fromBits(thisMode).getObjectType() == Constants.OBJ_BAD)
        throw new CorruptObjectException("invalid mode " + thisMode);

      // The name runs up to the NUL; scanPathSegment stops at sz when no
      // NUL is found, which is reported as truncation.
      final int thisNameB = ptr;
      ptr = scanPathSegment(raw, ptr, sz);
      if (ptr == sz || raw[ptr] != 0)
        throw new CorruptObjectException("truncated in name");
      checkPathSegment2(raw, thisNameB, ptr);
      if (normalized != null) {
        if (!normalized.add(normalize(raw, thisNameB, ptr)))
          throw new CorruptObjectException("duplicate entry names");
      } else if (duplicateName(raw, thisNameB, ptr))
        throw new CorruptObjectException("duplicate entry names");

      // Compare with the previous entry (skipped for the first entry).
      if (lastNameB != 0) {
        final int cmp = pathCompare(raw, lastNameB, lastNameE,
            lastMode, thisNameB, ptr, thisMode);
        if (cmp > 0)
          throw new CorruptObjectException("incorrectly sorted");
      }

      lastNameB = thisNameB;
      lastNameE = ptr;
      lastMode = thisMode;

      // Skip the NUL terminator and the raw object id.
      ptr += 1 + Constants.OBJECT_ID_LENGTH;
      if (ptr > sz)
        throw new CorruptObjectException("truncated in object id");
    }
  }

  private int scanPathSegment(byte[] raw, int ptr, int end)
      throws CorruptObjectException {
    for (; ptr < end; ptr++) {
      byte c = raw[ptr];
      if (c == 0)
        return ptr;
      if (c == '/')
        throw new CorruptObjectException("name contains '/'");
      if (windows && isInvalidOnWindows(c)) {
        if (c > 31)
          throw new CorruptObjectException(String.format(
              "name contains '%c'", c));
        throw new CorruptObjectException(String.format(
            "name contains byte 0x%x", c & 0xff));
      }
    }
    return ptr;
  }

  /**
   * Check tree path entry for validity.
   *
   * @param raw buffer to scan.
   * @param ptr offset to first byte of the name.
   * @param end offset to one past last byte of name.
   * @throws CorruptObjectException name is invalid.
   * @since 3.4
   */
  public void checkPathSegment(byte[] raw, int ptr, int end)
      throws CorruptObjectException {
    int e = scanPathSegment(raw, ptr, end);
    if (e < end && raw[e] == 0)
      throw new CorruptObjectException("name contains byte 0x00");
    checkPathSegment2(raw, ptr, end);
  }

  private void checkPathSegment2(byte[] raw, int ptr, int end)
      throws CorruptObjectException {
    if (ptr == end)
      throw new CorruptObjectException("zero length name");
    if (raw[ptr] == '.') {
      switch (end - ptr) {
      case 1:
        throw new CorruptObjectException("invalid name '.'");
      case 2:
        if (raw[ptr + 1] == '.')
          throw new CorruptObjectException("invalid name '..'");
        break;
      case 4:
        if (isDotGit(raw, ptr + 1))
          throw new CorruptObjectException(String.format(
              "invalid name '%s'",
              RawParseUtils.decode(raw, ptr, end)));
      }
    }

    if (windows) {
      // Windows ignores space and dot at end of file name.
      if (raw[end - 1] == ' ' || raw[end - 1] == '.')
        throw new CorruptObjectException("invalid name ends with '"
            + ((char) raw[end - 1]) + "'");
      if (end - ptr >= 3)
        checkNotWindowsDevice(raw, ptr, end);
    }
  }

  private static void checkNotWindowsDevice(byte[] raw, int ptr, int end)
      throws CorruptObjectException {
    switch (toLower(raw[ptr])) {
    case 'a': // AUX
      if (end - ptr >= 3
          && toLower(raw[ptr + 1]) == 'u'
          && toLower(raw[ptr + 2]) == 'x'
          && (end - ptr == 3 || raw[ptr + 3] == '.'))
        throw new CorruptObjectException("invalid name 'AUX'");
      break;

    case 'c': // CON, COM[1-9]
      if (end - ptr >= 3
          && toLower(raw[ptr + 2]) == 'n'
          && toLower(raw[ptr + 1]) == 'o'
          && (end - ptr == 3 || raw[ptr + 3] == '.'))
        throw new CorruptObjectException("invalid name 'CON'");
      if (end - ptr >= 4
          && toLower(raw[ptr + 2]) == 'm'
          && toLower(raw[ptr + 1]) == 'o'
          && isPositiveDigit(raw[ptr + 3])
          && (end - ptr == 4 || raw[ptr + 4] == '.'))
        throw new CorruptObjectException("invalid name 'COM"
            + ((char) raw[ptr + 3]) + "'");
      break;

    case 'l': // LPT[1-9]
      if (end - ptr >= 4
          && toLower(raw[ptr + 1]) == 'p'
          && toLower(raw[ptr + 2]) == 't'
          && isPositiveDigit(raw[ptr + 3])
          && (end - ptr == 4 || raw[ptr + 4] == '.'))
        throw new CorruptObjectException("invalid name 'LPT"
            + ((char) raw[ptr + 3]) + "'");
      break;

    case 'n': // NUL
      if (end - ptr >= 3
          && toLower(raw[ptr + 1]) == 'u'
          && toLower(raw[ptr + 2]) == 'l'
          && (end - ptr == 3 || raw[ptr + 3] == '.'))
        throw new CorruptObjectException("invalid name 'NUL'");
      break;

    case 'p': // PRN
      if (end - ptr >= 3
          && toLower(raw[ptr + 1]) == 'r'
          && toLower(raw[ptr + 2]) == 'n'
          && (end - ptr == 3 || raw[ptr + 3] == '.'))
        throw new CorruptObjectException("invalid name 'PRN'");
      break;
    }
  }

  private static boolean isInvalidOnWindows(byte c) {
    // Windows disallows "special" characters in a path component.
    switch (c) {
    case '"':
    case '*':
    case ':':
    case '<':
    case '>':
    case '?':
    case '\\':
    case '|':
      return true;
    }
    return 1 <= c && c <= 31;
  }

  private boolean isDotGit(byte[] buf, int p) {
    if (windows || macosx)
      return toLower(buf[p]) == 'g'
          && toLower(buf[p + 1]) == 'i'
          && toLower(buf[p + 2]) == 't';
    return buf[p] == 'g' && buf[p + 1] == 'i' && buf[p + 2] == 't';
  }

  private static char toLower(byte b) {
    if ('A' <= b && b <= 'Z')
      return (char) (b + ('a' - 'A'));
    return (char) b;
  }

  private static boolean isPositiveDigit(byte b) {
    return '1' <= b && b <= '9';
  }

  /**
   * Check a blob for errors.
   * <p>
   * This implementation performs no validation: it never reads {@code raw}
   * and never throws.
   *
   * @param raw
   *            the blob data. The array is never modified.
   * @throws CorruptObjectException
   *             if any error was detected. Declared for callers and
   *             overriding implementations; not thrown by this method.
   */
  public void checkBlob(final byte[] raw) throws CorruptObjectException {
    // We can always assume the blob is valid.
  }

  private String normalize(byte[] raw, int ptr, int end) {
    String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
    return macosx ? Normalizer.normalize(n) : n;
  }

  private static class Normalizer {
    // TODO Simplify invocation to Normalizer after dropping Java 5.
    private static final Method normalize;
    private static final Object nfc;
    static {
      Method method;
      Object formNfc;
      try {
        Class<?> formClazz = Class.forName("java.text.Normalizer$Form"); //$NON-NLS-1$
        formNfc = formClazz.getField("NFC").get(null); //$NON-NLS-1$
        method = Class.forName("java.text.Normalizer") //$NON-NLS-1$
          .getMethod("normalize", CharSequence.class, formClazz); //$NON-NLS-1$
      } catch (ClassNotFoundException e) {
        method = null;
        formNfc = null;
      } catch (NoSuchFieldException e) {
        method = null;
        formNfc = null;
      } catch (NoSuchMethodException e) {
        method = null;
        formNfc = null;
      } catch (SecurityException e) {
        method = null;
        formNfc = null;
      } catch (IllegalArgumentException e) {
        method = null;
        formNfc = null;
      } catch (IllegalAccessException e) {
        method = null;
        formNfc = null;
      }
      normalize = method;
      nfc = formNfc;
    }

    static String normalize(String in) {
      if (normalize == null)
        return in;
      try {
        return (String) normalize.invoke(null, in, nfc);
      } catch (IllegalAccessException e) {
        return in;
      } catch (InvocationTargetException e) {
        if (e.getCause() instanceof RuntimeException)
          throw (RuntimeException) e.getCause();
        if (e.getCause() instanceof Error)
          throw (Error) e.getCause();
        return in;
      }
    }
  }
}
TOP

Related Classes of org.eclipse.jgit.lib.ObjectChecker$Normalizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.