Package org.tmatesoft.hg.internal

Source Code of org.tmatesoft.hg.internal.EncodingHelper

/*
* Copyright (c) 2011-2013 TMate Software Ltd
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* For information on how to redistribute this software under
* the terms of a license other than GNU General Public License
* contact TMate Software at support@hg4j.com
*/
package org.tmatesoft.hg.internal;

import static org.tmatesoft.hg.util.LogFacility.Severity.Error;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;

import org.tmatesoft.hg.core.SessionContext;
import org.tmatesoft.hg.repo.HgInvalidStateException;

/**
* Keeps all encoding-related logic in a single place.
* NOT thread-safe (the encoder and decoder require synchronized access).
*
* @see http://mercurial.selenic.com/wiki/EncodingStrategy
* @see http://mercurial.selenic.com/wiki/WindowsUTF8Plan
* @see http://mercurial.selenic.com/wiki/CharacterEncodingOnWindows
* @author Artem Tikhomirov
* @author TMate Software Ltd.
*/
public class EncodingHelper {
  /*
   * To understand what Mercurial thinks of UTF-8 and Unix byte approach to names, see
   * http://mercurial.808500.n3.nabble.com/Unicode-support-request-td3430704.html
   */
 
  private final SessionContext sessionContext;
  private final CharsetEncoder encoder;
  private final CharsetDecoder decoder;
  private final CharsetEncoder utfEncoder;
  private final CharsetDecoder utfDecoder;
 
  EncodingHelper(Charset fsEncoding, SessionContext.Source ctx) {
    sessionContext = ctx.getSessionContext();
    decoder = fsEncoding.newDecoder();
    encoder = fsEncoding.newEncoder();
    Charset utf8 = getUTF8();
    if (fsEncoding.equals(utf8)) {
      utfDecoder = decoder;
      utfEncoder = encoder;
    } else {
      utfDecoder = utf8.newDecoder();
      utfEncoder = utf8.newEncoder();
    }
  }

  /**
   * Translate file names from manifest to amazing Unicode string
   */
  public String fromManifest(byte[] data, int start, int length) {
    return decodeWithSystemDefaultFallback(data, start, length);
  }
 
  /**
   * @return byte representation of the string directly comparable to bytes in manifest
   */
  public byte[] toManifest(CharSequence s) {
    if (s == null) {
      // perhaps, can return byte[0] in this case?
      throw new IllegalArgumentException();
    }
    return toArray(encodeWithSystemDefaultFallback(s));
  }

  /**
   * Translate file names from dirstate to amazing Unicode string
   */
  public String fromDirstate(byte[] data, int start, int length) {
    return decodeWithSystemDefaultFallback(data, start, length);
  }
 
  public byte[] toDirstate(CharSequence fname) {
    if (fname == null) {
      throw new IllegalArgumentException();
    }
    return toArray(encodeWithSystemDefaultFallback(fname));
  }
 
  /**
   * prepare filename to be serialized into fncache file
   */
  public ByteBuffer toFNCache(CharSequence fname) {
    return encodeWithSystemDefaultFallback(fname);
  }
 
  public byte[] toBundle(CharSequence fname) {
    // yes, mercurial transfers filenames in local encoding
    // so that if your local encoding doesn't match that on server,
    // and you use native characters, you'd likely fail
    return toArray(encodeWithSystemDefaultFallback(fname));
  }
  public String fromBundle(byte[] data, int start, int length) {
    return decodeWithSystemDefaultFallback(data, start, length);
  }
 
 
  public String userFromChangeset(byte[] data, int start, int length) {
    return decodeUnicodeWithFallback(data, start, length);
  }
 
  public String commentFromChangeset(byte[] data, int start, int length) {
    return decodeUnicodeWithFallback(data, start, length);
  }
 
  public String fileFromChangeset(byte[] data, int start, int length) {
    return decodeWithSystemDefaultFallback(data, start, length);
  }

  public byte[] userToChangeset(CharSequence user) {
    return toArray(encodeUnicode(user));
  }
 
  public byte[] commentToChangeset(CharSequence comment) {
    return toArray(encodeUnicode(comment));
  }
 
  public byte[] fileToChangeset(CharSequence file) {
    return toArray(encodeWithSystemDefaultFallback(file));
  }
 
  private String decodeWithSystemDefaultFallback(byte[] data, int start, int length) {
    try {
      return decoder.decode(ByteBuffer.wrap(data, start, length)).toString();
    } catch (CharacterCodingException ex) {
      sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name()));
      // resort to system-default
      return new String(data, start, length);
    }
  }
 
  private ByteBuffer encodeWithSystemDefaultFallback(CharSequence s) {
    try {
      // synchronized(encoder) {
      return encoder.encode(CharBuffer.wrap(s));
      // }
    } catch (CharacterCodingException ex) {
      sessionContext.getLog().dump(getClass(), Error, ex, String.format("Use of charset %s failed, resort to system default", charset().name()));
      // resort to system-default
      return ByteBuffer.wrap(s.toString().getBytes());
    }
  }

  private byte[] toArray(ByteBuffer bb) {
    byte[] rv;
    if (bb.hasArray() && bb.arrayOffset() == 0) {
      rv = bb.array();
      if (rv.length == bb.remaining()) {
        return rv;
      }
      // fall through
    }
    rv = new byte[bb.remaining()];
    bb.get(rv, 0, rv.length);
    return rv;
  }

  private String decodeUnicodeWithFallback(byte[] data, int start, int length) {
    try {
      return utfDecoder.decode(ByteBuffer.wrap(data, start, length)).toString();
    } catch (CharacterCodingException ex) {
      // TODO post-1.2 respect ui.fallbackencoding actual setting
      try {
        return new String(data, start, length, "ISO-8859-1"); // XXX java5
      } catch (UnsupportedEncodingException e) {
        throw new HgInvalidStateException(ex.getMessage());
      }
    }
  }
 
  private ByteBuffer encodeUnicode(CharSequence s) {
    //
    try {
      return utfEncoder.encode(CharBuffer.wrap(s));
    } catch (CharacterCodingException ex) {
      byte[] rv;
      try {
        rv = s.toString().getBytes(getUTF8().name()); // XXX Java 1.5
      } catch (UnsupportedEncodingException e) {
        throw new HgInvalidStateException("Unexpected error trying to get UTF-8 encoding");
      }
      return ByteBuffer.wrap(rv);
    }
  }

  private Charset charset() {
    return encoder.charset();
  }

  public static Charset getUTF8() {
    return Charset.forName("UTF-8");
  }
}
TOP

Related Classes of org.tmatesoft.hg.internal.EncodingHelper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.