Package org.rascalmpl.unicode

Source Code of org.rascalmpl.unicode.UnicodeInputStreamReader

/*******************************************************************************
* Copyright (c) 2009-2013 CWI
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:

*   * Davy Landman  - Davy.Landman@cwi.nl - CWI
*******************************************************************************/
package org.rascalmpl.unicode;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;


public class UnicodeInputStreamReader extends Reader {
  private Reader wrapped;
  private InputStream original;
  private String encoding;
  public UnicodeInputStreamReader(InputStream in)  {
    original = in;
  }
  public UnicodeInputStreamReader(InputStream in, String encoding) {
    original = in;
    this.encoding = encoding;
  }
 
  public UnicodeInputStreamReader(InputStream in, Charset charset) {
    this(in, charset == null ? null : charset.name());
  }
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    if (wrapped == null) {
      if (encoding != null) {
        // we have an encoding, so lets just skip the possible BOM
        wrapped = removeBOM(original, encoding);
        original = null;
      }
      else {
        // we have to try and detect the decoding
        wrapped = detectCharset(original);
        original = null;
      }
    }
    return wrapped.read(cbuf, off, len);
  }

  @Override
  public void close() throws IOException {
    if (wrapped != null) {
      wrapped.close();
    }
    else {
      original.close();
    }
  }
 
  private static Reader removeBOM(InputStream in, String encoding) throws IOException {
    byte[] detectionBuffer = new byte[UnicodeDetector.getMaximumBOMLength()];
    int bufferSize = in.read(detectionBuffer);
    ByteOrderMarker b = UnicodeDetector.detectBom(detectionBuffer, bufferSize);
    if (b != null) {
      Charset ref = Charset.forName(encoding);
      if (UnicodeDetector.isAmbigiousBOM(b.getCharset(), ref)) {
        b = ByteOrderMarker.fromString(encoding);
      }
      if (b.getCharset().equals(ref) || b.getGroup().equals(ref)) {
        InputStream prefix = new ByteArrayInputStream(detectionBuffer, b.getHeaderLength(), bufferSize - b.getHeaderLength());
        return new InputStreamReader(new ConcatInputStream(prefix, in), b.getCharset());
      }
      else {
        throw new UnsupportedEncodingException("The requested encoding was " + encoding + " but the file contained a BOM for " + b.getCharset().name() + ".");
      }
    }
    else {
      InputStream prefix = new ByteArrayInputStream(detectionBuffer, 0, bufferSize);
      return new InputStreamReader(new ConcatInputStream(prefix, in), encoding);
    }
  }

  private static Reader detectCharset(InputStream in) throws IOException {
    byte[] detectionBuffer = new byte[UnicodeDetector.getSuggestedDetectionSampleSize()];
    int bufferSize = in.read(detectionBuffer);
    ByteOrderMarker b =UnicodeDetector.detectBom(detectionBuffer, bufferSize);
    if (b != null) {
      // we have to remove the BOM from the front
      InputStream prefix = new ByteArrayInputStream(detectionBuffer, b.getHeaderLength(), bufferSize - b.getHeaderLength());
      return new InputStreamReader(new ConcatInputStream(prefix, in), b.getCharset());
    }
    Charset cs = UnicodeDetector.detectByContent(detectionBuffer, bufferSize);
    if (cs == null) {
      cs = Charset.defaultCharset();
    }
    InputStream prefix = new ByteArrayInputStream(detectionBuffer, 0, bufferSize);
    return new InputStreamReader(new ConcatInputStream(prefix, in), cs);
  }
}
TOP

Related Classes of org.rascalmpl.unicode.UnicodeInputStreamReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.