// Source code of org.rascalmpl.unicode.UnicodeInputStreamReader

/*******************************************************************************
 * Copyright (c) 2009-2013 CWI
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:


 *   * Davy Landman  - Davy.Landman@cwi.nl - CWI
*******************************************************************************/
package org.rascalmpl.unicode;


import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;




public class UnicodeInputStreamReader extends Reader {
  private Reader wrapped;
  private InputStream original;
  private String encoding;
  public UnicodeInputStreamReader(InputStream in)  {
    original = in;
  }
  public UnicodeInputStreamReader(InputStream in, String encoding) {
    original = in;
    this.encoding = encoding;
  }
  
  public UnicodeInputStreamReader(InputStream in, Charset charset) {
    this(in, charset == null ? null : charset.name());
  }
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    if (wrapped == null) {
      if (encoding != null) {
        // we have an encoding, so lets just skip the possible BOM
        wrapped = removeBOM(original, encoding);
        original = null;
      }
      else {
        // we have to try and detect the decoding
        wrapped = detectCharset(original);
        original = null;
      }
    }
    return wrapped.read(cbuf, off, len);
  }


  @Override
  public void close() throws IOException {
    if (wrapped != null) {
      wrapped.close();
    }
    else {
      original.close();
    }
  }
  
  private static Reader removeBOM(InputStream in, String encoding) throws IOException {
    byte[] detectionBuffer = new byte[UnicodeDetector.getMaximumBOMLength()];
    int bufferSize = in.read(detectionBuffer);
    ByteOrderMarker b = UnicodeDetector.detectBom(detectionBuffer, bufferSize);
    if (b != null) {
      Charset ref = Charset.forName(encoding);
      if (UnicodeDetector.isAmbigiousBOM(b.getCharset(), ref)) {
        b = ByteOrderMarker.fromString(encoding);
      }
      if (b.getCharset().equals(ref) || b.getGroup().equals(ref)) {
        InputStream prefix = new ByteArrayInputStream(detectionBuffer, b.getHeaderLength(), bufferSize - b.getHeaderLength());
        return new InputStreamReader(new ConcatInputStream(prefix, in), b.getCharset());
      }
      else {
        throw new UnsupportedEncodingException("The requested encoding was " + encoding + " but the file contained a BOM for " + b.getCharset().name() + ".");
      }
    }
    else {
      InputStream prefix = new ByteArrayInputStream(detectionBuffer, 0, bufferSize);
      return new InputStreamReader(new ConcatInputStream(prefix, in), encoding);
    }
  }


  private static Reader detectCharset(InputStream in) throws IOException {
    byte[] detectionBuffer = new byte[UnicodeDetector.getSuggestedDetectionSampleSize()];
    int bufferSize = in.read(detectionBuffer);
    ByteOrderMarker b =UnicodeDetector.detectBom(detectionBuffer, bufferSize);
    if (b != null) {
      // we have to remove the BOM from the front
      InputStream prefix = new ByteArrayInputStream(detectionBuffer, b.getHeaderLength(), bufferSize - b.getHeaderLength());
      return new InputStreamReader(new ConcatInputStream(prefix, in), b.getCharset());
    }
    Charset cs = UnicodeDetector.detectByContent(detectionBuffer, bufferSize);
    if (cs == null) {
      cs = Charset.defaultCharset();
    }
    InputStream prefix = new ByteArrayInputStream(detectionBuffer, 0, bufferSize);
    return new InputStreamReader(new ConcatInputStream(prefix, in), cs);
  }
}
// Source code of org.rascalmpl.unicode.UnicodeInputStreamReader
//
// Related classes of org.rascalmpl.unicode.UnicodeInputStreamReader