Package nokogiri.internals

Source Code of nokogiri.internals.NokogiriEncodingReaderWrapper

package nokogiri.internals;

import java.io.IOException;
import java.io.InputStream;

import org.jruby.Ruby;
import org.jruby.RubyObject;
import org.jruby.RubyProcess.Sys;
import org.jruby.exceptions.RaiseException;
import org.jruby.javasupport.util.RuntimeHelpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;

/**
* This class wraps the EncodingReader, which acts like a rewinding input stream:
* it tries to read the first 1K of data to detect the encoding, but saves
* this data in a buffer for the subsequent read. Unfortunately, the EncodingReader
* will behave as expected only if the encoding was detected; otherwise, the read data
* won't be stored and the EncodingReader will fall back to reading directly from the io stream.
* This is kind of lame, since we need to have similar logic in both layers. The alternative
* is to implement the encoding detection similar to the way C-Nokogiri does it: it starts
* parsing assuming the encoding is unknown, and if an encoding is detected it throws an exception
* causing parsing to stop, in which case we have to intercept the exception and set the encoding.
* Also, in this case we don't have to restart the parsing, since html/document.rb does that for us.
*
* @author John Shahid <jvshahid@gmail.com>
*
*/
public class NokogiriEncodingReaderWrapper extends InputStream {
  private final ThreadContext context;
  private final IRubyObject   encodingReader;
  private final Ruby          ruby;
  private IRubyObject         detectedEncoding;
  private final byte[]        firstChunk       = new byte[1024];
  private int                 firstChunkOff    = 0;
  private int                 firstChunkLength = 0;

  public NokogiriEncodingReaderWrapper(ThreadContext context, RubyObject encodingReader) {
    this.context = context;
    this.encodingReader = encodingReader;
    this.ruby = context.getRuntime();

    if (!RuntimeHelpers.invoke(context, encodingReader, "respond_to?", ruby.newSymbol("read").to_sym()).isTrue()
        || encodingReader.getInstanceVariable("@io") == null) {
      throw ruby.newArgumentError("Argument doesn't respond to read or doesn't have instance variable @io");
    }
  }

  public boolean detectEncoding() {
    try {
      firstChunkLength = read(firstChunk);
    } catch (RaiseException e) {
      detectedEncoding = e.getException().getInstanceVariable("@found_encoding");
      return true;
    }
    detectedEncoding = context.nil;
    return false;
  }

  public IRubyObject getEncoding() {
    return detectedEncoding;
  }

  @Override
  public int read(byte b[]) {
    return read(b, 0, b.length);
  }

  @Override
  public int read(byte b[], int off, int len) {
    if (b == null) {
      throw new NullPointerException();
    } else if (off < 0 || len < 0 || len > b.length - off) {
      throw new IndexOutOfBoundsException();
    } else if (len == 0) {
      return 0;
    }

    int copyLength = Math.min(firstChunkLength - firstChunkOff, len);
    if (copyLength > 0) {
      System.arraycopy(firstChunk, firstChunkOff, b, off, copyLength);
      len -= copyLength;
      firstChunkOff += copyLength;
    }

    if (len <= 0)
      return copyLength;

    IRubyObject returnValue = encodingReader.callMethod(context, "read", ruby.newFixnum(len));
    if (returnValue.isNil())
      return -1;

    ByteList bytes = returnValue.asString().getByteList();
    int length = bytes.length();
    System.arraycopy(bytes.unsafeBytes(), bytes.getBegin(), b, off + copyLength, length);
    return length + copyLength;
  }

  @Override
  public int read() {
    byte[] bytes = new byte[1];
    int count = read(bytes, 0, 1);
    if (count < 1)
      return count;
    return bytes[0];
  }

}
TOP

Related Classes of nokogiri.internals.NokogiriEncodingReaderWrapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.