/**
* (The MIT License)
*
* Copyright (c) 2008 - 2012:
*
* * {Aaron Patterson}[http://tenderlovemaking.com]
* * {Mike Dalessio}[http://mike.daless.io]
* * {Charles Nutter}[http://blog.headius.com]
* * {Sergio Arbeo}[http://www.serabe.com]
* * {Patrick Mahoney}[http://polycrystal.org]
* * {Yoko Harada}[http://yokolet.blogspot.com]
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* 'Software'), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package nokogiri.internals;
import static nokogiri.internals.NokogiriHelpers.rubyStringToString;
import static org.jruby.runtime.Helpers.invoke;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyIO;
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.TypeConverter;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Base class for the various parser contexts. Handles converting
* Ruby objects to InputSource objects.
*
* @author Patrick Mahoney <pat@polycrystal.org>
* @author Yoko Harada <yokolet@gmail.com>
*/
public class ParserContext extends RubyObject {
    /** Input handed to the parser; stays {@code null} until one of the setInputSource* methods is called. */
    protected InputSource source = null;

    /** Encoding reported by an input that responds to {@code detect_encoding}; {@code null} when none was detected. */
    protected IRubyObject detected_encoding = null;

    /**
     * Number of bytes of string input. It is only assigned when the string is
     * fed to the parser as a raw byte stream (no usable Java charset was found);
     * otherwise it keeps its initial value of -1.
     */
    protected int stringDataSize = -1;

    public ParserContext(Ruby runtime) {
        // default to class 'Object' because this class isn't exposed to Ruby
        super(runtime, runtime.getObject());
    }

    public ParserContext(Ruby runtime, RubyClass klass) {
        super(runtime, klass);
    }

    /**
     * @return the current InputSource, or {@code null} if none has been set yet
     */
    protected InputSource getInputSource() {
        return source;
    }

    /**
     * Set the InputSource from <code>url</code> or <code>data</code>,
     * which may be an IO object, a String, or a StringIO.
     *
     * <p>The candidates are tried in this order: an object responding to
     * <code>detect_encoding</code>, an object responding to <code>to_io</code>,
     * an object responding to <code>string</code>, and finally a plain
     * Ruby String. Anything else raises a Ruby ArgumentError.
     *
     * @param context current thread context
     * @param data    the document data
     * @param url     location used as the system id of the source; may convert to {@code null}
     */
    public void setInputSource(ThreadContext context, IRubyObject data, IRubyObject url) {
        source = new InputSource();
        Ruby ruby = context.getRuntime();

        ParserContext.setUrl(context, source, url);

        // if setEncoding returned true, then the stream is set
        // to the EncodingReaderInputStream
        if (setEncoding(context, data)) {
            return;
        }

        if (rubyRespondTo(context, data, "to_io")) {
            /* IO or other object that responds to :to_io */
            RubyIO io = (RubyIO) TypeConverter.convertToType(data, ruby.getIO(), "to_io");
            // use uncloseable input stream to fix #495
            source.setByteStream(new UncloseableInputStream(io.getInStream()));
            return;
        }

        RubyString stringData;
        if (rubyRespondTo(context, data, "string")) {
            /* StringIO or other object that responds to :string */
            stringData = invoke(context, data, "string").convertToString();
        } else if (data instanceof RubyString) {
            stringData = (RubyString) data;
        } else {
            throw ruby.newArgumentError(
                "must be kind_of String or respond to :to_io or :string");
        }
        useStringData(context, stringData);
    }

    /** Asks the Ruby object whether it responds to the given method name. */
    private static boolean rubyRespondTo(ThreadContext context, IRubyObject target, String methodName) {
        return invoke(context, target, "respond_to?",
                      context.getRuntime().newSymbol(methodName)).isTrue();
    }

    /**
     * Maps the Ruby encoding of <code>stringData</code> to a Java Charset.
     *
     * @return the matching Charset, or {@code null} when the string has no
     *         encoding or Java does not know (or rejects) the encoding name
     */
    private static Charset charsetFor(ThreadContext context, RubyString stringData) {
        IRubyObject encoding = stringData.encoding(context);
        if (encoding == null) {
            return null;
        }
        String encName = encoding.toString();
        if (encName == null) {
            return null;
        }
        try {
            return Charset.forName(encName);
        } catch (UnsupportedCharsetException e) {
            // no Java equivalent: the caller falls back to a raw byte stream
            return null;
        } catch (java.nio.charset.IllegalCharsetNameException e) {
            // the name is not even a legal Java charset name: same fallback
            return null;
        }
    }

    /**
     * Points {@link #source} at the content of a Ruby String. When the string's
     * encoding maps to a Java Charset the bytes are decoded and supplied as a
     * character stream; otherwise the raw bytes are supplied and their count is
     * recorded in {@link #stringDataSize}.
     */
    private void useStringData(ThreadContext context, RubyString stringData) {
        Charset charset = charsetFor(context, stringData);
        ByteList bytes = stringData.getByteList();

        if (charset != null) {
            String text = new String(bytes.unsafeBytes(), bytes.begin(), bytes.length(), charset);
            source.setCharacterStream(new StringReader(text));
            source.setEncoding(charset.name());
        } else {
            // length() is already the number of content bytes (it is used as a
            // count in the stream constructor below), so the begin offset must
            // not be subtracted from it.
            stringDataSize = bytes.length();
            source.setByteStream(
                new ByteArrayInputStream(bytes.unsafeBytes(), bytes.begin(), bytes.length()));
        }
    }

    /**
     * Sets the system id of <code>source</code> from <code>url</code>.
     *
     * <p>If <code>url</code> parses as an absolute URI that can be turned into
     * a URL, that URL is used. Otherwise it is treated as a file path: absolute
     * paths are used verbatim, relative paths are resolved against the Ruby
     * runtime's current directory. Nothing happens when <code>url</code>
     * converts to {@code null}.
     */
    public static void setUrl(ThreadContext context, InputSource source, IRubyObject url) {
        String path = rubyStringToString(url);
        if (path == null) {
            return;
        }

        try {
            URI uri = URI.create(path);
            source.setSystemId(uri.toURL().toString());
        } catch (Exception ex) {
            // fallback to the old behavior
            File file = new File(path);
            if (file.isAbsolute()) {
                source.setSystemId(path);
                return;
            }

            // Dir.chdir might be called at some point before this, so resolve
            // against the runtime's notion of the current directory.
            String pwd = context.getRuntime().getCurrentDirectory();
            File resolved = new File(pwd, path);
            String absolutePath;
            try {
                absolutePath = resolved.getCanonicalPath();
            } catch (IOException e) {
                absolutePath = resolved.getAbsolutePath();
            }
            source.setSystemId(absolutePath);
        }
    }

    /**
     * Handles input that responds to <code>detect_encoding</code> (an
     * EncodingReader, which does not respond to :to_io). The input is wrapped
     * in a NokogiriEncodingReaderWrapper and installed as the byte stream; if
     * an encoding is detected it is stored and set on the source.
     *
     * @return true if <code>data</code> was handled here, false otherwise
     */
    private boolean setEncoding(ThreadContext context, IRubyObject data) {
        if (!data.getType().respondsTo("detect_encoding")) {
            return false;
        }

        // data is an EncodingReader
        NokogiriEncodingReaderWrapper reader =
            new NokogiriEncodingReaderWrapper(context, (RubyObject) data);
        source.setByteStream(reader);
        if (reader.detectEncoding()) {
            detected_encoding = reader.getEncoding();
            source.setEncoding(detected_encoding.asJavaString());
        }
        return true;
    }

    /**
     * Set the InputSource to read from <code>file</code>, a String filename.
     */
    public void setInputSourceFile(ThreadContext context, IRubyObject file) {
        source = new InputSource();
        ParserContext.setUrl(context, source, file);
    }

    /**
     * Set the InputSource from <code>stream</code>.
     */
    public void setInputSource(InputStream stream) {
        source = new InputSource(stream);
    }

    /**
     * Wrap Nokogiri parser options in a utility class. The constructor decodes
     * a bit field into one boolean per option; instances are intended to be
     * read-only.
     */
    public static class Options {
        protected static final long STRICT = 0;
        protected static final long RECOVER = 1;
        protected static final long NOENT = 2;
        protected static final long DTDLOAD = 4;
        protected static final long DTDATTR = 8;
        protected static final long DTDVALID = 16;
        protected static final long NOERROR = 32;
        protected static final long NOWARNING = 64;
        protected static final long PEDANTIC = 128;
        protected static final long NOBLANKS = 256;
        protected static final long SAX1 = 512;
        protected static final long XINCLUDE = 1024;
        protected static final long NONET = 2048;
        protected static final long NODICT = 4096;
        protected static final long NSCLEAN = 8192;
        protected static final long NOCDATA = 16384;
        protected static final long NOXINCNODE = 32768;

        public boolean strict;
        public boolean recover;
        public boolean noEnt;
        public boolean dtdLoad;
        public boolean dtdAttr;
        public boolean dtdValid;
        public boolean noError;
        public boolean noWarning;
        public boolean pedantic;
        public boolean noBlanks;
        public boolean sax1;
        public boolean xInclude;
        public boolean noNet;
        public boolean noDict;
        public boolean nsClean;
        public boolean noCdata;
        public boolean noXIncNode;

        /**
         * @return true when every bit of <code>mask</code> is set in <code>options</code>
         */
        protected static boolean test(long options, long mask) {
            return ((options & mask) == mask);
        }

        /**
         * Decodes <code>options</code> into the public flags.
         *
         * @param options bit field built from the mask constants of this class
         */
        public Options(long options) {
            // STRICT is 0, so it cannot be tested as a mask: strict simply
            // means that the RECOVER bit is absent.
            strict = ((options & RECOVER) == STRICT);
            recover = test(options, RECOVER);
            noEnt = test(options, NOENT);
            dtdLoad = test(options, DTDLOAD);
            dtdAttr = test(options, DTDATTR);
            dtdValid = test(options, DTDVALID);
            noError = test(options, NOERROR);
            noWarning = test(options, NOWARNING);
            pedantic = test(options, PEDANTIC);
            noBlanks = test(options, NOBLANKS);
            sax1 = test(options, SAX1);
            xInclude = test(options, XINCLUDE);
            noNet = test(options, NONET);
            noDict = test(options, NODICT);
            nsClean = test(options, NSCLEAN);
            noCdata = test(options, NOCDATA);
            noXIncNode = test(options, NOXINCNODE);
        }
    }

    /**
     * EntityResolver that always answers with the single InputSource it was
     * constructed with, after copying any non-null system and public ids of
     * the requested entity onto that source.
     */
    public static class NokogiriXInlcudeEntityResolver implements org.xml.sax.EntityResolver {
        InputSource source;

        public NokogiriXInlcudeEntityResolver(InputSource source) {
            this.source = source;
        }

        /**
         * @return the wrapped source, updated with the given ids when they are not {@code null}
         */
        @Override
        public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
            if (systemId != null) {
                source.setSystemId(systemId);
            }
            if (publicId != null) {
                source.setPublicId(publicId);
            }
            return source;
        }
    }
}