/***** BEGIN LICENSE BLOCK *****
* Version: CPL 1.0/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Common Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.eclipse.org/legal/cpl-v10.html
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* Copyright (C) 2002-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
* Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
* Copyright (C) 2003-2004 Thomas E Enebo <enebo@acm.org>
* Copyright (C) 2004 Charles O Nutter <headius@headius.com>
* Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
* Copyright (C) 2005 Derek Berner <derek.berner@state.nm.us>
* Copyright (C) 2006 Evan Buswell <ebuswell@gmail.com>
* Copyright (C) 2007 Nick Sieger <nicksieger@gmail.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the CPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the CPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****/
package org.jruby.util;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.unicode.UnicodeEncoding;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyBignum;
import org.jruby.RubyFloat;
import org.jruby.RubyKernel;
import org.jruby.RubyNumeric;
import org.jruby.RubyString;
import org.jruby.runtime.builtin.IRubyObject;
public class Pack {
private static final byte[] sSp10 = " ".getBytes();
private static final byte[] sNil10 = "\000\000\000\000\000\000\000\000\000\000".getBytes();
private static final int IS_STAR = -1;
private static final ASCIIEncoding ASCII = ASCIIEncoding.INSTANCE;
/** Native pack type.
**/
private static final String NATIVE_CODES = "sSiIlL";
private static final String sTooFew = "too few arguments";
private static final byte[] hex_table;
private static final byte[] uu_table;
private static final byte[] b64_table;
private static final byte[] sHexDigits;
private static final int[] b64_xtable = new int[256];
private static final Converter[] converters = new Converter[256];
private static final BigInteger QUAD_MIN = BigInteger.valueOf(Long.MIN_VALUE);
private static final BigInteger QUAD_MAX = new BigInteger("ffffffffffffffff", 16);
/*
* convert into longs, returning unsigned 64-bit values as signed longs
* ( num2long raises a RangeError on values > Long.MAX_VALUE )
*/
private static long num2quad(IRubyObject arg) {
if (arg == arg.getRuntime().getNil()) {
return 0L;
}
else if (arg instanceof RubyBignum) {
BigInteger big = ((RubyBignum)arg).getValue();
if (big.compareTo(QUAD_MIN) < 0 || big.compareTo(QUAD_MAX) > 0) {
throw arg.getRuntime().newRangeError("bignum too big to convert into `quad int'");
}
return big.longValue();
}
return RubyNumeric.num2long(arg);
}
static {
hex_table = ByteList.plain("0123456789ABCDEF");
uu_table =
ByteList.plain("`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_");
b64_table =
ByteList.plain("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
sHexDigits = ByteList.plain("0123456789abcdef0123456789ABCDEFx");
// b64_xtable for decoding Base 64
for (int i = 0; i < 256; i++) {
b64_xtable[i] = -1;
}
for (int i = 0; i < 64; i++) {
b64_xtable[(int)b64_table[i]] = i;
}
// short, little-endian (network)
converters['v'] = new Converter(2) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(
decodeShortUnsignedLittleEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
int s = o == runtime.getNil() ? 0 : (int) (RubyNumeric.num2long(o) & 0xffff);
encodeShortLittleEndian(result, s);
}};
// single precision, little-endian
converters['e'] = new Converter(4) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return RubyFloat.newFloat(runtime, decodeFloatLittleEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
float f = o == runtime.getNil() ? 0 : (float) RubyKernel.new_float(o,o).convertToFloat().getDoubleValue();
encodeFloatLittleEndian(result, f);
}};
Converter tmp = new Converter(4) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return RubyFloat.newFloat(runtime, decodeFloatBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
float f = o == runtime.getNil() ? 0 : (float) RubyKernel.new_float(o,o).convertToFloat().getDoubleValue();
encodeFloatBigEndian(result, f);
}
};
converters['F'] = tmp; // single precision, native
converters['f'] = tmp; // single precision, native
converters['g'] = tmp; // single precision, native
// double precision, little-endian
converters['E'] = new Converter(8) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return RubyFloat.newFloat(runtime, decodeDoubleLittleEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
double d = o == runtime.getNil() ? 0 : RubyKernel.new_float(o,o).convertToFloat().getDoubleValue();
encodeDoubleLittleEndian(result, d);
}};
tmp = new Converter(8) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return RubyFloat.newFloat(runtime, decodeDoubleBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
double d = o == runtime.getNil() ? 0 : RubyKernel.new_float(o,o).convertToFloat().getDoubleValue();
encodeDoubleBigEndian(result, d);
}
};
converters['D'] = tmp; // double precision native
converters['d'] = tmp; // double precision native
converters['G'] = tmp; // double precision bigendian
converters['s'] = new Converter(2) { // signed short
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(decodeShortBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
int s;
if (o == runtime.getNil()) {
s = 0;
} else {
long l = RubyNumeric.num2long(o);
// MRI 1.8 behavior:
if (l == Long.MIN_VALUE || Math.abs(l) > 0xffffffffL) {
throw runtime.newRangeError("bignum too big to convert into 'unsigned long'");
}
s = (int)(l & 0xffff);
}
encodeShortBigEndian(result, s);
}};
tmp = new Converter(2) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(
decodeShortUnsignedBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
int s = o == runtime.getNil() ? 0 : (int) (RubyNumeric.num2long(o) & 0xffff);
encodeShortBigEndian(result, s);
}
};
converters['S'] = tmp; // unsigned short
converters['n'] = tmp; // short network
converters['c'] = new Converter(1) { // signed char
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
int c = enc.get();
return runtime.newFixnum(c > (char) 127 ? c-256 : c);
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
byte c = o == runtime.getNil() ? 0 : (byte) (RubyNumeric.num2long(o) & 0xff);
result.append(c);
}};
converters['C'] = new Converter(1) { // unsigned char
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(enc.get() & 0xFF);
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
byte c = o == runtime.getNil() ? 0 : (byte) (RubyNumeric.num2long(o) & 0xff);
result.append(c);
}};
// long, little-endian
converters['V'] = new Converter(4) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(
decodeIntUnsignedLittleEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
int s = o == runtime.getNil() ? 0 : (int) RubyNumeric.num2long(o);
encodeIntLittleEndian(result, s);
}};
tmp = new Converter(4) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(
decodeIntUnsignedBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
int s = o == runtime.getNil() ? 0 : (int) RubyNumeric.num2long(o);
encodeIntBigEndian(result, s);
}
};
converters['I'] = tmp; // unsigned int, native
converters['L'] = tmp; // unsigned long (bugs?)
converters['N'] = tmp; // long, network
tmp = new Converter(4) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(decodeIntBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
int s;
if (o == runtime.getNil()) {
s = 0;
} else {
long l = RubyNumeric.num2long(o);
// MRI 1.8 behavior:
if (l == Long.MIN_VALUE || Math.abs(l) > 0xffffffffL) {
throw runtime.newRangeError("bignum too big to convert into 'unsigned long'");
}
s = (int)l;
}
encodeIntBigEndian(result, s);
}
};
converters['l'] = tmp; // long, native
converters['i'] = tmp; // int, native
tmp = new Converter(8) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
long l = decodeLongBigEndian(enc);
return RubyBignum.bignorm(runtime,BigInteger.valueOf(l).and(
new BigInteger("FFFFFFFFFFFFFFFF", 16)));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
long l = num2quad(o);
encodeLongBigEndian(result, l);
}
};
converters['Q'] = tmp;
tmp = new Converter(8) {
public IRubyObject decode(Ruby runtime, ByteBuffer enc) {
return runtime.newFixnum(decodeLongBigEndian(enc));
}
public void encode(Ruby runtime, IRubyObject o, ByteList result){
long l = num2quad(o);
encodeLongBigEndian(result, l);
}
};
converters['q'] = tmp;
}
/**
* encodes a String in base64 or its uuencode variant.
* appends the result of the encoding in a StringBuffer
* @param io2Append The StringBuffer which should receive the result
* @param i2Encode The String to encode
* @param iLength The max number of characters to encode
* @param iType the type of encoding required (this is the same type as used by the pack method)
* @return the io2Append buffer
**/
private static ByteList encodes(Ruby runtime, ByteList io2Append,byte[]charsToEncode, int startIndex, int length, int charCount, byte encodingType) {
charCount = charCount < length ? charCount : length;
io2Append.ensure(charCount * 4 / 3 + 6);
int i = startIndex;
byte[] lTranslationTable = encodingType == 'u' ? uu_table : b64_table;
byte lPadding;
if (encodingType == 'u') {
if (charCount >= lTranslationTable.length) {
throw runtime.newArgumentError(
""
+ charCount
+ " is not a correct value for the number of bytes per line in a u directive. Correct values range from 0 to "
+ lTranslationTable.length);
}
io2Append.append(lTranslationTable[charCount]);
lPadding = '`';
} else {
lPadding = '=';
}
while (charCount >= 3) {
byte lCurChar = charsToEncode[i++];
byte lNextChar = charsToEncode[i++];
byte lNextNextChar = charsToEncode[i++];
io2Append.append(lTranslationTable[077 & (lCurChar >>> 2)]);
io2Append.append(lTranslationTable[077 & (((lCurChar << 4) & 060) | ((lNextChar >>> 4) & 017))]);
io2Append.append(lTranslationTable[077 & (((lNextChar << 2) & 074) | ((lNextNextChar >>> 6) & 03))]);
io2Append.append(lTranslationTable[077 & lNextNextChar]);
charCount -= 3;
}
if (charCount == 2) {
byte lCurChar = charsToEncode[i++];
byte lNextChar = charsToEncode[i++];
io2Append.append(lTranslationTable[077 & (lCurChar >>> 2)]);
io2Append.append(lTranslationTable[077 & (((lCurChar << 4) & 060) | ((lNextChar >> 4) & 017))]);
io2Append.append(lTranslationTable[077 & (((lNextChar << 2) & 074) | (('\0' >> 6) & 03))]);
io2Append.append(lPadding);
} else if (charCount == 1) {
byte lCurChar = charsToEncode[i++];
io2Append.append(lTranslationTable[077 & (lCurChar >>> 2)]);
io2Append.append(lTranslationTable[077 & (((lCurChar << 4) & 060) | (('\0' >>> 4) & 017))]);
io2Append.append(lPadding);
io2Append.append(lPadding);
}
io2Append.append('\n');
return io2Append;
}
/**
* encodes a String with the Quoted printable, MIME encoding (see RFC2045).
* appends the result of the encoding in a StringBuffer
* @param io2Append The StringBuffer which should receive the result
* @param i2Encode The String to encode
* @param iLength The max number of characters to encode
* @return the io2Append buffer
**/
private static ByteList qpencode(ByteList io2Append, ByteList i2Encode, int iLength) {
io2Append.ensure(1024);
int lCurLineLength = 0;
int lPrevChar = -1;
byte[] l2Encode = i2Encode.bytes;
try {
int end = i2Encode.begin + i2Encode.realSize;
for (int i = i2Encode.begin; i < end; i++) {
int lCurChar = l2Encode[i] & 0xff;
if (lCurChar > 126 || (lCurChar < 32 && lCurChar != '\n' && lCurChar != '\t') || lCurChar == '=') {
io2Append.append('=');
io2Append.append(hex_table[lCurChar >>> 4]);
io2Append.append(hex_table[lCurChar & 0x0f]);
lCurLineLength += 3;
lPrevChar = -1;
} else if (lCurChar == '\n') {
if (lPrevChar == ' ' || lPrevChar == '\t') {
io2Append.append('=');
io2Append.append(lCurChar);
}
io2Append.append(lCurChar);
lCurLineLength = 0;
lPrevChar = lCurChar;
} else {
io2Append.append(lCurChar);
lCurLineLength++;
lPrevChar = lCurChar;
}
if (lCurLineLength > iLength) {
io2Append.append('=');
io2Append.append('\n');
lCurLineLength = 0;
lPrevChar = '\n';
}
}
} catch (ArrayIndexOutOfBoundsException e) {
//normal exit, this should be faster than a test at each iterations for string with more than
//about 40 char
}
if (lCurLineLength > 0) {
io2Append.append('=');
io2Append.append('\n');
}
return io2Append;
}
/**
* Decodes <i>str</i> (which may contain binary data) according to the format
* string, returning an array of each value extracted.
* The format string consists of a sequence of single-character directives.<br/>
* Each directive may be followed by a number, indicating the number of times to repeat with this directive. An asterisk (``<code>*</code>'') will use up all
* remaining elements. <br/>
* The directives <code>sSiIlL</code> may each be followed by an underscore (``<code>_</code>'') to use the underlying platform's native size for the specified type; otherwise, it uses a platform-independent consistent size. <br/>
* Spaces are ignored in the format string.
* @see RubyArray#pack
* <table border="2" width="500" bgcolor="#ffe0e0">
* <tr>
* <td>
* <P></P>
* <b>Directives for <a href="ref_c_string.html#String.unpack">
* <code>String#unpack</code>
* </a>
* </b> <table class="codebox" cellspacing="0" border="0" cellpadding="3">
* <tr bgcolor="#ff9999">
* <td valign="top">
* <b>Format</b>
* </td>
* <td valign="top">
* <b>Function</b>
* </td>
* <td valign="top">
* <b>Returns</b>
* </td>
* </tr>
* <tr>
* <td valign="top">A</td>
* <td valign="top">String with trailing nulls and spaces removed.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">a</td>
* <td valign="top">String.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">B</td>
* <td valign="top">Extract bits from each character (msb first).</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">b</td>
* <td valign="top">Extract bits from each character (lsb first).</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">C</td>
* <td valign="top">Extract a character as an unsigned integer.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">c</td>
* <td valign="top">Extract a character as an integer.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">d</td>
* <td valign="top">Treat <em>sizeof(double)</em> characters as a native
* double.</td>
* <td valign="top">Float</td>
* </tr>
* <tr>
* <td valign="top">E</td>
* <td valign="top">Treat <em>sizeof(double)</em> characters as a double in
* little-endian byte order.</td>
* <td valign="top">Float</td>
* </tr>
* <tr>
* <td valign="top">e</td>
* <td valign="top">Treat <em>sizeof(float)</em> characters as a float in
* little-endian byte order.</td>
* <td valign="top">Float</td>
* </tr>
* <tr>
* <td valign="top">f</td>
* <td valign="top">Treat <em>sizeof(float)</em> characters as a native float.</td>
* <td valign="top">Float</td>
* </tr>
* <tr>
* <td valign="top">G</td>
* <td valign="top">Treat <em>sizeof(double)</em> characters as a double in
* network byte order.</td>
* <td valign="top">Float</td>
* </tr>
* <tr>
* <td valign="top">g</td>
* <td valign="top">Treat <em>sizeof(float)</em> characters as a float in
* network byte order.</td>
* <td valign="top">Float</td>
* </tr>
* <tr>
* <td valign="top">H</td>
* <td valign="top">Extract hex nibbles from each character (most
* significant first).</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">h</td>
* <td valign="top">Extract hex nibbles from each character (least
* significant first).</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">I</td>
* <td valign="top">Treat <em>sizeof(int)</em>
* <sup>1</sup> successive
* characters as an unsigned native integer.</td>
* <td valign="top">Integer</td>
* </tr>
* <tr>
* <td valign="top">i</td>
* <td valign="top">Treat <em>sizeof(int)</em>
* <sup>1</sup> successive
* characters as a signed native integer.</td>
* <td valign="top">Integer</td>
* </tr>
* <tr>
* <td valign="top">L</td>
* <td valign="top">Treat four<sup>1</sup> successive
* characters as an unsigned native
* long integer.</td>
* <td valign="top">Integer</td>
* </tr>
* <tr>
* <td valign="top">l</td>
* <td valign="top">Treat four<sup>1</sup> successive
* characters as a signed native
* long integer.</td>
* <td valign="top">Integer</td>
* </tr>
* <tr>
* <td valign="top">M</td>
* <td valign="top">Extract a quoted-printable string.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">m</td>
* <td valign="top">Extract a base64 encoded string.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">N</td>
* <td valign="top">Treat four characters as an unsigned long in network
* byte order.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">n</td>
* <td valign="top">Treat two characters as an unsigned short in network
* byte order.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">P</td>
* <td valign="top">Treat <em>sizeof(char *)</em> characters as a pointer, and
* return <em>len</em> characters from the referenced location.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">p</td>
* <td valign="top">Treat <em>sizeof(char *)</em> characters as a pointer to a
* null-terminated string.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">S</td>
* <td valign="top">Treat two<sup>1</sup> successive characters as an unsigned
* short in
* native byte order.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">s</td>
* <td valign="top">Treat two<sup>1</sup> successive
* characters as a signed short in
* native byte order.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">U</td>
* <td valign="top">Extract UTF-8 characters as unsigned integers.</td>
* <td valign="top">Integer</td>
* </tr>
* <tr>
* <td valign="top">u</td>
* <td valign="top">Extract a UU-encoded string.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">V</td>
* <td valign="top">Treat four characters as an unsigned long in little-endian
* byte order.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">v</td>
* <td valign="top">Treat two characters as an unsigned short in little-endian
* byte order.</td>
* <td valign="top">Fixnum</td>
* </tr>
* <tr>
* <td valign="top">X</td>
* <td valign="top">Skip backward one character.</td>
* <td valign="top">---</td>
* </tr>
* <tr>
* <td valign="top">x</td>
* <td valign="top">Skip forward one character.</td>
* <td valign="top">---</td>
* </tr>
* <tr>
* <td valign="top">Z</td>
* <td valign="top">String with trailing nulls removed.</td>
* <td valign="top">String</td>
* </tr>
* <tr>
* <td valign="top">@</td>
* <td valign="top">Skip to the offset given by the length argument.</td>
* <td valign="top">---</td>
* </tr>
* <tr>
* <td colspan="9" bgcolor="#ff9999" height="2"><img src="dot.gif" width="1" height="1"></td>
* </tr>
* </table>
* <P></P>
* <sup>1</sup> May be modified by appending ``_'' to the directive.
* <P></P>
* </td>
* </tr>
* </table>
*
**/
public static RubyArray unpack(Ruby runtime, ByteList encodedString, ByteList formatString) {
RubyArray result = runtime.newArray();
// FIXME: potentially could just use ByteList here?
ByteBuffer format = ByteBuffer.wrap(formatString.unsafeBytes(), formatString.begin(), formatString.length());
ByteBuffer encode = ByteBuffer.wrap(encodedString.unsafeBytes(), encodedString.begin(), encodedString.length());
int type = 0;
int next = safeGet(format);
while (next != 0) {
type = next;
next = safeGet(format);
// Next indicates to decode using native encoding format
if (next == '_' || next == '!') {
if (NATIVE_CODES.indexOf(type) == -1) {
throw runtime.newArgumentError("'" + next + "' allowed only after types " + NATIVE_CODES);
}
next = safeGet(format);
}
// How many occurrences of 'type' we want
int occurrences = 0;
if (next == 0) {
occurrences = 1;
} else {
if (next == '*') {
occurrences = IS_STAR;
next = safeGet(format);
} else if (ASCII.isDigit(next)) {
occurrences = 0;
do {
occurrences = occurrences * 10 + Character.digit((char)(next & 0xFF), 10);
next = safeGet(format);
} while (next != 0 && ASCII.isDigit(next));
} else {
occurrences = type == '@' ? 0 : 1;
}
}
// See if we have a converter for the job...
Converter converter = converters[type];
if (converter != null) {
decode(runtime, encode, occurrences, result, converter);
type = next;
continue;
}
// Otherwise the unpack should be here...
switch (type) {
case '@' :
try {
if (occurrences == IS_STAR) {
encode.position(encode.remaining());
} else {
encode.position(occurrences);
}
} catch (IllegalArgumentException iae) {
throw runtime.newArgumentError("@ outside of string");
}
break;
case '%' :
throw runtime.newArgumentError("% is not supported");
case 'A' :
{
if (occurrences == IS_STAR || occurrences > encode.remaining()) {
occurrences = encode.remaining();
}
byte[] potential = new byte[occurrences];
encode.get(potential);
for (int t = occurrences - 1; occurrences > 0; occurrences--, t--) {
byte c = potential[t];
if (c != '\0' && c != ' ') {
break;
}
}
result.append(RubyString.newString(runtime, new ByteList(potential, 0, occurrences,false)));
}
break;
case 'Z' :
{
boolean isStar = (occurrences == IS_STAR);
if (occurrences == IS_STAR || occurrences > encode.remaining()) {
occurrences = encode.remaining();
}
byte[] potential = new byte[occurrences];
int t = 0;
while (t < occurrences) {
byte b = encode.get();
if (b == 0) {
break;
}
potential[t] = b;
t++;
}
result.append(RubyString.newString(runtime, new ByteList(potential, 0, t, false)));
// In case when the number of occurences is
// explicitly specified, we have to read up
// the remaining garbage after the '\0' to
// satisfy the requested pattern.
if (!isStar) {
if (t < occurrences) {
// We encountered '\0' when
// were reading the buffer above,
// increment the number of read bytes.
t++;
}
while (t < occurrences) {
encode.get();
t++;
}
}
}
break;
case 'a' :
if (occurrences == IS_STAR || occurrences > encode.remaining()) {
occurrences = encode.remaining();
}
byte[] potential = new byte[occurrences];
encode.get(potential);
result.append(RubyString.newString(runtime, new ByteList(potential, false)));
break;
case 'b' :
{
if (occurrences == IS_STAR || occurrences > encode.remaining() * 8) {
occurrences = encode.remaining() * 8;
}
int bits = 0;
byte[] lElem = new byte[occurrences];
for (int lCurByte = 0; lCurByte < occurrences; lCurByte++) {
if ((lCurByte & 7) != 0) {
bits >>>= 1;
} else {
bits = encode.get();
}
lElem[lCurByte] = (bits & 1) != 0 ? (byte)'1' : (byte)'0';
}
result.append(RubyString.newString(runtime, new ByteList(lElem, false)));
}
break;
case 'B' :
{
if (occurrences == IS_STAR || occurrences > encode.remaining() * 8) {
occurrences = encode.remaining() * 8;
}
int bits = 0;
byte[] lElem = new byte[occurrences];
for (int lCurByte = 0; lCurByte < occurrences; lCurByte++) {
if ((lCurByte & 7) != 0)
bits <<= 1;
else
bits = encode.get();
lElem[lCurByte] = (bits & 128) != 0 ? (byte)'1' : (byte)'0';
}
result.append(RubyString.newString(runtime, new ByteList(lElem, false)));
}
break;
case 'h' :
{
if (occurrences == IS_STAR || occurrences > encode.remaining() * 2) {
occurrences = encode.remaining() * 2;
}
int bits = 0;
byte[] lElem = new byte[occurrences];
for (int lCurByte = 0; lCurByte < occurrences; lCurByte++) {
if ((lCurByte & 1) != 0) {
bits >>>= 4;
} else {
bits = encode.get();
}
lElem[lCurByte] = sHexDigits[bits & 15];
}
result.append(RubyString.newString(runtime, new ByteList(lElem, false)));
}
break;
case 'H' :
{
if (occurrences == IS_STAR || occurrences > encode.remaining() * 2) {
occurrences = encode.remaining() * 2;
}
int bits = 0;
byte[] lElem = new byte[occurrences];
for (int lCurByte = 0; lCurByte < occurrences; lCurByte++) {
if ((lCurByte & 1) != 0)
bits <<= 4;
else
bits = encode.get();
lElem[lCurByte] = sHexDigits[(bits >>> 4) & 15];
}
result.append(RubyString.newString(runtime, new ByteList(lElem, false)));
}
break;
case 'u':
{
int length = encode.remaining() * 3 / 4;
byte[] lElem = new byte[length];
int index = 0;
int s;
int total = 0;
s = encode.get();
while (encode.hasRemaining() && s > ' ' && s < 'a') {
int a, b, c, d;
byte[] hunk = new byte[3];
int len = (s - ' ') & 077;
s = safeGet(encode);
total += len;
if (total > length) {
len -= total - length;
total = length;
}
while (len > 0) {
int mlen = len > 3 ? 3 : len;
if (encode.hasRemaining() && s >= ' ') {
a = (s - ' ') & 077;
s = safeGet(encode);
} else
a = 0;
if (encode.hasRemaining() && s >= ' ') {
b = (s - ' ') & 077;
s = safeGet(encode);
} else
b = 0;
if (encode.hasRemaining() && s >= ' ') {
c = (s - ' ') & 077;
s = safeGet(encode);
} else
c = 0;
if (encode.hasRemaining() && s >= ' ') {
d = (s - ' ') & 077;
s = safeGet(encode);
} else
d = 0;
hunk[0] = (byte)((a << 2 | b >> 4) & 255);
hunk[1] = (byte)((b << 4 | c >> 2) & 255);
hunk[2] = (byte)((c << 6 | d) & 255);
for (int i = 0; i < mlen; i++) lElem[index++] = hunk[i];
len -= mlen;
}
if (s == '\r')
s = safeGet(encode);
if (s == '\n')
s = safeGet(encode);
else if (encode.hasRemaining()) {
if (safeGet(encode) == '\n') {
safeGet(encode); // Possible Checksum Byte
} else if (encode.hasRemaining()) {
encode.position(encode.position() - 1);
}
}
}
result.append(RubyString.newString(runtime, new ByteList(lElem, 0, index, false)));
}
break;
case 'm':
{
int length = encode.remaining()*3/4;
byte[] lElem = new byte[length];
int a = -1, b = -1, c = 0, d;
int index = 0;
int s = -1;
while (encode.hasRemaining()) {
a = b = c = d = -1;
// obtain a
s = safeGet(encode);
while (((a = b64_xtable[s]) == -1) && encode.hasRemaining()) {
s = safeGet(encode);
}
if (a == -1) break;
// obtain b
s = safeGet(encode);
while (((b = b64_xtable[s]) == -1) && encode.hasRemaining()) {
s = safeGet(encode);
}
if (b == -1) break;
// obtain c
s = safeGet(encode);
while (((c = b64_xtable[s]) == -1) && encode.hasRemaining()) {
if (s == '=') break;
s = safeGet(encode);
}
if ((s == '=') || c == -1) {
if (s == '=') {
encode.position(encode.position() - 1);
}
break;
}
// obtain d
s = safeGet(encode);
while (((d = b64_xtable[s]) == -1) && encode.hasRemaining()) {
if (s == '=') break;
s = safeGet(encode);
}
if ((s == '=') || d == -1) {
if (s == '=') {
encode.position(encode.position() - 1);
}
break;
}
// calculate based on a, b, c and d
lElem[index++] = (byte)((a << 2 | b >> 4) & 255);
lElem[index++] = (byte)((b << 4 | c >> 2) & 255);
lElem[index++] = (byte)((c << 6 | d) & 255);
}
if (a != -1 && b != -1) {
if (c == -1 && s == '=') {
lElem[index++] = (byte)((a << 2 | b >> 4) & 255);
} else if(c != -1 && s == '=') {
lElem[index++] = (byte)((a << 2 | b >> 4) & 255);
lElem[index++] = (byte)((b << 4 | c >> 2) & 255);
}
}
result.append(RubyString.newString(runtime, new ByteList(lElem, 0, index, false)));
}
break;
case 'M' :
{
byte[] lElem = new byte[Math.max(encode.remaining(),0)];
int index = 0;
for(;;) {
if (!encode.hasRemaining()) break;
byte c = safeGet(encode);
if (c != '=') {
lElem[index++] = c;
} else {
if (!encode.hasRemaining()) break;
encode.mark();
byte c1 = safeGet(encode);
if (c1 == '\n') continue;
int d1 = Character.digit((char)(c1 & 0xFF), 16);
if (d1 == -1) {
encode.reset();
break;
}
encode.mark();
if (!encode.hasRemaining()) break;
byte c2 = safeGet(encode);
int d2 = Character.digit((char)(c2 & 0xFF), 16);
if (d2 == -1) {
encode.reset();
break;
}
byte value = (byte)(d1 << 4 | d2);
lElem[index++] = value;
}
}
result.append(RubyString.newString(runtime, new ByteList(lElem, 0, index, false)));
}
break;
case 'U' :
{
if (occurrences == IS_STAR || occurrences > encode.remaining()) {
occurrences = encode.remaining();
}
while (occurrences-- > 0 && encode.remaining() > 0) {
try {
// TODO: for now, we use a faithful
// reimplementation of MRI's algorithm,
// but should use UTF8Encoding facilities
// from Joni, once it starts prefroming
// UTF-8 content validation.
result.append(
runtime.newFixnum(utf8Decode(encode)));
} catch (IllegalArgumentException e) {
throw runtime.newArgumentError(e.getMessage());
}
}
}
break;
case 'X':
if (occurrences == IS_STAR) {
// MRI behavior: Contrary to what seems to be logical,
// when '*' is given, MRI calculates the distance
// to the end, in order to go backwards.
occurrences = /*encode.limit() - */encode.remaining();
}
try {
encode.position(encode.position() - occurrences);
} catch (IllegalArgumentException e) {
throw runtime.newArgumentError("in `unpack': X outside of string");
}
break;
case 'x':
if (occurrences == IS_STAR) {
occurrences = encode.remaining();
}
try {
encode.position(encode.position() + occurrences);
} catch (IllegalArgumentException e) {
throw runtime.newArgumentError("in `unpack': x outside of string");
}
break;
}
}
return result;
}
/** utf8_to_uv
*/
private static int utf8Decode(ByteBuffer buffer) {
int c = buffer.get() & 0xFF;
int uv = c;
int n;
if ((c & 0x80) == 0) {
return c;
}
if ((c & 0x40) == 0) {
throw new IllegalArgumentException("malformed UTF-8 character");
}
if ((uv & 0x20) == 0) { n = 2; uv &= 0x1f; }
else if ((uv & 0x10) == 0) { n = 3; uv &= 0x0f; }
else if ((uv & 0x08) == 0) { n = 4; uv &= 0x07; }
else if ((uv & 0x04) == 0) { n = 5; uv &= 0x03; }
else if ((uv & 0x02) == 0) { n = 6; uv &= 0x01; }
else {
throw new IllegalArgumentException("malformed UTF-8 character");
}
if (n > buffer.remaining() + 1) {
throw new IllegalArgumentException(
"malformed UTF-8 character (expected " + n + " bytes, "
+ "given " + (buffer.remaining() + 1) + " bytes)");
}
int limit = n - 1;
n--;
if (n != 0) {
while (n-- != 0) {
c = buffer.get() & 0xff;
if ((c & 0xc0) != 0x80) {
throw new IllegalArgumentException("malformed UTF-8 character");
}
else {
c &= 0x3f;
uv = uv << 6 | c;
}
}
}
if (uv < utf8_limits[limit]) {
throw new IllegalArgumentException("redundant UTF-8 sequence");
}
return uv;
}
private static final long utf8_limits[] = {
0x0, /* 1 */
0x80, /* 2 */
0x800, /* 3 */
0x10000, /* 4 */
0x200000, /* 5 */
0x4000000, /* 6 */
0x80000000, /* 7 */
};
private static byte safeGet(ByteBuffer encode) {
return encode.hasRemaining() ? encode.get() : 0;
}
public static void decode(Ruby runtime, ByteBuffer encode, int occurrences,
RubyArray result, Converter converter) {
int lPadLength = 0;
if (occurrences == IS_STAR) {
occurrences = encode.remaining() / converter.size;
} else if (occurrences > encode.remaining() / converter.size) {
lPadLength = occurrences - encode.remaining() / converter.size;
occurrences = encode.remaining() / converter.size;
}
for (; occurrences-- > 0;) {
result.append(converter.decode(runtime, encode));
}
// MRI behavior: for 'Q', do not add trailing nils
if (converter != converters['Q']) {
for (; lPadLength-- > 0;)
result.append(runtime.getNil());
}
}
public static int encode(Ruby runtime, int occurrences, ByteList result,
RubyArray list, int index, Converter converter) {
int listSize = list.size();
while (occurrences-- > 0) {
if (listSize-- <= 0) {
throw runtime.newArgumentError(sTooFew);
}
IRubyObject from = list.eltInternal(index++);
converter.encode(runtime, from, result);
}
return index;
}
public abstract static class Converter {
public int size;
public Converter(int size) {
this.size = size;
}
public abstract IRubyObject decode(Ruby runtime, ByteBuffer format);
public abstract void encode(Ruby runtime, IRubyObject from, ByteList result);
}
/**
* shrinks a stringbuffer.
* shrinks a stringbuffer by a number of characters.
* @param i2Shrink the stringbuffer
* @param iLength how much to shrink
* @return the stringbuffer
**/
private static final ByteList shrink(ByteList i2Shrink, int iLength) {
iLength = i2Shrink.length() - iLength;
if (iLength < 0) {
throw new IllegalArgumentException();
}
i2Shrink.length(iLength);
return i2Shrink;
}
/**
* grows a stringbuffer.
* uses the Strings to pad the buffer for a certain length
* @param i2Grow the buffer to grow
* @param iPads the string used as padding
* @param iLength how much padding is needed
* @return the padded buffer
**/
private static final ByteList grow(ByteList i2Grow, byte[]iPads, int iLength) {
int lPadLength = iPads.length;
while (iLength >= lPadLength) {
i2Grow.append(iPads);
iLength -= lPadLength;
}
i2Grow.append(iPads, 0, iLength);
return i2Grow;
}
/**
* pack_pack
*
* Template characters for Array#pack Directive Meaning
* <table class="codebox" cellspacing="0" border="0" cellpadding="3">
* <tr bgcolor="#ff9999">
* <td valign="top">
* <b>Directive</b>
* </td>
* <td valign="top">
* <b>Meaning</b>
* </td>
* </tr>
* <tr>
* <td valign="top">@</td>
* <td valign="top">Moves to absolute position</td>
* </tr>
* <tr>
* <td valign="top">A</td>
* <td valign="top">ASCII string (space padded, count is width)</td>
* </tr>
* <tr>
* <td valign="top">a</td>
* <td valign="top">ASCII string (null padded, count is width)</td>
* </tr>
* <tr>
* <td valign="top">B</td>
* <td valign="top">Bit string (descending bit order)</td>
* </tr>
* <tr>
* <td valign="top">b</td>
* <td valign="top">Bit string (ascending bit order)</td>
* </tr>
* <tr>
* <td valign="top">C</td>
* <td valign="top">Unsigned char</td>
* </tr>
* <tr>
* <td valign="top">c</td>
* <td valign="top">Char</td>
* </tr>
* <tr>
* <td valign="top">d</td>
* <td valign="top">Double-precision float, native format</td>
* </tr>
* <tr>
* <td valign="top">E</td>
* <td valign="top">Double-precision float, little-endian byte order</td>
* </tr>
* <tr>
* <td valign="top">e</td>
* <td valign="top">Single-precision float, little-endian byte order</td>
* </tr>
* <tr>
* <td valign="top">f</td>
* <td valign="top">Single-precision float, native format</td>
* </tr>
* <tr>
* <td valign="top">G</td>
* <td valign="top">Double-precision float, network (big-endian) byte order</td>
* </tr>
* <tr>
* <td valign="top">g</td>
* <td valign="top">Single-precision float, network (big-endian) byte order</td>
* </tr>
* <tr>
* <td valign="top">H</td>
* <td valign="top">Hex string (high nibble first)</td>
* </tr>
* <tr>
* <td valign="top">h</td>
* <td valign="top">Hex string (low nibble first)</td>
* </tr>
* <tr>
* <td valign="top">I</td>
* <td valign="top">Unsigned integer</td>
* </tr>
* <tr>
* <td valign="top">i</td>
* <td valign="top">Integer</td>
* </tr>
* <tr>
* <td valign="top">L</td>
* <td valign="top">Unsigned long</td>
* </tr>
* <tr>
* <td valign="top">l</td>
* <td valign="top">Long</td>
* </tr>
* <tr>
* <td valign="top">M</td>
* <td valign="top">Quoted printable, MIME encoding (see RFC2045)</td>
* </tr>
* <tr>
* <td valign="top">m</td>
* <td valign="top">Base64 encoded string</td>
* </tr>
* <tr>
* <td valign="top">N</td>
* <td valign="top">Long, network (big-endian) byte order</td>
* </tr>
* <tr>
* <td valign="top">n</td>
* <td valign="top">Short, network (big-endian) byte-order</td>
* </tr>
* <tr>
* <td valign="top">P</td>
* <td valign="top">Pointer to a structure (fixed-length string)</td>
* </tr>
* <tr>
* <td valign="top">p</td>
* <td valign="top">Pointer to a null-terminated string</td>
* </tr>
* <tr>
* <td valign="top">Q</td>
* <td valign="top">Unsigned 64-bit number</td>
* </tr>
* <tr>
* <td valign="top">q</td>
* <td valign="top">64-bit number</td>
* </tr>
* <tr>
* <td valign="top">S</td>
* <td valign="top">Unsigned short</td>
* </tr>
* <tr>
* <td valign="top">s</td>
* <td valign="top">Short</td>
* </tr>
* <tr>
* <td valign="top">U</td>
* <td valign="top">UTF-8</td>
* </tr>
* <tr>
* <td valign="top">u</td>
* <td valign="top">UU-encoded string</td>
* </tr>
* <tr>
* <td valign="top">V</td>
* <td valign="top">Long, little-endian byte order</td>
* </tr>
* <tr>
* <td valign="top">v</td>
* <td valign="top">Short, little-endian byte order</td>
* </tr>
* <tr>
* <td valign="top">X</td>
* <td valign="top">Back up a byte</td>
* </tr>
* <tr>
* <td valign="top">x</td>
* <td valign="top">Null byte</td>
* </tr>
* <tr>
* <td valign="top">Z</td>
* <td valign="top">Same as ``A''</td>
* </tr>
* <tr>
* <td colspan="9" bgcolor="#ff9999" height="2"><img src="dot.gif" width="1" height="1"></td>
* </tr>
* </table>
*
*
* Packs the contents of arr into a binary sequence according to the directives in
* aTemplateString (see preceding table).
* Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, which gives the
* width of the resulting field.
* The remaining directives also may take a count, indicating the number of array
* elements to convert.
* If the count is an asterisk (``*''] = all remaining array elements will be
* converted.
* Any of the directives ``sSiIlL'' may be followed by an underscore (``_'') to use
* the underlying platform's native size for the specified type; otherwise, they
* use a platform-independent size. Spaces are ignored in the template string.
* @see RubyString#unpack
**/
@SuppressWarnings("fallthrough")
public static RubyString pack(Ruby runtime, RubyArray list, ByteList formatString) {
ByteBuffer format = ByteBuffer.wrap(formatString.unsafeBytes(), formatString.begin(), formatString.length());
ByteList result = new ByteList();
int listSize = list.size();
int type = 0;
int next = safeGet(format);
int idx = 0;
ByteList lCurElemString;
mainLoop: while (next != 0) {
type = next;
next = safeGet(format);
// Skip all whitespace in pack format string
while (ASCII.isSpace(type)) {
if (next == 0) break mainLoop;
type = next;
next = safeGet(format);
}
// Skip embedded comments in pack format string
if (type == '#') {
while (type != '\n') {
if (next == 0) break mainLoop;
type = next;
next = safeGet(format);
}
}
if (next == '!' || next == '_') {
if (NATIVE_CODES.indexOf(type) == -1) {
throw runtime.newArgumentError("'" + next +
"' allowed only after types " + NATIVE_CODES);
}
next = safeGet(format);
}
// Determine how many of type are needed (default: 1)
int occurrences = 1;
boolean isStar = false;
if (next != 0) {
if (next == '*') {
if ("@Xxu".indexOf(type) != -1) {
occurrences = 0;
} else {
occurrences = listSize;
isStar = true;
}
next = safeGet(format);
} else if (ASCII.isDigit(next)) {
occurrences = 0;
do {
occurrences = occurrences * 10 + Character.digit((char)(next & 0xFF), 10);
next = safeGet(format);
} while (next != 0 && ASCII.isDigit(next));
}
}
Converter converter = converters[type];
if (converter != null) {
idx = encode(runtime, occurrences, result, list, idx, converter);
continue;
}
switch (type) {
case '%' :
throw runtime.newArgumentError("% is not supported");
case 'A' :
case 'a' :
case 'Z' :
case 'B' :
case 'b' :
case 'H' :
case 'h' :
{
if (listSize-- <= 0) {
throw runtime.newArgumentError(sTooFew);
}
IRubyObject from = list.eltInternal(idx++);
lCurElemString = from == runtime.getNil() ? ByteList.EMPTY_BYTELIST : from.convertToString().getByteList();
if (isStar) {
occurrences = lCurElemString.length();
// 'Z' adds extra null pad (versus 'a')
if (type == 'Z') occurrences++;
}
switch (type) {
case 'a' :
case 'A' :
case 'Z' :
if (lCurElemString.length() >= occurrences) {
result.append(lCurElemString.bytes, lCurElemString.begin, occurrences);
} else {//need padding
//I'm fairly sure there is a library call to create a
//string filled with a given char with a given length but I couldn't find it
result.append(lCurElemString);
occurrences -= lCurElemString.length();
switch (type) {
case 'a':
case 'Z':
grow(result, sNil10, occurrences);
break;
default:
grow(result, sSp10, occurrences);
break;
}
}
break;
case 'b' :
{
int currentByte = 0;
int padLength = 0;
if (occurrences > lCurElemString.length()) {
padLength = (occurrences - lCurElemString.length()) / 2 + occurrences % 2;
occurrences = lCurElemString.length();
}
for (int i = 0; i < occurrences;) {
if ((lCurElemString.charAt(i++) & 1) != 0) {//if the low bit is set
currentByte |= 128; //set the high bit of the result
}
if ((i & 7) == 0) {
result.append((byte) (currentByte & 0xff));
currentByte = 0;
continue;
}
//if the index is not a multiple of 8, we are not on a byte boundary
currentByte >>= 1; //shift the byte
}
if ((occurrences & 7) != 0) { //if the length is not a multiple of 8
currentByte >>= 7 - (occurrences & 7); //we need to pad the last byte
result.append((byte) (currentByte & 0xff));
}
//do some padding, I don't understand the padding strategy
result.length(result.length() + padLength);
}
break;
case 'B' :
{
int currentByte = 0;
int padLength = 0;
if (occurrences > lCurElemString.length()) {
padLength = (occurrences - lCurElemString.length()) / 2 + occurrences % 2;
occurrences = lCurElemString.length();
}
for (int i = 0; i < occurrences;) {
currentByte |= lCurElemString.charAt(i++) & 1;
// we filled up current byte; append it and create next one
if ((i & 7) == 0) {
result.append((byte) (currentByte & 0xff));
currentByte = 0;
continue;
}
//if the index is not a multiple of 8, we are not on a byte boundary
currentByte <<= 1;
}
if ((occurrences & 7) != 0) { //if the length is not a multiple of 8
currentByte <<= 7 - (occurrences & 7); //we need to pad the last byte
result.append((byte) (currentByte & 0xff));
}
result.length(result.length() + padLength);
}
break;
case 'h' :
{
int currentByte = 0;
int padLength = 0;
if (occurrences > lCurElemString.length()) {
padLength = occurrences - lCurElemString.length();
occurrences = lCurElemString.length();
}
for (int i = 0; i < occurrences;) {
byte currentChar = (byte)lCurElemString.charAt(i++);
if (Character.isJavaIdentifierStart(currentChar)) {
//this test may be too lax but it is the same as in MRI
currentByte |= (((currentChar & 15) + 9) & 15) << 4;
} else {
currentByte |= (currentChar & 15) << 4;
}
if ((i & 1) != 0) {
currentByte >>= 4;
} else {
result.append((byte) (currentByte & 0xff));
currentByte = 0;
}
}
if ((occurrences & 1) != 0) {
result.append((byte) (currentByte & 0xff));
}
result.length(result.length() + padLength);
}
break;
case 'H' :
{
int currentByte = 0;
int padLength = 0;
if (occurrences > lCurElemString.length()) {
padLength = occurrences - lCurElemString.length();
occurrences = lCurElemString.length();
}
for (int i = 0; i < occurrences;) {
byte currentChar = (byte)lCurElemString.charAt(i++);
if (Character.isJavaIdentifierStart(currentChar)) {
//this test may be too lax but it is the same as in MRI
currentByte |= ((currentChar & 15) + 9) & 15;
} else {
currentByte |= currentChar & 15;
}
if ((i & 1) != 0) {
currentByte <<= 4;
} else {
result.append((byte) (currentByte & 0xff));
currentByte = 0;
}
}
if ((occurrences & 1) != 0) {
result.append((byte) (currentByte & 0xff));
}
result.length(result.length() + padLength);
}
break;
}
break;
}
case 'x' :
grow(result, sNil10, occurrences);
break;
case 'X' :
try {
shrink(result, occurrences);
} catch (IllegalArgumentException e) {
throw runtime.newArgumentError("in `pack': X outside of string");
}
break;
case '@' :
occurrences -= result.length();
if (occurrences > 0) {
grow(result, sNil10, occurrences);
}
occurrences = -occurrences;
if (occurrences > 0) {
shrink(result, occurrences);
}
break;
case 'u' :
case 'm' :
{
if (listSize-- <= 0) {
throw runtime.newArgumentError(sTooFew);
}
IRubyObject from = list.eltInternal(idx++);
lCurElemString = from == runtime.getNil() ? ByteList.EMPTY_BYTELIST : from.convertToString().getByteList();
occurrences = occurrences <= 2 ? 45 : occurrences / 3 * 3;
if (lCurElemString.length() == 0) break;
byte[] charsToEncode = lCurElemString.bytes;
for (int i = 0; i < lCurElemString.length(); i += occurrences) {
encodes(runtime, result, charsToEncode,
i + lCurElemString.begin, lCurElemString.length() - i,
occurrences, (byte)type);
}
}
break;
case 'M' :
{
if (listSize-- <= 0) {
throw runtime.newArgumentError(sTooFew);
}
IRubyObject from = list.eltInternal(idx++);
lCurElemString = from == runtime.getNil() ? ByteList.EMPTY_BYTELIST : from.asString().getByteList();
if (occurrences <= 1) {
occurrences = 72;
}
qpencode(result, lCurElemString, occurrences);
}
break;
case 'U' :
UTF8Encoding enc = UTF8Encoding.INSTANCE;
byte[] packedBytes = new byte[enc.maxLength() * occurrences];
int index = 0;
while (occurrences-- > 0) {
if (listSize-- <= 0) {
throw runtime.newArgumentError(sTooFew);
}
IRubyObject from = list.eltInternal(idx++);
int code = from == runtime.getNil() ? 0 : RubyNumeric.num2int(from);
if (code < 0) {
throw runtime.newRangeError(
"pack(U): value out of range");
}
int length = enc.codeToMbc(code, packedBytes, index);
result.append(packedBytes, index, length);
index += length;
}
break;
case 'w' :
if (listSize-- <= 0) {
throw runtime.newArgumentError(sTooFew);
}
IRubyObject from = list.eltInternal(idx++);
String stringVal = from == runtime.getNil() ? "0" : from.asString().toString();
BigInteger bigInt = new BigInteger(stringVal);
// we don't deal with negatives.
if (bigInt.compareTo(BigInteger.ZERO) >= 0) {
int bitLength = bigInt.toString(2).length();
byte[] bytes = bigInt.toByteArray();
byte[] buf = new byte[(bitLength / 7) + ((bitLength % 7) > 0 ? 1 : 0)];
int b = 0;
int destBit = 0;
int destByte = 0;
for(int srcByte = bytes.length - 1; srcByte >= 0; srcByte--) {
for(int srcBit = 0; srcBit < 8; srcBit++, destBit++) {
if(destBit == 7) {
buf[buf.length - 1 - destByte++] = (byte) (b & 0xff);
b = 0x80;
destBit = 0;
}
int val = bytes[srcByte] & (1 << srcBit);
if(destBit > srcBit) {
val = 0xff & (val << destBit - srcBit);
} else if(destBit < srcBit) {
val = 0xff & (val >> srcBit - destBit);
}
b |= 0xff & val;
}
}
if(b != 0x80) {
buf[destByte] = (byte) (b & 0xff);
}
result.append(buf);
} else {
throw runtime.newArgumentError("can't compress negative numbers");
}
break;
}
}
return runtime.newString(result);
}
/**
* Retrieve an encoded int in little endian starting at index in the
* string value.
*
* @param encode string to get int from
* @return the decoded integer
*/
private static int decodeIntLittleEndian(ByteBuffer encode) {
encode.order(ByteOrder.LITTLE_ENDIAN);
int value = encode.getInt();
encode.order(ByteOrder.BIG_ENDIAN);
return value;
}
/**
* Retrieve an encoded int in little endian starting at index in the
* string value.
*
* @param encode string to get int from
* @return the decoded integer
*/
private static int decodeIntBigEndian(ByteBuffer encode) {
return encode.getInt();
}
/**
* Retrieve an encoded int in big endian starting at index in the string
* value.
*
* @param encode string to get int from
* @return the decoded integer
*/
private static long decodeIntUnsignedBigEndian(ByteBuffer encode) {
return (long)encode.getInt() & 0xFFFFFFFFL;
}
/**
* Retrieve an encoded int in little endian starting at index in the
* string value.
*
* @param encode the encoded string
* @return the decoded integer
*/
private static long decodeIntUnsignedLittleEndian(ByteBuffer encode) {
encode.order(ByteOrder.LITTLE_ENDIAN);
long value = encode.getInt() & 0xFFFFFFFFL;
encode.order(ByteOrder.BIG_ENDIAN);
return value;
}
/**
* Encode an int in little endian format into a packed representation.
*
* @param result to be appended to
* @param s the integer to encode
*/
private static void encodeIntLittleEndian(ByteList result, int s) {
result.append((byte) (s & 0xff)).append((byte) ((s >> 8) & 0xff));
result.append((byte) ((s>>16) & 0xff)).append((byte) ((s>>24) &0xff));
}
/**
* Encode an int in big-endian format into a packed representation.
*
* @param result to be appended to
* @param s the integer to encode
*/
private static void encodeIntBigEndian(ByteList result, int s) {
result.append((byte) ((s>>24) &0xff)).append((byte) ((s>>16) &0xff));
result.append((byte) ((s >> 8) & 0xff)).append((byte) (s & 0xff));
}
/**
* Decode a long in big-endian format from a packed value
*
* @param encode string to get int from
* @return the long value
*/
private static long decodeLongBigEndian(ByteBuffer encode) {
int c1 = decodeIntBigEndian(encode);
int c2 = decodeIntBigEndian(encode);
return ((long) c1 << 32) + (c2 & 0xffffffffL);
}
/**
* Decode a long in little-endian format from a packed value
*
* @param encode string to get int from
* @return the long value
*/
private static long decodeLongLittleEndian(ByteBuffer encode) {
int c1 = decodeIntLittleEndian(encode);
int c2 = decodeIntLittleEndian(encode);
return ((long) c2 << 32) + (c1 & 0xffffffffL);
}
/**
* Encode a long in little-endian format into a packed value
*
* @param result to pack long into
* @param l is the long to encode
*/
private static void encodeLongLittleEndian(ByteList result, long l) {
encodeIntLittleEndian(result, (int) (l & 0xffffffff));
encodeIntLittleEndian(result, (int) (l >>> 32));
}
/**
* Encode a long in big-endian format into a packed value
*
* @param result to pack long into
* @param l is the long to encode
*/
private static void encodeLongBigEndian(ByteList result, long l) {
encodeIntBigEndian(result, (int) (l >>> 32));
encodeIntBigEndian(result, (int) (l & 0xffffffff));
}
/**
* Decode a double from a packed value
*
* @param encode string to get int from
* @return the double value
*/
private static double decodeDoubleLittleEndian(ByteBuffer encode) {
return Double.longBitsToDouble(decodeLongLittleEndian(encode));
}
/**
* Decode a double in big-endian from a packed value
*
* @param encode string to get int from
* @return the double value
*/
private static double decodeDoubleBigEndian(ByteBuffer encode) {
return Double.longBitsToDouble(decodeLongBigEndian(encode));
}
/**
* Encode a double in little endian format into a packed value
*
* @param result to pack double into
* @param d is the double to encode
*/
private static void encodeDoubleLittleEndian(ByteList result, double d) {
encodeLongLittleEndian(result, Double.doubleToLongBits(d));
}
/**
* Encode a double in big-endian format into a packed value
*
* @param result to pack double into
* @param d is the double to encode
*/
private static void encodeDoubleBigEndian(ByteList result, double d) {
encodeLongBigEndian(result, Double.doubleToLongBits(d));
}
/**
* Decode a float in big-endian from a packed value
*
* @param encode string to get int from
* @return the double value
*/
private static float decodeFloatBigEndian(ByteBuffer encode) {
return Float.intBitsToFloat(decodeIntBigEndian(encode));
}
/**
* Decode a float in little-endian from a packed value
*
* @param encode string to get int from
* @return the double value
*/
private static float decodeFloatLittleEndian(ByteBuffer encode) {
return Float.intBitsToFloat(decodeIntLittleEndian(encode));
}
/**
* Encode a float in little endian format into a packed value
* @param result to pack float into
* @param f is the float to encode
*/
private static void encodeFloatLittleEndian(ByteList result, float f) {
encodeIntLittleEndian(result, Float.floatToIntBits(f));
}
/**
* Encode a float in big-endian format into a packed value
* @param result to pack float into
* @param f is the float to encode
*/
private static void encodeFloatBigEndian(ByteList result, float f) {
encodeIntBigEndian(result, Float.floatToIntBits(f));
}
/**
* Decode a short in big-endian from a packed value
*
* @param encode string to get int from
* @return the short value
*/
private static int decodeShortUnsignedLittleEndian(ByteBuffer encode) {
encode.order(ByteOrder.LITTLE_ENDIAN);
int value = encode.getShort() & 0xFFFF;
encode.order(ByteOrder.BIG_ENDIAN);
return value;
}
/**
* Decode a short in big-endian from a packed value
*
* @param encode string to get int from
* @return the short value
*/
private static int decodeShortUnsignedBigEndian(ByteBuffer encode) {
int value = encode.getShort() & 0xFFFF;
return value;
}
/**
* Decode a short in big-endian from a packed value
*
* @param encode string to get int from
* @return the short value
*/
private static short decodeShortBigEndian(ByteBuffer encode) {
return encode.getShort();
}
/**
* Encode an short in little endian format into a packed representation.
*
* @param result to be appended to
* @param s the short to encode
*/
private static void encodeShortLittleEndian(ByteList result, int s) {
result.append((byte) (s & 0xff)).append((byte) ((s & 0xff00) >> 8));
}
/**
* Encode an shortin big-endian format into a packed representation.
*
* @param result to be appended to
* @param s the short to encode
*/
private static void encodeShortBigEndian(ByteList result, int s) {
result.append((byte) ((s & 0xff00) >> 8)).append((byte) (s & 0xff));
}
}