* Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
* This file is part of Resin(R) Open Source
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
* @author Nam Nguyen
package com.caucho.quercus.lib.i18n;
import com.caucho.quercus.QuercusModuleException;
import com.caucho.quercus.UnimplementedException;
import com.caucho.quercus.annotation.Optional;
import com.caucho.quercus.annotation.Reference;
import com.caucho.quercus.annotation.VariableArguments;
import com.caucho.quercus.env.*;
import com.caucho.quercus.lib.MailModule;
import com.caucho.quercus.lib.regexp.Eregi;
import com.caucho.quercus.lib.regexp.RegexpModule;
import com.caucho.quercus.lib.regexp.UnicodeEreg;
import com.caucho.quercus.lib.regexp.UnicodeEregi;
import com.caucho.quercus.lib.string.StringModule;
import com.caucho.quercus.module.AbstractQuercusModule;
import com.caucho.quercus.module.IniDefinitions;
import com.caucho.quercus.module.IniDefinition;
import com.caucho.util.L10N;
import com.caucho.vfs.Encoding;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
public class MbstringModule
extends AbstractQuercusModule
// Logger named after this class.
private static final Logger log
= Logger.getLogger(MbstringModule.class.getName());
// Message localizer; its l() method builds the warning and log texts below.
private static final L10N L = new L10N(MbstringModule.class);
// Registry of this module's php.ini entries, returned by getIniDefinitions().
private static final IniDefinitions _iniDefinitions = new IniDefinitions();
// Mode selectors understood by mb_convert_case().
public static final int MB_CASE_UPPER = 0;
public static final int MB_CASE_LOWER = 1;
public static final int MB_CASE_TITLE = 2;
* Returns the extensions implemented by the module.
/**
 * Reports the single extension name provided by this module.
 *
 * @return a one-element array containing "mbstring"
 */
public String []getLoadedExtensions()
return new String[] { "mbstring" };
* Returns the default php.ini values.
/**
 * Exposes the static registry of ini definitions declared by this class.
 *
 * @return the shared _iniDefinitions instance
 */
public IniDefinitions getIniDefinitions()
return _iniDefinitions;
* Checks if the string is correctly encoded.
* XXX: no args
public boolean mb_check_encoding(Env env,
@Optional Value var,
@Optional String encoding)
if (encoding == null || encoding.length() == 0)
encoding = getEncoding(env);
Decoder decoder = Decoder.create(encoding);
if (! var.isDefault())
return decoder.isDecodable(env, var.toStringValue(env));
throw new UnimplementedException("mb_check_encoding() with no args");
* Upper-cases, lower-cases, or capitalizes first letter of words.
public static StringValue mb_convert_case(Env env,
StringValue str,
int mode,
@Optional("") String encoding)
if (mode == MB_CASE_TITLE) {
encoding = getEncoding(env, encoding);
CharSequence unicodeStr = decode(env, str, encoding);
unicodeStr = toUpperCaseTitle(env, unicodeStr);
return encode(env, unicodeStr, encoding);
else if (mode == MB_CASE_LOWER)
return mb_strtolower(env, str, encoding);
else if (mode == MB_CASE_UPPER)
return mb_strtoupper(env, str, encoding);
return str;
* Converts string of one encoding to another.
public static Value mb_convert_encoding(Env env,
StringValue str,
String destEncoding,
@Optional Value fromEncodings)
ArrayList<String> charsetList = getEncodingList(env, fromEncodings);
CharSequence unicodeStr = null;
for (int i = 0; i < charsetList.size(); i++) {
String charset = charsetList.get(i);
try {
unicodeStr = decode(env, str, charset);
} catch (UnsupportedCharsetException e) {
// should probably not log anything here because this is normal
// behavior because of fallback encodings
if (unicodeStr == null) {
log.log(Level.FINE, L.l("unsupported character encoding {0}", fromEncodings));
env.warning(L.l("unsupported character encoding {0}", fromEncodings));
return str;
try {
return encode(env, unicodeStr, destEncoding);
} catch (UnsupportedCharsetException e) {
log.log(Level.FINE, e.getMessage(), e);
env.warning(L.l("unsupported character encoding {0}", fromEncodings));
return BooleanValue.FALSE;
* Performs Japanese specific charset conversion.
/**
 * Placeholder for mb_convert_kana; no conversion is implemented.
 *
 * @throws UnimplementedException always
 */
public static StringValue mb_convert_kana(Env env,
StringValue str,
@Optional("") String option,
@Optional("") String encoding)
throw new UnimplementedException("mb_convert_kana");
* Decodes and then encodes variables.
* XXX: variable arguments to convert.
public static StringValue mb_convert_variables(Env env,
String toEncoding,
String fromEncodings,
@Reference Value vars)
// XXX: fallback encoding
int tail = fromEncodings.indexOf(',', 1);
if (tail < 0)
tail = fromEncodings.length();
String srcEncoding;
if (tail < 0)
srcEncoding = fromEncodings;
srcEncoding = getEncoding(env, fromEncodings.substring(0, tail).trim());
Value decoded = decodeAll(env, vars, srcEncoding);
vars.set(encodeAll(env, decoded, toEncoding));
return env.createStringOld(srcEncoding);
* Decodes mime field.
public static Value mb_decode_mimeheader(Env env,
StringValue str)
String encoding = getEncoding(env);
try {
return QuercusMimeUtility.decodeMime(env, str, encoding);
} catch (UnsupportedEncodingException e) {
throw new QuercusModuleException(e.getMessage());
* Decodes HTML numeric entity.
/**
 * Placeholder for mb_decode_numericentity; no decoding is implemented.
 *
 * @throws UnimplementedException always
 */
public static StringValue mb_decode_numericentity(Env env,
StringValue str,
ArrayValue convmap,
@Optional String encoding)
throw new UnimplementedException("mb_decode_numericentity");
* Detects encoding of string.
public static Value mb_detect_encoding(Env env,
StringValue str,
@Optional Value encodingV,
@Optional boolean isStrict)
// XXX: strict
ArrayList<String> encodingList = getDetectOrderList(env, encodingV);
int len = encodingList.size();
for (int i = 0; i < len; i++) {
String charset = encodingList.get(i);
Decoder decoder = Decoder.create(charset);
if (decoder.isDecodable(env, str))
return env.createStringOld(charset);
return BooleanValue.FALSE;
* Specifies order of charsets to test when detecting encoding.
public static Value mb_detect_order(Env env,
@Optional Value encodingV)
if (encodingV.isDefault()) {
ArrayValue array = new ArrayValueImpl();
ArrayList<String> list = getDetectOrderList(env, encodingV);
for (String encoding : list) {
return array;
else {
ArrayList<String> list = new ArrayList<String>();
if (encodingV.isArray()) {
Iterator<Value> iter = encodingV.getValueIterator(env);
while (iter.hasNext()) {
parseCommaSeparatedList(list, encodingV.toString());
env.setSpecialValue("mb.detect_order", list);
return BooleanValue.TRUE;
/**
 * Builds the list of charset names to try when detecting an encoding.
 *
 * With a default argument, a list previously stored under "mb.detect_order"
 * is returned as-is; otherwise the "mbstring.detect_order" ini string is
 * parsed. A non-default argument is read as an array or as a string.
 */
private static ArrayList<String> getDetectOrderList(Env env,
Value encodingV)
// Prefer the order saved by mb_detect_order(), if there is one.
if (encodingV.isDefault() && env.getSpecialValue("mb.detect_order") != null)
return (ArrayList<String>) env.getSpecialValue("mb.detect_order");
ArrayList<String> list = new ArrayList<String>();
if (encodingV.isDefault()) {
String encodings = env.getIniString("mbstring.detect_order");
if (encodings != null)
parseCommaSeparatedList(list, encodings);
// NOTE(review): the body of this else (no ini value) is not visible here;
// presumably it adds built-in default charsets — confirm.
else {
else if (encodingV.isArray()) {
Iterator<Value> iter = encodingV.getValueIterator(env);
// NOTE(review): the loop body is not visible here; presumably each element
// is added to list — confirm.
while (iter.hasNext()) {
else {
String encodings = encodingV.toString();
// NOTE(review): the body of the "auto" branch is not visible here — confirm
// which charsets it is meant to add.
if (encodings.equalsIgnoreCase("auto")) {
parseCommaSeparatedList(list, encodingV.toString());
return list;
private static void parseCommaSeparatedList(ArrayList<String> list,
String str)
int start = 0;
int index;
while ((index = str.indexOf(",", start)) >= 0) {
String charset = str.substring(start, index).trim();
start = index + 1;
* Encodes a string into mime.
/**
 * Encodes a string as a MIME word via QuercusMimeUtility.encodeMimeWord
 * and returns the result as a string value. An empty charset is replaced
 * by the internal encoding.
 *
 * @throws QuercusModuleException carrying the message of an
 *         UnsupportedEncodingException raised while encoding
 */
public static StringValue mb_encode_mimeheader(Env env,
StringValue str,
@Optional("") String charset,
@Optional("B") String transfer_encoding,
@Optional("") String linefeed)
charset = getEncoding(env, charset);
try {
// NOTE(review): the remaining arguments of this call are not visible here;
// confirm how charset, transfer_encoding and linefeed are passed.
String mime = QuercusMimeUtility.encodeMimeWord(str.toString(),
return env.createStringOld(mime);
} catch (UnsupportedEncodingException e) {
throw new QuercusModuleException(e.getMessage());
* Encodes HTML numeric string entity.
public static StringValue mb_encode_numericentity(Env env,
StringValue str,
ArrayValue convmap,
@Optional String encoding)
throw new UnimplementedException();
* Returns true if pattern matches a part of string.
public static BooleanValue mb_ereg_match(Env env,
UnicodeEreg ereg,
StringValue string,
@Optional String option)
String encoding = getEncoding(env);
string = string.convertToUnicode(env, encoding);
// XXX: option
Value val = RegexpModule.eregImpl(env, ereg, string, null);
if (val == BooleanValue.FALSE)
return BooleanValue.FALSE;
return BooleanValue.TRUE;
* Multibyte version of ereg_replace.
/**
 * Multibyte ereg_replace: converts the replacement and subject using the
 * internal encoding, delegates to RegexpModule.ereg_replace, and encodes
 * the result back with the same encoding.
 */
public static Value mb_ereg_replace(Env env,
UnicodeEreg ereg,
StringValue replacement,
StringValue subject,
@Optional String option)
String encoding = getEncoding(env);
replacement = replacement.convertToUnicode(env, encoding);
subject = subject.convertToUnicode(env, encoding);
//XXX: option
// NOTE(review): the remaining arguments of this call are not visible here;
// presumably ereg, replacement and subject — confirm.
Value val = RegexpModule.ereg_replace(env,
return encodeAll(env, val, encoding);
* Multibyte version of ereg.
/**
 * Multibyte ereg; a direct delegation to this class's eregImpl.
 *
 * @param regs optional array that eregImpl fills with the matches
 */
public static Value mb_ereg(Env env,
UnicodeEreg ereg,
StringValue string,
@Optional ArrayValue regs)
return eregImpl(env, ereg, string, regs);
* Multibyte version of eregi_replace.
public static Value mb_eregi_replace(Env env,
StringValue pattern,
StringValue replacement,
StringValue subject,
@Optional String option)
String encoding = getEncoding(env);
pattern = pattern.convertToUnicode(env, encoding);
replacement = replacement.convertToUnicode(env, encoding);
subject = subject.convertToUnicode(env, encoding);
//XXX: option
Eregi regexp = RegexpModule.createEregi(env, pattern);
Value val = RegexpModule.eregi_replace(env, regexp, replacement, subject);
return encodeAll(env, val, encoding);
* Multibyte version of eregi.
/**
 * Multibyte eregi; a direct delegation to this class's eregImpl with a
 * UnicodeEregi expression.
 *
 * @param regs optional array that eregImpl fills with the matches
 */
public static Value mb_eregi(Env env,
UnicodeEregi eregi,
StringValue string,
@Optional ArrayValue regs)
return eregImpl(env, eregi, string, regs);
/**
 * Shared implementation behind mb_ereg() and mb_eregi().
 *
 * The subject is converted using the internal encoding and matched with
 * RegexpModule.eregImpl. When regs is supplied, each entry of the match
 * result is passed through encodeAll and stored into regs under its key.
 */
private static Value eregImpl(Env env,
UnicodeEreg ereg,
StringValue string,
ArrayValue regs)
String encoding = getEncoding(env);
string = string.convertToUnicode(env, encoding);
// Without a regs array there is nothing to copy back; return the raw result.
if (regs == null) {
return RegexpModule.eregImpl(env, ereg, string, null);
Value val;
// Temporary variable that receives the match array from RegexpModule.
Var regVar = new Var();
val = RegexpModule.eregImpl(env, ereg, string, regVar);
if (regVar.isset()) {
ArrayValue results = regVar.toArrayValue(env);
for (Map.Entry<Value,Value> entry : results.entrySet()) {
Value bytes = encodeAll(env, entry.getValue(), encoding);
regs.put(entry.getKey(), bytes);
// NOTE(review): the argument of this call is not visible here; confirm
// what length is meant to be returned.
val = LongValue.create(
return val;
* Gets current position of ereg state object.
public static LongValue mb_ereg_search_getpos(Env env)
EregSearch ereg = getEreg(env);
if (ereg == null)
return LongValue.ZERO;
return LongValue.create(ereg._position);
* Gets the last match of ereg state object from previous matching.
public static Value mb_ereg_search_getregs(Env env)
EregSearch ereg = getEreg(env);
if (ereg == null || ereg._lastMatch == null)
return BooleanValue.FALSE;
return ereg._lastMatch;
* Initializes an ereg state object.
public static BooleanValue mb_ereg_search_init(Env env,
StringValue string,
@Optional Value rawRegexp,
@Optional Value option)
UnicodeEregi regexp = null;
if (! rawRegexp.isDefault()) {
= RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
EregSearch ereg = new EregSearch(env, string, regexp, option);
env.setSpecialValue("mb.search", ereg);
return BooleanValue.TRUE;
* Returns index and position after matching.
public static Value mb_ereg_search_pos(Env env,
@Optional Value rawRegexp,
@Optional Value option)
UnicodeEregi regexp = null;
if (! rawRegexp.isDefault()) {
= RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
EregSearch ereg = getEreg(env, regexp, option);
if (ereg == null) {
env.warning(L.l("Regular expression not set"));
return BooleanValue.FALSE;
return ereg.search(env, true);
* Returns match array after matching.
public static Value mb_ereg_search_regs(Env env,
@Optional Value rawRegexp,
@Optional Value option)
UnicodeEregi regexp = null;
if (! rawRegexp.isDefault()) {
= RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
EregSearch ereg = getEreg(env, regexp, option);
if (ereg == null) {
env.warning(L.l("Regular expression not set"));
return BooleanValue.FALSE;
if (ereg.search(env, false) == BooleanValue.FALSE)
return BooleanValue.FALSE;
return ereg._lastMatch;
* Sets the position of the ereg state object.
public static BooleanValue mb_ereg_search_setpos(Env env,
int position)
EregSearch ereg = getEreg(env);
if (ereg == null)
return BooleanValue.FALSE;
ereg._position = position;
return BooleanValue.TRUE;
* Returns whether or not pattern matches string.
public static BooleanValue mb_ereg_search(Env env,
@Optional Value rawRegexp,
@Optional Value option)
UnicodeEregi regexp = null;
if (! rawRegexp.isDefault()) {
= RegexpModule.createUnicodeEregi(env, rawRegexp.toStringValue(env));
EregSearch ereg = getEreg(env, regexp, option);
if (ereg == null) {
env.warning(L.l("Regular expression not set"));
return BooleanValue.FALSE;
Value result = ereg.search(env, false);
return BooleanValue.create(result.toBoolean());
* Returns the ereg state object from the environment.
private static EregSearch getEreg(Env env)
Object obj = env.getSpecialValue("mb.search");
if (obj == null)
return null;
return (EregSearch) obj;
* Returns the ereg state object from the environment iff the ereg object
* is a valid one.
private static EregSearch getEreg(Env env,
UnicodeEregi regexp,
Value option)
Object obj = env.getSpecialValue("mb.search");
if (obj != null) {
EregSearch ereg = (EregSearch) obj;
if (regexp != null)
ereg.init(regexp, option);
if (ereg._isValidRegexp)
return ereg;
return null;
return null;
* Returns current mb settings.
/**
 * Returns mbstring settings. An empty type yields an array,
 * "internal_encoding" and "http_output" yield the corresponding encoding
 * name, and any other type warns and yields FALSE.
 */
public static Value mb_get_info(Env env,
@Optional("") String type)
if (type.length() == 0) {
// NOTE(review): no statements populating this array are visible here —
// confirm which settings it is meant to contain.
ArrayValue array = new ArrayValueImpl();
return array;
else if (type.equals("internal_encoding")) {
return env.createStringOld(getEncoding(env));
else if (type.equals("http_output")) {
return env.createStringOld(getOutputEncoding(env));
else {
env.warning(L.l("unsupported option: {0}", type));
return BooleanValue.FALSE;
* Returns and/or sets the http input encoding
/**
 * Placeholder for mb_http_input; nothing is implemented.
 *
 * @throws UnimplementedException always
 */
public static Value mb_http_input(Env env,
@Optional String type)
throw new UnimplementedException("mb_http_input");
* Returns and/or sets the http output encoding
public static Value mb_http_output(Env env,
@Optional String encoding)
if (encoding.length() == 0) {
return env.createStringOld(getOutputEncoding(env));
else {
env.setIni("mbstring.http_output", encoding);
return BooleanValue.TRUE;
* Returns and/or sets the internal encoding.
public static Value mb_internal_encoding(Env env,
@Optional String encoding)
if (encoding.length() == 0)
return env.createStringOld(getEncoding(env));
else {
setEncoding(env, encoding);
return BooleanValue.TRUE;
* Returns and/or sets the encoding for mail.
public static Value mb_language(Env env,
@Optional String language)
String encoding = getEncodingLanguage(env);
if (language == null || language.length() == 0) {
if (encoding.equalsIgnoreCase("ISO-2022-JP"))
return env.createStringOld("Japanese");
else if (encoding.equalsIgnoreCase("ISO-8859-1"))
return env.createStringOld("English");
else if (encoding.equalsIgnoreCase("UTF-8"))
return env.createStringOld("uni");
return env.createStringOld(encoding);
else if (language.equals("Japanese") || language.equals("ja"))
setEncodingLanguage(env, "ISO-2022-JP");
else if (language.equals("English") || language.equals("en"))
setEncodingLanguage(env, "ISO-8859-1");
else if (language.equals("uni"))
setEncodingLanguage(env, "UTF-8");
return BooleanValue.FALSE;
return BooleanValue.TRUE;
private static String getEncodingLanguage(Env env)
String encoding = (String) env.getSpecialValue("mb.internal_encoding");
if (encoding == null)
return "ISO-8859-1";
return encoding;
/**
 * Stores the encoding under the "mb.internal_encoding" special value,
 * the same key that getEncodingLanguage() reads.
 */
private static void setEncodingLanguage(Env env, String encoding)
env.setSpecialValue("mb.internal_encoding", encoding);
* Get all supported encodings.
public static ArrayValue mb_list_encodings(Env env)
ArrayValue array = new ArrayValueImpl();
Map<String,Charset> charsetMap = Charset.availableCharsets();
for (String name : charsetMap.keySet()) {
return array;
* ob_start() handler
public static StringValue mb_output_handler(Env env,
StringValue contents,
int status)
// XXX: status?
String toEncoding = getOutputEncoding(env);
if (toEncoding.equals("pass"))
return contents;
String fromEncoding = getEncoding(env);
Decoder decoder = getDecoder(env, fromEncoding);
CharSequence contentsUnicode = decoder.decode(env, contents);
Encoder encoder = getEncoder(env, toEncoding);
return encoder.encode(env, contentsUnicode);
* Multibyte version of parse_str.
public static BooleanValue mb_parse_str(Env env,
StringValue strValue,
@Optional @Reference Value result)
String encoding = getEncoding(env);
StringModule.parse_str(env,strValue.toString(), result);
if (result == null) {
// XXX: encode newly added global variables
return BooleanValue.TRUE;
else {
Value array = encodeAll(env, result, encoding);
return BooleanValue.TRUE;
* Returns the preferred mime name of this encoding.
/**
 * Looks up the MIME name for an encoding via Encoding.getMimeName and
 * returns it as a string value.
 */
public static StringValue mb_preferred_mime_name(Env env,
StringValue encoding)
String mimeName = Encoding.getMimeName(encoding.toString());
return env.createStringOld(mimeName);
* Returns and/or sets encoding for mb regular expressions.
/**
 * Gets or sets the regular-expression encoding. This is a direct
 * delegation to mb_internal_encoding(), so both share one setting.
 */
public static Value mb_regex_encoding(Env env,
@Optional("") String encoding)
return mb_internal_encoding(env, encoding);
* XXX: what does this actually do?
/**
 * Placeholder for mb_regex_set_options; nothing is implemented.
 *
 * @throws UnimplementedException always
 */
public static StringValue mb_regex_set_options(Env env,
@Optional String options)
throw new UnimplementedException("mb_regex_set_options");
* Multibyte version of mail.
/**
 * Multibyte version of mail: converts subject, message and additional
 * headers to binary values in the internal encoding, then delegates to
 * MailModule.mail and returns its boolean result.
 */
public static BooleanValue mb_send_mail(Env env,
StringValue to,
StringValue subject,
StringValue message,
@Optional StringValue additionalHeaders,
@Optional StringValue additionalParameters)
//XXX: not correct
String encoding = getEncoding(env);
subject = subject.toBinaryValue(encoding);
message = message.toBinaryValue(encoding);
// NOTE(review): additionalHeaders is optional but is dereferenced
// unconditionally — confirm it can never be null when omitted.
additionalHeaders = additionalHeaders.toBinaryValue(encoding);
// NOTE(review): the remaining arguments of this call are not visible here.
boolean result = MailModule.mail(env,
return BooleanValue.create(result);
* Multibyte version of split.
public static Value mb_split(Env env,
UnicodeEreg ereg,
StringValue string,
@Optional("-1") long limit)
String encoding = getEncoding(env);
string = string.convertToUnicode(env, encoding);
Value val = RegexpModule.split(env, ereg, string, limit);
return encodeAll(env, val, encoding);
* Similar to substr except start index is at the beginning of char
* boundaries.
/**
 * Cuts a section out of a string: the input is decoded, the range
 * [start, start + length) is clamped to the decoded length, and the
 * selected characters are encoded back.
 *
 * Returns the empty string when start is negative or lies beyond the end
 * of the range.
 */
public static StringValue mb_strcut(Env env,
final StringValue str,
int start,
@Optional("7fffffff") int length,
@Optional String encoding)
encoding = getEncoding(env, encoding);
CharSequence unicodeStr = decode(env, str, encoding);
int len = unicodeStr.length();
// NOTE(review): start + length is plain int arithmetic; with a very large
// length and a positive start this can wrap negative, which the check
// below would turn into an empty result — confirm.
int end = start + length;
if (end > len)
end = len;
if (start < 0 || start > end)
return StringValue.EMPTY;
// XXX: not quite exactly the same behavior as PHP
// NOTE(review): the statement controlled by this surrogate check is not
// visible here — confirm how start is meant to be adjusted.
if (start < len && Character.isHighSurrogate(unicodeStr.charAt(start)))
StringBuilder sb = new StringBuilder();
sb.append(unicodeStr, start, end);
return encode(env, sb, encoding);
* Truncates the string.
public static StringValue mb_strimwidth(Env env,
final StringValue str,
int start,
int width,
@Optional() StringValue trimmarker,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
CharSequence unicodeStr = decode(env, str, encoding);
int len = unicodeStr.length();
int end = start + width;
if (end > len)
end = len;
if (start < 0 || start > end)
return StringValue.EMPTY;
StringBuilder sb = new StringBuilder();
if (end < len && trimmarker.length() > 0) {
sb.append(unicodeStr, start, end - 1);
sb.append(decode(env, trimmarker, encoding));
unicodeStr = sb;
sb.append(unicodeStr, start, end);
return encode(env, sb, encoding);
* Multibyte version of strlen.
public static LongValue mb_strlen(Env env,
StringValue str,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
str = str.convertToUnicode(env, encoding);
return LongValue.create(str.length());
* Multibyte version of strpos.
public static Value mb_strpos(Env env,
StringValue haystack,
StringValue needle,
@Optional("0") int offset,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
haystack = haystack.convertToUnicode(env, encoding);
needle = needle.convertToUnicode(env, encoding);
return StringModule.strpos(env, haystack, needle, offset);
* Multibyte version of strrpos.
public static Value mb_strrpos(Env env,
StringValue haystack,
StringValue needle,
@Optional Value offsetV,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
haystack = haystack.convertToUnicode(env, encoding);
needle = needle.convertToUnicode(env, encoding);
return StringModule.strrpos(env, haystack, needle, offsetV);
* Converts all characters to lower-case.
public static StringValue mb_strtolower(Env env,
StringValue str,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
StringValue unicodeStr = str.convertToUnicode(env, encoding);
unicodeStr = StringModule.strtolower(str);
return str.create(env, unicodeStr, encoding);
* Converts all characters to upper-case.
public static StringValue mb_strtoupper(Env env,
StringValue str,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
StringValue unicodeStr = str.convertToUnicode(env, encoding);
unicodeStr = StringModule.strtoupper(str);
return str.create(env, unicodeStr, encoding);
* Returns the width of this multibyte string.
public static LongValue mb_strwidth(Env env,
StringValue str,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
str = str.convertToUnicode(env, encoding);
return LongValue.create(str.length());
int width = 0;
int len = string.length();
// Per PHP manual
for (int i = 0; i < len; i++) {
char ch = string.charAt(i);
if (ch <= 0x19)
else if (ch <= 0x1fff)
width += 1;
else if (ch <= 0xff60)
width += 2;
else if (ch <= 0xff9f)
width += 1;
width += 2;
return LongValue.create(width);
* Sets the character to use when decoding/encoding fails on a character.
/**
 * Placeholder for mb_substitute_character; nothing is implemented.
 *
 * @throws UnimplementedException always
 */
public static Value mb_substitute_character(Value substrchar)
throw new UnimplementedException("mb_substitute_character");
public static LongValue mb_substr_count(Env env,
StringValue haystack,
StringValue needle,
@Optional("") String encoding)
encoding = getEncoding(env, encoding);
haystack = haystack.convertToUnicode(env, encoding);
needle = needle.convertToUnicode(env, encoding);
int count = 0;
int sublen = needle.length();
int i = haystack.indexOf(needle);
while (i >= 0) {
i = haystack.indexOf(needle, i + sublen);
return LongValue.create(count);
* Multibyte version of substr.
public static StringValue mb_substr(Env env,
StringValue str,
int start,
@Optional Value lengthV,
@Optional String encoding)
encoding = getEncoding(env, encoding);
StringValue unicodeStr = str.convertToUnicode(env, encoding);
Value val = StringModule.substr(env, unicodeStr, start, lengthV);
if (val == BooleanValue.FALSE)
return StringValue.EMPTY;
return encode(env, val.toStringValue(env), encoding);
// Private helper functions
* Returns the string with the first letter of each word capitalized and
* all other letters made lower-case.
private static CharSequence toUpperCaseTitle(Env env, CharSequence str)
StringBuilder sb = new StringBuilder();
int strLen = str.length();
boolean isWordStart = true;
for (int i = 0; i < strLen; i++) {
char ch = str.charAt(i);
switch (ch) {
case ' ': case '\t': case '\r': case '\n':
isWordStart = true;
if (isWordStart) {
isWordStart = false;
return sb;
private static CharSequence decode(Env env,
StringValue str,
String encoding)
if (str.isUnicode())
return str;
Decoder decoder = getDecoder(env, encoding);
return decoder.decode(env, str);
/**
 * Creates a Decoder for the given encoding and configures it from the
 * "mbstring.substitute_character" ini setting.
 */
private static Decoder getDecoder(Env env, String encoding)
Decoder decoder = Decoder.create(encoding);
String ini = env.getIniString("mbstring.substitute_character");
// NOTE(review): the bodies of the null, "none" and "long" branches are not
// visible here — confirm what each is meant to configure on the decoder.
if (ini == null) {
else if (ini.equalsIgnoreCase("none")) {
else if (ini.equalsIgnoreCase("long")) {
else {
// Any other setting: its decimal digits are accumulated into a number.
int len = ini.length();
int value = 0;
for (int i = 0; i < len; i++) {
char ch = ini.charAt(i);
if ('0' <= ch && ch <= '9')
value = value * 10 + ini.charAt(i) - '0';
// XXX: surrogate pairs
// The number is narrowed to a single char and used as the replacement.
decoder.setReplacement("" + (char) value);
return decoder;
/**
 * Encodes characters into a string value, using the Encoder that
 * getEncoder() builds for the given encoding.
 */
private static StringValue encode(Env env,
CharSequence str,
String encoding)
Encoder encoder = getEncoder(env, encoding);
return encoder.encode(env, str);
/**
 * Creates an Encoder for the given encoding and configures it from the
 * "mbstring.substitute_character" ini setting.
 */
private static Encoder getEncoder(Env env, String encoding)
Encoder encoder = Encoder.create(encoding);
String ini = env.getIniString("mbstring.substitute_character");
// NOTE(review): the bodies of the null, "none" and "long" branches are not
// visible here — confirm what each is meant to configure on the encoder.
if (ini == null) {
else if (ini.equalsIgnoreCase("none")) {
else if (ini.equalsIgnoreCase("long")) {
else {
// Any other setting: its decimal digits are accumulated into a number.
int len = ini.length();
int value = 0;
for (int i = 0; i < len; i++) {
char ch = ini.charAt(i);
if ('0' <= ch && ch <= '9')
value = value * 10 + ini.charAt(i) - '0';
// XXX: surrogate pairs
// The number is narrowed to a single char and used as the replacement.
encoder.setReplacement("" + (char) value);
return encoder;
public static String getEncoding(Env env)
Value encoding = env.getIni("mbstring.internal_encoding");
if (encoding.length() > 0)
return encoding.toString();
return env.getRuntimeEncoding();
private static String getEncoding(Env env, String encoding)
if (encoding == null || encoding.length() == 0)
return getEncoding(env);
return encoding;
/**
 * Stores the encoding as the "mbstring.internal_encoding" ini value,
 * which getEncoding(Env) reads back.
 */
private static void setEncoding(Env env, String encoding)
env.setIni("mbstring.internal_encoding", encoding);
private static String getOutputEncoding(Env env)
Value encoding = env.getIni("mbstring.http_output");
if (encoding.length() != 0)
return encoding.toString();
return env.getOutputEncoding();
/**
 * Builds a list of charset names from an optional argument that may be
 * omitted, an array, the string "auto", or a comma-separated string.
 */
private static ArrayList<String> getEncodingList(Env env, Value encodingV)
ArrayList<String> list = new ArrayList<String>();
// NOTE(review): the body of the default branch is not visible here —
// confirm which encoding it is meant to add.
if (encodingV.isDefault()) {
else if (encodingV.isArray()) {
Iterator<Value> iter = encodingV.getValueIterator(env);
// NOTE(review): the loop body is not visible here; presumably each element
// is added to list — confirm.
while (iter.hasNext()) {
else {
String encodings = encodingV.toString();
// NOTE(review): the body of the "auto" branch is not visible here. The
// comparison is case-sensitive, unlike getDetectOrderList().
if (encodings.equals("auto")) {
else {
int start = 0;
int index;
// NOTE(review): no statement adding charset to list is visible in this
// loop, and the text after the last comma is not handled — confirm.
while ((index = encodings.indexOf(",", start)) >= 0) {
String charset = encodings.substring(start, index).trim();
start = index + 1;
return list;
* Recursively decodes objects and arrays.
/**
 * Convenience overload: builds the Decoder for the named encoding via
 * getDecoder() and delegates to decodeAll(Env, Value, Decoder).
 */
private static Value decodeAll(Env env,
Value val,
String encoding)
Decoder decoder = getDecoder(env, encoding);
return decodeAll(env, val, decoder);
* Recursively decodes objects and arrays.
/**
 * Recursively decodes a value with the given decoder.
 *
 * Strings are decoded with decoder.decodeUnicode; arrays and objects are
 * walked entry by entry with a recursive call per value; any other value
 * is returned unchanged.
 */
private static Value decodeAll(Env env,
Value val,
Decoder decoder)
val = val.toValue();
if (val.isString()) {
return decoder.decodeUnicode(env, val.toStringValue(env));
else if (val.isArray()) {
// Array entries are collected into a fresh array.
ArrayValue array = new ArrayValueImpl();
for (Map.Entry<Value,Value> entry : ((ArrayValue)val).entrySet()) {
// NOTE(review): the start of this statement is not visible here;
// presumably array.put(entry.getKey(), ...) — confirm.
decodeAll(env, entry.getValue(), decoder));
return array;
} else if (val.isObject()) {
ObjectValue obj = (ObjectValue) val.toObject(env);
for (Map.Entry<Value,Value> entry : obj.entrySet()) {
// NOTE(review): the start of this statement is not visible here — confirm
// how the decoded value is stored back into the object.
decodeAll(env, entry.getValue(), decoder));
return obj;
} else
return val;
* Recursively encodes objects and arrays.
/**
 * Convenience overload: builds the Encoder for the named encoding via
 * getEncoder() and delegates to encodeAll(Env, Value, Encoder).
 */
private static Value encodeAll(Env env,
Value val,
String encoding)
Encoder encoder = getEncoder(env, encoding);
return encodeAll(env, val, encoder);
* Recursively encodes objects and arrays.
/**
 * Recursively encodes a value with the given encoder.
 *
 * Strings are encoded with encoder.encode; arrays and objects are walked
 * entry by entry with a recursive call per value; any other value is
 * returned unchanged.
 */
private static Value encodeAll(Env env,
Value val,
Encoder encoder)
val = val.toValue();
if (val.isString()) {
return encoder.encode(env, val.toStringValue(env), true);
else if (val.isArray()) {
// Array entries are collected into a fresh array.
ArrayValue array = new ArrayValueImpl();
for (Map.Entry<Value,Value> entry : ((ArrayValue)val).entrySet()) {
// NOTE(review): the start of this statement is not visible here;
// presumably array.put(entry.getKey(), ...) — confirm.
encodeAll(env, entry.getValue(), encoder));
return array;
} else if (val.isObject()) {
// Unlike decodeAll(), the value is cast directly instead of going
// through toObject(env).
ObjectValue obj = (ObjectValue)val;
for (Map.Entry<Value,Value> entry : obj.entrySet()) {
// NOTE(review): the start of this statement is not visible here — confirm
// how the encoded value is stored back into the object.
encodeAll(env, entry.getValue(), encoder));
return obj;
} else
return val;
* ereg state object (saves previous match and other info)
* XXX: option
/**
 * State kept between the mb_ereg_search* calls: the subject string, the
 * expression, the current position, and the last match.
 */
static class EregSearch {
// Subject string after conversion with the internal encoding.
private StringValue _string;
private UnicodeEregi _ereg;
// Stored but not read anywhere in this class.
private Value _option;
// Length of _string, captured at construction.
private int _length;
// Match array recorded by the most recent successful search().
ArrayValue _lastMatch;
// Index in _string from which the next search starts.
int _position;
// True when a non-null expression has been supplied.
boolean _isValidRegexp;
/**
 * Converts the subject with the internal encoding and starts at position 0.
 */
EregSearch(Env env,
StringValue string,
UnicodeEregi ereg,
Value option)
_string = string.convertToUnicode(env, getEncoding(env));
_position = 0;
_length = _string.length();
_ereg = ereg;
_isValidRegexp = ereg != null;
_option = option;
/**
 * Replaces the expression and option; the position is left unchanged.
 */
void init(UnicodeEregi ereg, Value option)
_ereg = ereg;
_isValidRegexp = ereg != null;
_option = option;
/**
 * Returns the part of the subject from the current position onward, or
 * the empty string once the position has reached the end.
 */
StringValue getString(Env env)
if (_position == 0)
return _string;
else if (_position < _length)
return _string.substring(_position);
return StringValue.EMPTY;
/**
 * Matches the expression against the remaining subject. On success the
 * position moves past the match and the match array is recorded.
 * Returns FALSE for a negative position or a failed match.
 */
Value search(Env env, boolean isArrayReturn)
if (_position < 0)
return BooleanValue.FALSE;
StringValue string = getString(env);
ArrayValue regs = new ArrayValueImpl();
Value val = eregImpl(env, _ereg, string, regs);
if (val == BooleanValue.FALSE)
return BooleanValue.FALSE;
// Locate the matched text in the full subject to compute the new position.
StringValue match = regs.get(LongValue.ZERO).toStringValue(env);
int matchIndex = _string.indexOf(match, _position);
int matchLength = match.length();
_position = matchIndex + matchLength;
_lastMatch = regs;
if (isArrayReturn) {
// NOTE(review): no statements populating this array are visible here;
// presumably it should carry matchIndex and matchLength — confirm.
ArrayValue array = new ArrayValueImpl();
return array;
} else
return BooleanValue.TRUE;
// Registers "mbstring.http_input" with the default value "pass".
static final IniDefinition INI_MBSTRING_HTTP_INPUT
= _iniDefinitions.add("mbstring.http_input", "pass", PHP_INI_ALL);
// Registers "mbstring.http_output" with the default value "pass"; this is
// the value mb_output_handler() treats as "do not convert".
static final IniDefinition INI_MBSTRING_HTTP_OUTPUT
= _iniDefinitions.add("mbstring.http_output", "pass", PHP_INI_ALL);