/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-2009 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* $Id$
*/
package org.exist.xquery.functions.fn;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.exist.dom.QName;
import org.exist.xquery.Cardinality;
import org.exist.xquery.Dependency;
import org.exist.xquery.ErrorCodes;
import org.exist.xquery.Function;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.Profiler;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import org.exist.xquery.value.ValueSequence;
/**
 * Implementation of the XPath/XQuery function {@code fn:tokenize}.
 *
 * <p>The input string is split around every match of the supplied pattern; the
 * matched separators are not part of the result. The compiled pattern is kept in
 * the {@code pat} field (declared outside this class, presumably in
 * {@link FunMatches}) so that repeated evaluations with the same pattern and
 * flags do not recompile it.</p>
 *
 * @see <a href="http://www.w3.org/TR/xpath-functions/#func-tokenize">http://www.w3.org/TR/xpath-functions/#func-tokenize</a>
 * @author Wolfgang Meier (wolfgang@exist-db.org)
 */
public class FunTokenize extends FunMatches {

    /**
     * The two supported signatures: {@code tokenize($input, $pattern)} and
     * {@code tokenize($input, $pattern, $flags)}.
     *
     * <p>The declared return cardinality is zero-or-more because {@link #eval}
     * returns the empty sequence for an empty or zero-length input.</p>
     */
    public final static FunctionSignature signatures[] = {
        new FunctionSignature(
            new QName("tokenize", Function.BUILTIN_FUNCTION_NS),
            "Breaks the input string $input into a sequence of strings, "
                + "treating any substring that matches pattern $pattern as a separator. The "
                + "separators themselves are not returned.",
            new SequenceType[] {
                new FunctionParameterSequenceType("input", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"),
                new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The tokenization pattern")},
            new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "the token sequence")
        ),
        new FunctionSignature(
            new QName("tokenize", Function.BUILTIN_FUNCTION_NS),
            "Breaks the input string $input into a sequence of strings, "
                + "treating any substring that matches pattern $pattern as a separator using $flags, see http://www.w3.org/TR/xpath-functions/#flags. The "
                + "separators themselves are not returned.",
            new SequenceType[] {
                new FunctionParameterSequenceType("input", Type.STRING, Cardinality.ZERO_OR_ONE, "The input string"),
                new FunctionParameterSequenceType("pattern", Type.STRING, Cardinality.EXACTLY_ONE, "The tokenization pattern"),
                new FunctionParameterSequenceType("flags", Type.STRING, Cardinality.EXACTLY_ONE, "The flags")},
            new FunctionReturnSequenceType(Type.STRING, Cardinality.ZERO_OR_MORE, "the token sequence")
        )
    };

    /**
     * Creates the function for one of the entries of {@link #signatures}.
     *
     * @param context the XQuery context, passed through to the superclass
     * @param signature the signature this instance was created for; it decides
     *                  whether a third (flags) argument is evaluated
     */
    public FunTokenize(XQueryContext context, FunctionSignature signature) {
        super(context, signature);
    }

    /**
     * Evaluates {@code fn:tokenize}.
     *
     * <p>An empty input sequence or a zero-length input string yields the empty
     * sequence without evaluating the pattern or flags arguments. Otherwise the
     * pattern is passed through {@code translateRegexp}, the optional flags
     * through {@code parseFlags} (both defined outside this class), and the
     * input is split.</p>
     *
     * @param contextSequence the context sequence, handed on to the arguments
     * @param contextItem the context item, handed on to the arguments
     * @return the tokens as a sequence of strings, or the empty sequence
     * @throws XPathException with {@code FORX0003} if the pattern matches the
     *         empty string, with {@code FORX0001} if the pattern cannot be
     *         compiled, or whatever the argument evaluation raises
     * @see org.exist.xquery.Expression#eval(Sequence, Item)
     */
    @Override
    public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
        if (context.getProfiler().isEnabled()) {
            context.getProfiler().start(this);
            context.getProfiler().message(this, Profiler.DEPENDENCIES, "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
            if (contextSequence != null) {
                context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT SEQUENCE", contextSequence);
            }
            if (contextItem != null) {
                context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());
            }
        }

        final Sequence result;
        final Sequence stringArg = getArgument(0).eval(contextSequence, contextItem);
        if (stringArg.isEmpty()) {
            result = Sequence.EMPTY_SEQUENCE;
        } else {
            final String string = stringArg.getStringValue();
            if (string.length() == 0) {
                result = Sequence.EMPTY_SEQUENCE;
            } else {
                final String pattern = translateRegexp(getArgument(1).eval(contextSequence, contextItem).getStringValue());
                // The flags are resolved before the pattern is inspected because they
                // change what the pattern matches (and therefore whether it can match
                // the empty string).
                int flags = 0;
                if (getSignature().getArgumentCount() == 3) {
                    flags = parseFlags(getArgument(2).eval(contextSequence, contextItem)
                        .getStringValue());
                }
                result = tokenize(string, pattern, flags);
            }
        }

        if (context.getProfiler().isEnabled()) {
            context.getProfiler().end(this, "", result);
        }
        return result;
    }

    /**
     * Splits {@code input} around the matches of {@code pattern}, reusing the
     * cached compiled pattern when pattern text and flags are unchanged.
     *
     * @param input the non-empty string to split
     * @param pattern the pattern in {@code java.util.regex} syntax
     * @param flags the {@link Pattern} flag bits to compile with
     * @return one string value per token, including trailing empty tokens
     * @throws XPathException {@code FORX0003} if the compiled pattern matches the
     *         empty string, {@code FORX0001} if the pattern is syntactically invalid
     */
    private Sequence tokenize(final String input, final String pattern, final int flags) throws XPathException {
        try {
            if (pat == null || !pattern.equals(pat.pattern()) || flags != pat.flags()) {
                pat = Pattern.compile(pattern, flags);
            }
            // Checked on the compiled pattern so that the flags are honoured and a
            // syntax error is reported as FORX0001 by the catch clause below instead
            // of escaping as an unchecked PatternSyntaxException.
            if (pat.matcher("").matches()) {
                throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
            }
            final Sequence tokens = new ValueSequence();
            // A negative limit keeps trailing empty strings in the result.
            for (final String token : pat.split(input, -1)) {
                tokens.add(new StringValue(token));
            }
            return tokens;
        } catch (final PatternSyntaxException e) {
            throw new XPathException(this, ErrorCodes.FORX0001, "Invalid regular expression: " + e.getMessage(), new StringValue(pattern), e);
        }
    }
}