package com.dbxml.db.common.fulltext;
/*
* dbXML - Native XML Database
* Copyright (c) 1999-2006 The dbXML Group, L.L.C.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* $Id: FullTextQuery.java,v 1.5 2006/02/02 18:53:52 bradford Exp $
*/
import com.dbxml.db.core.query.*;
import com.dbxml.db.common.query.QueryBase;
import com.dbxml.db.common.xpath.XPathQueryResolver;
import com.dbxml.db.core.Collection;
import com.dbxml.db.core.DBException;
import com.dbxml.db.core.data.Key;
import com.dbxml.db.core.data.Value;
import com.dbxml.db.core.indexer.IndexMatch;
import com.dbxml.db.core.indexer.IndexPattern;
import com.dbxml.db.core.indexer.IndexQuery;
import com.dbxml.db.core.indexer.Indexer;
import com.dbxml.db.core.transaction.Transaction;
import com.dbxml.xml.NamespaceMap;
import com.dbxml.xml.dom.DOMHelper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
/**
* FullTextQuery
*/
public final class FullTextQuery extends QueryBase {
// Zero-length typed array handed to List.toArray() in evaluateChildren so
// that the result comes back as a Key[][].
private static final Key[][] EmptyKeySet = new Key[0][0];
// Names used by the XML form of a query: FULLTEXT is the local name of the
// root element, XPATH an attribute read from the root, AND/OR/SELECT the
// child element names, and NAME/OPERATOR the attributes read from a select
// element.
private static final String FULLTEXT = "fulltext";
private static final String XPATH = "xpath";
private static final String AND = "and";
private static final String OR = "or";
private static final String SELECT = "select";
private static final String NAME = "name";
private static final String OPERATOR = "operator";
// Node type codes returned by QueryNode.getNodeType() and switched on in
// evaluate().
private static final int NODE_AND = 0;
private static final int NODE_OR = 1;
private static final int NODE_SELECT = 2;
// Comparison operators. Each OP_* aliases the matching IndexQuery constant;
// STR_* and ALT_* are the two spellings that opLookup() accepts for it.
// opLookup() upper-cases its input before comparing, which is why the
// alphabetic spellings are written in upper case here.
private static final int OP_EQ = IndexQuery.EQ;
private static final String STR_EQ = "EQ";
private static final String ALT_EQ = "=";
private static final int OP_NEQ = IndexQuery.NEQ;
private static final String STR_NEQ = "NEQ";
private static final String ALT_NEQ = "!=";
private static final int OP_GT = IndexQuery.GT;
private static final String STR_GT = "GT";
private static final String ALT_GT = ">";
private static final int OP_LT = IndexQuery.LT;
private static final String STR_LT = "LT";
private static final String ALT_LT = "<";
private static final int OP_GTE = IndexQuery.GTE;
private static final String STR_GTE = "GTE";
private static final String ALT_GTE = ">=";
private static final int OP_LTE = IndexQuery.LTE;
private static final String STR_LTE = "LTE";
private static final String ALT_LTE = "<=";
private static final int OP_SW = IndexQuery.SW;
private static final String STR_SW = "SW";
private static final String ALT_SW = "STARTS-WITH";
private static final int OP_NSW = IndexQuery.NSW;
private static final String STR_NSW = "NSW";
private static final String ALT_NSW = "!STARTS-WITH";
// Parallel lookup tables: opLookup() walks them with a single index, so
// position i of OPS, STRS and ALTS must describe the same operator.
private static final int[] OPS = {
OP_EQ, OP_NEQ, OP_GT, OP_LT,
OP_GTE, OP_LTE, OP_SW, OP_NSW
};
private static final String[] STRS = {
STR_EQ, STR_NEQ, STR_GT, STR_LT,
STR_GTE, STR_LTE, STR_SW, STR_NSW
};
private static final String[] ALTS = {
ALT_EQ, ALT_NEQ, ALT_GT, ALT_LT,
ALT_GTE, ALT_LTE, ALT_SW, ALT_NSW
};
// Token classifications returned by getTokenType() for the simplified
// word syntax.
private static final int TOKEN_WORD = 0;
private static final int TOKEN_WHITESPACE = 1;
private static final int TOKEN_OPERATOR = 2;
private static final int TOKEN_STRING = 3;
// Root of the compiled query tree; assigned by compileQuery().
private QueryNode queryNode;
// Optional XPath taken from the root element's "xpath" attribute; when set,
// execute() runs it against the documents matched by the full text query.
private String xpath;
/**
 * Creates a full text query; all arguments are handed unchanged to the
 * QueryBase constructor.
 *
 * @param context The Collection the query runs against
 * @param query The query text, in either the XML or the word syntax
 * @param nsMap The namespace map used when resolving names
 * @param keys NOTE(review): presumably an optional key set restricting the
 *             query; execute() ANDs a non-null key set with the matches --
 *             confirm against QueryBase
 * @throws QueryException If QueryBase rejects the arguments
 */
public FullTextQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException {
super(context, query, nsMap, keys);
}
/**
 * Compiles the query text into the QueryNode tree used by execute.
 *
 * Text whose first non-blank character is '<' is first tried as an XML
 * query document. If that text cannot be parsed, the parser error is
 * discarded and the text is compiled with the simplified word syntax.
 *
 * @throws CompilationException If the query cannot be compiled
 */
public void compileQuery() throws CompilationException {
   boolean looksLikeXml = query.trim().startsWith("<");
   if ( looksLikeXml ) {
      try {
         queryNode = buildQueryTree(DOMHelper.parseText(query));
         return;
      }
      catch ( SAXException ignored ) {
         // Not parseable as XML; fall through to the word syntax below
      }
      catch ( IOException ignored ) {
         /** @todo Throw a CompilationException? */
      }
   }
   queryNode = buildFromWords(query);
}
/**
 * Returns the style identifier of this query.
 *
 * @return FullTextQueryResolver.STYLE_FULLTEXT
 */
public String getQueryStyle() {
return FullTextQueryResolver.STYLE_FULLTEXT;
}
/**
 * Runs the compiled query and returns its results.
 *
 * The query tree is evaluated into a key set, which is ANDed with the
 * key set of this query when one is present. If the query carries an
 * XPath, the XPath is run against the resulting keys; otherwise the keys
 * are wrapped in a FullTextResultSet.
 *
 * @param tx The Transaction to evaluate under
 * @return The ResultSet for the query
 * @throws QueryException If evaluation fails; a DBException raised by the
 *         XPath query is wrapped in a ProcessingException
 */
public ResultSet execute(Transaction tx) throws QueryException {
   Key[] matched = evaluate(tx, queryNode);
   if ( keys != null ) {
      Key[][] keySets = { keys, matched };
      matched = QueryEngine.andKeySets(keySets);
   }

   // Without an XPath the matched keys are the result
   if ( xpath == null )
      return new FullTextResultSet(tx, context, this, matched);

   try {
      return context.queryDocument(tx, XPathQueryResolver.STYLE_XPATH, xpath, nsMap, matched);
   }
   catch ( QueryException e ) {
      throw e;
   }
   catch ( DBException e ) {
      throw new ProcessingException(e);
   }
}
/**
 * Evaluates one node of the query tree.
 *
 * @param tx The Transaction to evaluate under
 * @param node The node to evaluate
 * @return The keys produced by the node; a select node may produce null
 * @throws QueryException If evaluation fails or the node type is unknown
 */
private Key[] evaluate(Transaction tx, QueryNode node) throws QueryException {
   int type = node.getNodeType();
   if ( type == NODE_SELECT )
      return evaluateSelect(tx, (SelectQueryNode)node);
   if ( type == NODE_AND )
      return QueryEngine.andKeySets(evaluateChildren(tx, node));
   if ( type == NODE_OR )
      return QueryEngine.orKeySets(evaluateChildren(tx, node));
   throw new ProcessingException("What is this QueryNode?");
}
/**
 * Evaluates every child of a node and collects the resulting key sets.
 * Children that evaluate to null contribute nothing to the result.
 *
 * @param tx The Transaction to evaluate under
 * @param node The parent node
 * @return One key set per child that produced a non-null result
 * @throws QueryException If the node has no children or a child fails
 */
private Key[][] evaluateChildren(Transaction tx, QueryNode node) throws QueryException {
   if ( !node.hasChildren() )
      throw new ProcessingException("Why are there no children?");

   List keySets = new ArrayList();
   for ( Iterator it = node.getChildren().iterator(); it.hasNext(); ) {
      Key[] childKeys = evaluate(tx, (QueryNode)it.next());
      if ( childKeys != null )
         keySets.add(childKeys);
   }
   return (Key[][])keySets.toArray(EmptyKeySet);
}
/**
 * Evaluates a select node against the best matching full text index.
 *
 * @param tx The Transaction to evaluate under
 * @param node The select node carrying name, value and operator
 * @return The unique keys of the matches, or null when the index is a
 *         FullTextIndexer and the word is one of its stop words
 * @throws QueryException If no full text index supports the pattern, or
 *         the index query fails
 */
private Key[] evaluateSelect(Transaction tx, SelectQueryNode node) throws QueryException {
   try {
      IndexPattern pattern = new IndexPattern(context.getSymbols(), node.name, nsMap);
      Indexer idx = context.getIndexManager().getBestIndexer(Indexer.STYLE_FULLTEXT, pattern);
      if ( idx == null )
         throw new ProcessingException("No FullText Index to support pattern '"+node.name+"'");

      // A FullTextIndexer can tell us about stop words up front, which
      // saves querying the index for nothing.
      if ( idx instanceof FullTextIndexer ) {
         FullTextIndexer ftIndexer = (FullTextIndexer)idx;
         String word = node.value;
         WordStemmer stemmer = ftIndexer.getWordStemmer();
         if ( stemmer != null )
            word = stemmer.normalizeCase(word);
         Set stopWords = ftIndexer.getStopWords();
         boolean isStopWord = stopWords != null && stopWords.contains(word);
         if ( isStopWord )
            return null;
      }

      // NOTE(review): the case-normalized word is only used for the stop
      // word test; the index is queried with the raw node value. Confirm
      // that the indexer normalizes query values itself.
      IndexQuery idxQry = new IndexQuery(pattern, node.operator, new Value(node.value));
      return QueryEngine.getUniqueKeys(idx.queryMatches(tx, idxQry));
   }
   catch ( ProcessingException e ) {
      throw e;
   }
   catch ( DBException e ) {
      throw new ProcessingException(e);
   }
}
/**
 * Builds the query tree from the XML form of a query.
 *
 * The root element must have the local name "fulltext" in the query
 * namespace. Its optional "xpath" attribute is remembered for execute,
 * and its children are compiled beneath an implicit AND node.
 *
 * @param doc The parsed query document
 * @return The root of the query tree
 * @throws CompilationException If the root element is missing, is not in
 *         the query namespace, or the content is invalid
 */
private QueryNode buildQueryTree(Document doc) throws CompilationException {
   Element rootElem = doc.getDocumentElement();

   // getNamespaceURI() and getLocalName() return null for an element
   // without a namespace, so compare from the constant side to report
   // such a document as an invalid query rather than failing with a
   // NullPointerException.
   boolean validRoot = rootElem != null
                    && Query.NSURI.equals(rootElem.getNamespaceURI())
                    && FULLTEXT.equals(rootElem.getLocalName());
   if ( !validRoot )
      throw new CompilationException("This is not a valid query");

   String xp = rootElem.getAttribute(XPATH);
   if ( xp != null && xp.trim().length() > 0 )
      xpath = xp;

   QueryNode root = new AndQueryNode();
   buildChildNodes(rootElem, root);
   return root;
}
/**
 * Compiles the child elements of a query element into child query nodes.
 *
 * "and" and "or" elements become nested nodes that are compiled
 * recursively; "select" elements become one SelectQueryNode per word of
 * their text. Any other element, and any non-blank text outside of a
 * select, is rejected.
 *
 * @param parentElem The element whose children are compiled
 * @param parentNode The query node that receives the compiled children
 * @throws CompilationException If the content is not a valid query
 */
private void buildChildNodes(Element parentElem, QueryNode parentNode) throws CompilationException {
   NodeList nl = parentElem.getChildNodes();
   for ( int i = 0; i < nl.getLength(); i++ ) {
      Node n = nl.item(i);
      switch ( n.getNodeType() ) {
         case Node.ELEMENT_NODE:
            Element elem = (Element)n;
            String name = queryElementName(elem);
            if ( name.equals(AND) ) {
               AndQueryNode node = new AndQueryNode();
               parentNode.addChild(node);
               buildChildNodes(elem, node);
            }
            else if ( name.equals(OR) ) {
               OrQueryNode node = new OrQueryNode();
               parentNode.addChild(node);
               buildChildNodes(elem, node);
            }
            else if ( name.equals(SELECT) )
               buildSelectNodes(elem, parentNode);
            else
               throw new CompilationException("Unknown element '"+elem.getTagName()+"'");
            break;

         case Node.TEXT_NODE:
            if ( ((Text)n).getData().trim().length() > 0 )
               throw new CompilationException("Text is only allowed in '"+SELECT+"'");
            break;
      }
   }
}

/**
 * Returns the name used to recognize a query element. Elements in the
 * query namespace are matched by local name so that a prefixed query
 * (e.g. q:and) compiles just like one using a default namespace; every
 * other element keeps being matched by its tag name.
 */
private String queryElementName(Element elem) {
   String localName = elem.getLocalName();
   if ( localName != null && Query.NSURI.equals(elem.getNamespaceURI()) )
      return localName;
   return elem.getTagName();
}

/**
 * Compiles a select element: adds one SelectQueryNode per whitespace
 * separated word of its text, grouped under a new AND node when there is
 * more than one word. A select without any word adds nothing.
 */
private void buildSelectNodes(Element selectElem, QueryNode parentNode) throws CompilationException {
   String nodeName = selectElem.getAttribute(NAME);
   String opName = selectElem.getAttribute(OPERATOR);

   // Gather the text content; nested elements are not allowed
   StringBuffer sb = new StringBuffer();
   NodeList cl = selectElem.getChildNodes();
   for ( int j = 0; j < cl.getLength(); j++ ) {
      Node cn = cl.item(j);
      switch ( cn.getNodeType() ) {
         case Node.TEXT_NODE:
            sb.append(((Text)cn).getData());
            break;
         case Node.ELEMENT_NODE:
            throw new CompilationException("Text is only allowed in '"+SELECT+"'");
      }
   }

   StringTokenizer st = new StringTokenizer(sb.toString());
   if ( !st.hasMoreTokens() )
      return;

   // The operator is the same for every word, so resolve it only once
   int operator = opLookup(opName);

   QueryNode pn = parentNode;
   if ( st.countTokens() > 1 ) {
      pn = new AndQueryNode();
      parentNode.addChild(pn);
   }
   while ( st.hasMoreTokens() )
      pn.addChild(new SelectQueryNode(nodeName, st.nextToken(), operator));
}
/**
 * buildFromWords exposes a simplified query syntax that adheres to the
 * following format:
 *
 * (element[@attribute]=(word|"word*"))*
 *
 * So you can do stuff like:
 *
 * interests="coffee cigarettes"
 * person@keywords=caffeine
 * *="jamaican blue"
 * *@*=cup
 *
 * Every word becomes a SelectQueryNode beneath a single AND node.
 * Whitespace before, between and after the clauses is ignored.
 *
 * @param words The query text
 * @return The root AND node of the compiled query
 * @throws CompilationException If the text does not follow the format
 */
private QueryNode buildFromWords(String words) throws CompilationException {
   QueryNode root = new AndQueryNode();
   StringTokenizer st = new StringTokenizer(words, "=\" \t\n\r", true);
   while ( st.hasMoreTokens() ) {
      String name = st.nextToken();
      int nameType = getTokenType(name);

      // Skip whitespace here instead of demanding a name right away, so
      // that whitespace trailing the last clause (a final newline, for
      // instance) ends the query instead of failing the compilation.
      if ( nameType == TOKEN_WHITESPACE )
         continue;
      if ( nameType != TOKEN_WORD )
         throw new CompilationException("Name Token Expected");

      String operator = getNextToken(st, "Operator");
      if ( getTokenType(operator) != TOKEN_OPERATOR )
         throw new CompilationException("Operator Token Expected");

      String value = getNextToken(st, "Value");
      switch ( getTokenType(value) ) {
         case TOKEN_STRING:
            // Quoted list: every word up to the closing quote is a value
            boolean done = false;
            while ( !done ) {
               value = getNextToken(st, "Word");
               switch ( getTokenType(value) ) {
                  case TOKEN_STRING:
                     done = true;
                     break;
                  case TOKEN_WORD:
                     root.addChild(new SelectQueryNode(name, value));
                     break;
                  default:
                     throw new CompilationException("String Token Expected");
               }
            }
            break;

         case TOKEN_WORD:
            root.addChild(new SelectQueryNode(name, value));
            break;

         default:
            throw new CompilationException("Word Or String Token Expected");
      }
   }
   return root;
}
/**
 * Classifies a token produced by the word syntax tokenizer.
 *
 * Only the tokenizer's own delimiter characters are special: '=' is the
 * operator, '"' opens or closes a quoted list, and space, tab, newline
 * and carriage return are whitespace. Everything else is a word,
 * including one-character words such as "*" or "a".
 *
 * @param token The token to classify
 * @return One of the TOKEN_* constants
 */
private int getTokenType(String token) {
   if ( token.length() != 1 )
      return TOKEN_WORD;

   switch ( token.charAt(0) ) {
      case '=':
         return TOKEN_OPERATOR;
      case '\"':
         return TOKEN_STRING;
      case ' ':
      case '\t':
      case '\n':
      case '\r':
         return TOKEN_WHITESPACE;
      default:
         // A single character that is not a delimiter is a regular word
         return TOKEN_WORD;
   }
}
/**
 * Returns the next token that is not whitespace.
 *
 * @param st The tokenizer to read from
 * @param expecting Description of the expected token, used in the error
 * @return The next non-whitespace token
 * @throws CompilationException If the tokenizer runs out of tokens first
 */
private String getNextToken(StringTokenizer st, String expecting) throws CompilationException {
   while ( st.hasMoreTokens() ) {
      String candidate = st.nextToken();
      if ( getTokenType(candidate) != TOKEN_WHITESPACE )
         return candidate;
   }
   throw new CompilationException(expecting+" Token Expected");
}
/**
 * Maps an operator name onto its operator code.
 *
 * Both spellings of each operator (e.g. "GTE" and ">=") are accepted,
 * ignoring case. A null or empty name selects equality.
 *
 * @param opName The operator name, may be null or empty
 * @return The matching OP_* code
 * @throws CompilationException If the name matches no operator
 */
private int opLookup(String opName) throws CompilationException {
   if ( opName == null || opName.length() == 0 )
      return OP_EQ;

   // equalsIgnoreCase compares character by character and does not depend
   // on the default locale; String.toUpperCase() would turn the 'i' of
   // "starts-with" into a dotted capital under a Turkish locale and so
   // reject a valid operator.
   for ( int i = 0; i < OPS.length; i++ )
      if ( opName.equalsIgnoreCase(STRS[i]) || opName.equalsIgnoreCase(ALTS[i]) )
         return OPS[i];

   throw new CompilationException("Unknown operator '"+opName+"'");
}
/**
 * QueryNode is the base of all nodes in the compiled query tree. The
 * child list is only created once the first child is added.
 */
private abstract class QueryNode {
   public List children;

   public QueryNode() {
   }

   /** Returns the NODE_* code identifying the kind of node. */
   public abstract int getNodeType();

   /** Returns the child list, or null if no child was ever added. */
   public List getChildren() {
      return children;
   }

   /** Tells whether at least one child has been added. */
   public boolean hasChildren() {
      return null != children;
   }

   /** Appends a child, creating the child list on first use. */
   public void addChild(QueryNode node) {
      List target = children;
      if ( target == null ) {
         target = new ArrayList();
         children = target;
      }
      target.add(node);
   }
}
/**
* AndQueryNode represents a node set where the resulting keys will be
* produced as a result of an ANDing operation.
*/
private class AndQueryNode extends QueryNode {
/** Identifies this node as NODE_AND. */
public int getNodeType() {
return NODE_AND;
}
}
/**
* OrQueryNode represents a node set where the resulting keys will be
* produced as a result of an ORing operation.
*/
private class OrQueryNode extends QueryNode {
/** Identifies this node as NODE_OR. */
public int getNodeType() {
return NODE_OR;
}
}
/**
 * SelectQueryNode is a leaf of the query tree: a full text search for one
 * value against the named pattern, compared with the given operator.
 */
private class SelectQueryNode extends QueryNode {
   public int operator = OP_EQ;
   public String name;
   public String value;

   /** Creates a select that compares using equality. */
   public SelectQueryNode(String name, String value) {
      this(name, value, OP_EQ);
   }

   /** Creates a select that compares using the given OP_* operator. */
   public SelectQueryNode(String name, String value, int operator) {
      this.operator = operator;
      this.value = value;
      this.name = name;
   }

   /** Identifies this node as NODE_SELECT. */
   public int getNodeType() {
      return NODE_SELECT;
   }
}
}