Package com.dbxml.db.common.fulltext

Source Code of com.dbxml.db.common.fulltext.FullTextQuery

package com.dbxml.db.common.fulltext;

/*
* dbXML - Native XML Database
* Copyright (c) 1999-2006 The dbXML Group, L.L.C.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* $Id: FullTextQuery.java,v 1.5 2006/02/02 18:53:52 bradford Exp $
*/

import com.dbxml.db.core.query.*;

import com.dbxml.db.common.query.QueryBase;
import com.dbxml.db.common.xpath.XPathQueryResolver;
import com.dbxml.db.core.Collection;
import com.dbxml.db.core.DBException;
import com.dbxml.db.core.data.Key;
import com.dbxml.db.core.data.Value;
import com.dbxml.db.core.indexer.IndexMatch;
import com.dbxml.db.core.indexer.IndexPattern;
import com.dbxml.db.core.indexer.IndexQuery;
import com.dbxml.db.core.indexer.Indexer;
import com.dbxml.db.core.transaction.Transaction;
import com.dbxml.xml.NamespaceMap;
import com.dbxml.xml.dom.DOMHelper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

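/**
* FullTextQuery compiles and executes full-text queries.  A query is
* either an XML document whose root element is 'fulltext', or a
* simplified sequence of name=value terms (see buildFromWords).
*/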

public final class FullTextQuery extends QueryBase {
   // Zero-length typed array handed to List.toArray so the result is a Key[][].
   private static final Key[][] EmptyKeySet = new Key[0][0];

   // Names recognised in the XML form of a query: FULLTEXT is the root
   // element's local name, XPATH an attribute read from the root, AND/OR/SELECT
   // are child element names, NAME/OPERATOR are attributes read from 'select'.
   private static final String FULLTEXT = "fulltext";
   private static final String XPATH = "xpath";
   private static final String AND = "and";
   private static final String OR = "or";
   private static final String SELECT = "select";
   private static final String NAME = "name";
   private static final String OPERATOR = "operator";

   // Values returned by QueryNode.getNodeType() and switched on in evaluate().
   private static final int NODE_AND = 0;
   private static final int NODE_OR = 1;
   private static final int NODE_SELECT = 2;

   // Comparison operators.  Each OP_* aliases the IndexQuery constant of the
   // same name; STR_* and ALT_* are the two spellings opLookup accepts for it
   // (opLookup upper-cases the supplied name before comparing).
   private static final int OP_EQ = IndexQuery.EQ;
   private static final String STR_EQ = "EQ";
   private static final String ALT_EQ = "=";

   private static final int OP_NEQ = IndexQuery.NEQ;
   private static final String STR_NEQ = "NEQ";
   private static final String ALT_NEQ = "!=";

   private static final int OP_GT = IndexQuery.GT;
   private static final String STR_GT = "GT";
   private static final String ALT_GT = ">";

   private static final int OP_LT = IndexQuery.LT;
   private static final String STR_LT = "LT";
   private static final String ALT_LT = "<";

   private static final int OP_GTE = IndexQuery.GTE;
   private static final String STR_GTE = "GTE";
   private static final String ALT_GTE = ">=";

   private static final int OP_LTE = IndexQuery.LTE;
   private static final String STR_LTE = "LTE";
   private static final String ALT_LTE = "<=";

   private static final int OP_SW = IndexQuery.SW;
   private static final String STR_SW = "SW";
   private static final String ALT_SW = "STARTS-WITH";

   private static final int OP_NSW = IndexQuery.NSW;
   private static final String STR_NSW = "NSW";
   private static final String ALT_NSW = "!STARTS-WITH";

   // OPS, STRS and ALTS are parallel arrays: opLookup scans STRS/ALTS and
   // returns OPS at the same index, so the three must keep the same order.
   private static final int[] OPS = {
      OP_EQ, OP_NEQ, OP_GT, OP_LT,
      OP_GTE, OP_LTE, OP_SW, OP_NSW
   };

   private static final String[] STRS = {
      STR_EQ, STR_NEQ, STR_GT, STR_LT,
      STR_GTE, STR_LTE, STR_SW, STR_NSW
   };

   private static final String[] ALTS = {
      ALT_EQ, ALT_NEQ, ALT_GT, ALT_LT,
      ALT_GTE, ALT_LTE, ALT_SW, ALT_NSW
   };

   // Token classifications returned by getTokenType() for the simplified
   // word syntax.  TOKEN_OPERATOR is '=', TOKEN_STRING is a double quote.
   private static final int TOKEN_WORD = 0;
   private static final int TOKEN_WHITESPACE = 1;
   private static final int TOKEN_OPERATOR = 2;
   private static final int TOKEN_STRING = 3;

   // Root of the compiled query tree; assigned by compileQuery().
   private QueryNode queryNode;
   // XPath taken from the root element's 'xpath' attribute, or null when
   // absent/blank; when set, execute() hands the matching keys to an XPath query.
   private String xpath;

   /**
    * Creates a full-text query.  All arguments are passed unchanged to the
    * QueryBase constructor.
    *
    * @param context the Collection the query runs against
    * @param query the query text (XML form or simplified word form)
    * @param nsMap namespace map used when resolving index patterns and XPath
    * @param keys optional key set; presumably stored by QueryBase as the
    *        'keys' field that execute() uses to restrict results (confirm
    *        against QueryBase)
    * @throws QueryException if the superclass constructor fails
    */
   public FullTextQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException {
      super(context, query, nsMap, keys);
   }

   public void compileQuery() throws CompilationException {
      if ( query.trim().startsWith("<") ) {
         try {
            Document d = DOMHelper.parseText(query);
            queryNode = buildQueryTree(d);
            return;
         }
         catch ( SAXException e ) {
         }
         catch ( IOException e ) {
            /** @todo Through a CompilationException? */
         }
      }

      queryNode = buildFromWords(query);
   }

   /**
    * Returns the style identifier for this kind of query.
    *
    * @return FullTextQueryResolver.STYLE_FULLTEXT
    */
   public String getQueryStyle() {
      return FullTextQueryResolver.STYLE_FULLTEXT;
   }

   public ResultSet execute(Transaction tx) throws QueryException {
      Key[] newKeys = evaluate(tx, queryNode);

      if ( keys != null )
         newKeys = QueryEngine.andKeySets(new Key[][]{keys, newKeys});

      if ( xpath != null ) {
         try {
            return context.queryDocument(tx, XPathQueryResolver.STYLE_XPATH, xpath, nsMap, newKeys);
         }
         catch ( QueryException e ) {
            throw e;
         }
         catch ( DBException e ) {
            throw new ProcessingException(e);
         }
      }
      else
         return new FullTextResultSet(tx, context, this, newKeys);
   }

   private Key[] evaluate(Transaction tx, QueryNode node) throws QueryException {
      switch ( node.getNodeType() ) {
         case NODE_AND:
            return QueryEngine.andKeySets(evaluateChildren(tx, node));

         case NODE_OR:
            return QueryEngine.orKeySets(evaluateChildren(tx, node));

         case NODE_SELECT:
            return evaluateSelect(tx, (SelectQueryNode)node);

         default:
            throw new ProcessingException("What is this QueryNode?");
      }
   }

   private Key[][] evaluateChildren(Transaction tx, QueryNode node) throws QueryException {
      if ( node.hasChildren() ) {
         List list = new ArrayList();

         Iterator iter = node.getChildren().iterator();
         while ( iter.hasNext() ) {
            Key[] keys = evaluate(tx, (QueryNode)iter.next());
            if ( keys != null )
               list.add(keys);
         }

         return (Key[][])list.toArray(EmptyKeySet);
      }
      else
         throw new ProcessingException("Why are there no children?");
   }

   private Key[] evaluateSelect(Transaction tx, SelectQueryNode node) throws QueryException {
      try {
         IndexPattern pattern = new IndexPattern(context.getSymbols(), node.name, nsMap);
         Indexer idx = context.getIndexManager().getBestIndexer(Indexer.STYLE_FULLTEXT, pattern);
         if ( idx != null ) {
            String value = node.value;

            // If it's a FullTextIndexer, check it for stop words instead
            // of querying it for no reason.
            if ( idx instanceof FullTextIndexer ) {
               FullTextIndexer fidx = (FullTextIndexer)idx;
               WordStemmer stemmer = fidx.getWordStemmer();
               if ( stemmer != null )
                  value = stemmer.normalizeCase(value);

               Set stopWords = fidx.getStopWords();
               if ( stopWords != null && stopWords.contains(value) )
                  return null;
            }

            IndexQuery idxQry = new IndexQuery(pattern, node.operator, new Value(node.value));
            IndexMatch[] matches = idx.queryMatches(tx, idxQry);
            return QueryEngine.getUniqueKeys(matches);
         }
         else
            throw new ProcessingException("No FullText Index to support pattern '"+node.name+"'");
      }
      catch ( ProcessingException e ) {
         throw e;
      }
      catch ( DBException e ) {
         throw new ProcessingException(e);
      }
   }

   private QueryNode buildQueryTree(Document doc) throws CompilationException {
      Element rootElem = doc.getDocumentElement();
      if ( rootElem.getNamespaceURI().equals(Query.NSURI) && rootElem.getLocalName().equals(FULLTEXT) ) {
         String xp = rootElem.getAttribute(XPATH);
         if ( xp != null && xp.trim().length() > 0 )
            xpath = xp;
         QueryNode root = new AndQueryNode();
         buildChildNodes(rootElem, root);
         return root;
      }
      else
         throw new CompilationException("This is not a valid query");
   }

   private void buildChildNodes(Element parentElem, QueryNode parentNode) throws CompilationException {
      NodeList nl = parentElem.getChildNodes();
      for ( int i = 0; i < nl.getLength(); i++ ) {
         Node n = nl.item(i);
         switch ( n.getNodeType() ) {
            case Node.ELEMENT_NODE:
               Element elem = (Element)n;
               String name = elem.getTagName();
               if ( name.equals(AND) ) {
                  AndQueryNode node = new AndQueryNode();
                  parentNode.addChild(node);
                  buildChildNodes(elem, node);
               }
               else if ( name.equals(OR) ) {
                  OrQueryNode node = new OrQueryNode();
                  parentNode.addChild(node);
                  buildChildNodes(elem, node);
               }
               else if ( name.equals(SELECT) ) {
                  String nodeName = elem.getAttribute(NAME);
                  String opName = elem.getAttribute(OPERATOR);
                  NodeList cl = elem.getChildNodes();
                  StringBuffer sb = new StringBuffer();
                  for ( int j = 0; j < cl.getLength(); j++ ) {
                     Node cn = cl.item(j);
                     switch ( cn.getNodeType() ) {
                        case Node.TEXT_NODE:
                           sb.append(((Text)cn).getData());
                           break;

                        case Node.ELEMENT_NODE:
                           throw new CompilationException("Text is only allowed in '"+SELECT+"'");
                     }
                  }
                  StringTokenizer st = new StringTokenizer(sb.toString());

                  QueryNode pn = parentNode;
                  if ( st.countTokens() > 1 ) {
                     pn = new AndQueryNode();
                     parentNode.addChild(pn);
                  }

                  while ( st.hasMoreTokens() ) {
                     String value = st.nextToken();
                     SelectQueryNode node = new SelectQueryNode(nodeName, value, opLookup(opName));
                     pn.addChild(node);
                  }
               }
               else
                  throw new CompilationException("Unknown element '"+name+"'");
               break;

            case Node.TEXT_NODE:
               if ( ((Text)n).getData().trim().length() > 0 )
                  throw new CompilationException("Text is only allowed in '"+SELECT+"'");
         }
      }
   }

   /**
    * buildFromWords exposes a simplified query syntax that adheres to the
    * following format:
    *
    *    (element[@attribute]=(word|"word*"))*
    *
    * So you can do stuff like:
    *
    *    interests="coffee cigarettes"
    *    person@keywords=caffeine
    *    *="jamaican blue"
    *    *@*=cup
    */
   private QueryNode buildFromWords(String words) throws CompilationException {
      QueryNode root = new AndQueryNode();
      StringTokenizer st = new StringTokenizer(words, "=\" \t\n\r", true);
      while ( st.hasMoreTokens() ) {
         String name = getNextToken(st, "Name");
         if ( getTokenType(name) != TOKEN_WORD )
            throw new CompilationException("Name Token Expected");

         String operator = getNextToken(st, "Operator");
         if ( getTokenType(operator) != TOKEN_OPERATOR )
            throw new CompilationException("Operator Token Expected");

         String value = getNextToken(st, "Value");
         switch ( getTokenType(value) ) {
            case TOKEN_STRING:
               boolean done = false;
               while ( !done ) {
                  value = getNextToken(st, "Word");
                  switch ( getTokenType(value) ) {
                     case TOKEN_STRING:
                        done = true;
                        break;
                     case TOKEN_WORD:
                        root.addChild(new SelectQueryNode(name, value));
                        break;
                     default:
                        throw new CompilationException("String Token Expected");
                  }
               }
               break;

            case TOKEN_WORD:
               root.addChild(new SelectQueryNode(name, value));
               break;

            default:
               throw new CompilationException("Word Or String Token Expected");
         }
      }
      return root;
   }

   private int getTokenType(String token) {
      if ( token.length() == 1 ) {
         switch ( token.charAt(0) ) {
            case '=':
               return TOKEN_OPERATOR;
            case '\"':
               return TOKEN_STRING;
            default:
               return TOKEN_WHITESPACE;
         }
      }
      else
         return TOKEN_WORD;
   }

   private String getNextToken(StringTokenizer st, String expecting) throws CompilationException {
      String token = null;
      do {
         if ( !st.hasMoreTokens() )
            throw new CompilationException(expecting+" Token Expected");
         token = st.nextToken();
      }
      while ( getTokenType(token) == TOKEN_WHITESPACE );
      return token;
   }

   private int opLookup(String opName) throws CompilationException {
      if ( opName != null && opName.length() > 0 ) {
         String opUpper = opName.toUpperCase();
         for ( int i = 0; i < OPS.length; i++ )
            if ( opUpper.equals(STRS[i]) || opUpper.equals(ALTS[i]) )
               return OPS[i];
         throw new CompilationException("Unknown operator '"+opName+"'");
      }
      else
         return OP_EQ;
   }


   /**
    * QueryNode
    */

   private abstract class QueryNode {
      public List children;

      public QueryNode() {
      }

      public void addChild(QueryNode node) {
         if ( children == null )
            children = new ArrayList();
         children.add(node);
      }

      public boolean hasChildren() {
         return children != null;
      }

      public List getChildren() {
         return children;
      }

      public abstract int getNodeType();
   }


   /**
    * AndQueryNode represents a node set where the resulting keys will be
    * produced as a result of an ANDing operation: evaluate() passes the
    * key sets of its children to QueryEngine.andKeySets.
    */

   private class AndQueryNode extends QueryNode {
      /** Identifies this node as NODE_AND. */
      public int getNodeType() {
         return NODE_AND;
      }
   }


   /**
    * OrQueryNode represents a node set where the resulting keys will be
    * produced as a result of an ORing operation: evaluate() passes the
    * key sets of its children to QueryEngine.orKeySets.
    */

   private class OrQueryNode extends QueryNode {
      /** Identifies this node as NODE_OR. */
      public int getNodeType() {
         return NODE_OR;
      }
   }


   /**
    * SelectQueryNode performs a full text search based on the specified
    * value and operator.
    */

   private class SelectQueryNode extends QueryNode {
      public int operator = OP_EQ;
      public String name;
      public String value;

      public SelectQueryNode(String name, String value, int operator) {
         this.name = name;
         this.value = value;
         this.operator = operator;
      }

      public SelectQueryNode(String name, String value) {
         this.name = name;
         this.value = value;
      }

      public int getNodeType() {
         return NODE_SELECT;
      }
   }
}
TOP

Related Classes of com.dbxml.db.common.fulltext.FullTextQuery

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.