Package: org.exist.storage.analysis

Usage examples of org.exist.storage.analysis.Tokenizer


        return path.getDependencies();
    }

    protected String[] getSearchTerms(String searchString) throws EXistException {
        final List<String> tokens = new ArrayList<String>();
        final Tokenizer tokenizer = context.getBroker().getTextEngine().getTokenizer();
        tokenizer.setText(searchString);
        org.exist.storage.analysis.TextToken token;
        String word;
        while (null != (token = tokenizer.nextToken(true))) {
            word = token.getText();
            tokens.add(word);
        }
        final String[] terms = new String[tokens.size()];
        return tokens.toArray(terms);
View Full Code Here


    }

    /**
     * NOTE(review): truncated excerpt — the remainder of this method body is
     * not visible here.
     *
     * Re-checks each hit in {@code result} by re-tokenizing its string value
     * and walking the tokens while matching the query terms in order,
     * tracking the distance between consecutive matched terms.
     *
     * @param context current XQuery context, used to obtain the broker's tokenizer
     * @param terms   the query terms, matched sequentially against node tokens
     * @param result  the candidate node set whose hits are re-verified
     */
    private Sequence exactMatch(XQueryContext context, String[] terms, NodeSet result) {
        //Walk through hits and calculate term-distances
        final NodeSet r = new ExtArrayNodeSet();
        final Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
        String term;
        for (final NodeProxy current : result) {
            // Re-tokenize the textual value of the current hit
            final String value = current.getNodeValueSeparated();
            tok.setText(value);
            // j indexes the next expected query term
            int j = 0;
            if (j < terms.length) {
                term = terms[j];
            } else {
                break;
            }
            int current_distance = -1;
            TextToken token;
            while ((token = tok.nextToken()) != null) {
                // Lowercased for case-insensitive comparison against the terms
                final String word = token.getText().toLowerCase();
                // max_distance is a field declared outside this excerpt;
                // exceeding it restarts matching from the first term
                if (current_distance > max_distance) {
                    // reset
                    j = 0;
                    term = terms[j];
View Full Code Here

                // NOTE(review): truncated excerpt — the enclosing
                // pattern-compilation loop above and the rest of the method
                // below are not visible here.
                throw new XPathException("Malformed pattern: " + patterns[i]);
            }
        }
        //Walk through hits and calculate term-distances
        final ExtArrayNodeSet r = new ExtArrayNodeSet(100);
        final Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
        Matcher matcher;
        TextToken token;
        for (final NodeProxy current : result) {
            // Re-tokenize the textual value of the current hit
            final String value = current.getNodeValueSeparated();
            tok.setText(value);
            // j indexes the next expected pattern; start with matchers[0]
            int j = 0;
            if (j < patterns.length) {
                matcher = matchers[j];
            } else {
                break;
            }
            int current_distance = -1;
            while ((token = tok.nextToken()) != null) {
                // Lowercased before applying the (presumably case-sensitive)
                // compiled patterns — TODO confirm pattern flags elsewhere
                final String word = token.getText().toLowerCase();
                // max_distance is a field declared outside this excerpt
                if (current_distance > max_distance) {
                    //Reset
                    j = 0;
                    matcher = matchers[j];
View Full Code Here

     * @param result
     */
    /**
     * NOTE(review): truncated excerpt — the remainder of this method body is
     * not visible here.
     *
     * Phrase-style verification: builds a regex-like phrase from the terms
     * (joined with {@code \W*}), then re-tokenizes the value of each matched
     * node and matches the terms sequentially against its tokens.
     *
     * @param context current XQuery context, used to obtain the broker's tokenizer
     * @param terms   the phrase terms, in required order
     * @param result  candidate nodes carrying previously collected matches
     */
    private Sequence exactMatch(XQueryContext context, String[] terms, NodeSet result) {
        TextToken token;
        final NodeSet r = new ExtArrayNodeSet();
        final Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
        //Define search phrase for matches
        String matchTerm = "";
        for (int k = 0; k < terms.length ; k++) {
            matchTerm = matchTerm + terms[k];
            // Separate consecutive terms by "any non-word chars"
            if (k != terms.length - 1)
                {matchTerm = matchTerm + "\\W*";}
        }
        //Iterate on results
        for (final NodeProxy current : result) {
            // Tracks node ids already processed for this proxy (uniqueness)
            final Vector<NodeId> matchNodeIDs = new Vector<NodeId>();
            //Get first match
            Match nextMatch = current.getMatches();
            //Remove previously found matches on current
            current.setMatches(null);
            //Iterate on attach matches, with unicity of related nodeproxy gid
            String term;
            while(nextMatch != null) {
                final NodeId nodeId= nextMatch.getNodeId();
                //If current node id has not been previously processed
                if (!matchNodeIDs.contains(nodeId)) {
                    final NodeProxy mcurrent = new NodeProxy(current.getDocument(), nodeId);
                    Match match = null;
                    int firstOffset = -1;
                    matchNodeIDs.add(nodeId);
                    // Re-tokenize the text of the matched node
                    final String value = mcurrent.getNodeValue();
                    tok.setText(value);
                    // j indexes the next expected term of the phrase
                    int j = 0;
                    if (j < terms.length)
                        {term = terms[j];}
                    else
                        {break;}
                    int frequency = 0;
                    while ((token = tok.nextToken()) != null) {
                        final String word = token.getText().toLowerCase();
                        if (word.equalsIgnoreCase(term)) {
                            // Advance to the next term; full phrase found
                            // once all terms matched in sequence
                            j++;
                            if (j == terms.length) {
                                //All terms found
View Full Code Here

                // NOTE(review): truncated excerpt — the enclosing
                // pattern-compilation code above and the rest of the method
                // below are not visible here.
                return Sequence.EMPTY_SEQUENCE;
            }
        }
        //Walk through hits
        final ExtArrayNodeSet r = new ExtArrayNodeSet();
        final Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
        Matcher matcher;
        for (final NodeProxy current : result) {
            Match nextMatch;
            // Tracks node ids already processed for this proxy (uniqueness)
            final Vector<NodeId> matchGid = new Vector<NodeId>();
            //Get first match
            nextMatch = current.getMatches();
            //Remove previously found matches on current
            current.setMatches(null);
            //Iterate on attach matches, with unicity of related nodeproxy gid
            while (nextMatch != null) {
                final Hashtable<String, Match> matchTable = new Hashtable<String, Match>();
                final NodeId nodeId = nextMatch.getNodeId();
                //If current node id has not been previously processed
                if (!matchGid.contains(nodeId)) {
                    final NodeProxy mcurrent = new NodeProxy(current.getDocument(), nodeId);
                    //Add it in node id array
                    matchGid.add(nodeId);
                    // Re-tokenize the text of the matched node
                    final String value = mcurrent.getNodeValue();
                    tok.setText(value);
                    // j indexes the next expected pattern in the sequence
                    int j = 0;
                    if (j < patterns.length) {
                        matcher = matchers[j];
                    } else
                        {break;}
                    String matchTerm = null;
                    TextToken token;
                    while ((token = tok.nextToken()) != null) {
                        // Lowercase the token, then reuse the compiled
                        // matchers against it (Matcher.reset avoids
                        // recompiling the pattern per token)
                        String word = token.getText().toLowerCase();
                        matcher.reset(word);
                        matchers[0].reset(word);
                        if (matcher.matches()) {
                            j++;
View Full Code Here

                    Sequence contextSequence)
      throws XPathException {
    // NOTE(review): truncated excerpt — the method name and leading
    // parameters are cut off above; only the tail of the signature is visible.
    // Tokenizes the string value of argument 1 and returns the token texts.
    final String searchString = getArgument(1).eval(contextSequence)
        .getStringValue();
    final List<String> tokens = new ArrayList<String>();
    final Tokenizer tokenizer = context.getBroker().getTextEngine()
        .getTokenizer();
    tokenizer.setText(searchString);
    org.exist.storage.analysis.TextToken token;
    String word;
    // Collect every token produced for the search string
    while (null != (token = tokenizer.nextToken(true))) {
      word = token.getText();
      tokens.add(word);
    }
    return tokens;
  }
View Full Code Here

        return result;
    }
   
    /**
     * NOTE(review): truncated excerpt — the return statement and closing
     * brace of this method are not visible here.
     *
     * Splits the given search string into individual terms using the
     * broker's full-text tokenizer.
     *
     * @param searchString the raw query string to tokenize
     */
    protected String[] getSearchTerms(String searchString) {
        final List<String> tokens = new ArrayList<String>();
        final Tokenizer tokenizer =
            context.getBroker().getTextEngine().getTokenizer();
        tokenizer.setText(searchString);
        org.exist.storage.analysis.TextToken token;
        String word;
        // Collect every token produced for the search string
        while (null != (token = tokenizer.nextToken(true))) {
            word = token.getText();
            tokens.add(word);
        }
        String[] terms = new String[tokens.size()];
        terms = tokens.toArray(terms);
View Full Code Here

TOP

Related Classes of org.exist.storage.analysis.Tokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.