Package org.exist.dom

Examples of org.exist.dom.Match$F


                // the current string of text
                for (int i = 0; i < offsetStack.size(); i++) {
                    NodeOffset no = offsetStack.get(i);
                    int end = no.offset + seq.length();
                    // scan all matches
                    Match next = match;
                    while (next != null) {
                        if (next.getIndexId() == NGramIndex.ID && next.getNodeId().equals(no.nodeId)) {
                            int freq = next.getFrequency();
                            for (int j = 0; j < freq; j++) {
                                Match.Offset offset = next.getOffset(j);
                                if (offset.getOffset() < end &&
                                    offset.getOffset() + offset.getLength() > no.offset) {
                                    // add it to the list to be processed
                                    if (offsets == null) {
                                        offsets = new ArrayList<Match.Offset>(4);
                                    }
                                    // adjust the offset and add it to the list
                                    int start = offset.getOffset() - no.offset;
                                    int len = offset.getLength();
                                    if (start < 0) {
                                        len = len - Math.abs(start);
                                        start = 0;
                                    }
                                    if (start + len > seq.length())
                                        len = seq.length() - start;
                                    offsets.add(new Match.Offset(start, len));
                                }
                            }
                        }
                        next = next.getNextMatch();
                    }
                    // add the length of the current text to the element content length
                    no.offset = end;
                }
            }
View Full Code Here


        private void readMatches(String current, VariableByteInput is, NodeId nodeId, int freq, NodeProxy parentNode) throws IOException {
            int diff = 0;
            if (current.length() > ngram.length())
                diff = current.lastIndexOf(ngram);
            Match match = new NGramMatch(contextId, nodeId, ngram, freq);
            for (int n = 0; n < freq; n++) {
                int offset = is.readInt();
                if (diff > 0)
                    offset += diff;
                match.addOffset(offset, ngram.length());
            }
            parentNode.addMatch(match);
        }
View Full Code Here

    /**
     * Creates the listener and immediately initializes it for the given node
     * by delegating to {@code reset(broker, proxy)}.
     *
     * @param broker the broker used to access the database
     * @param proxy  the node whose full-text matches this listener will process
     */
    public FTMatchListener(DBBroker broker, NodeProxy proxy) {
        reset(broker, proxy);
    }

    public boolean hasMatches(NodeProxy proxy) {
        Match nextMatch = proxy.getMatches();
        while (nextMatch != null) {
            if (nextMatch.getIndexId() == FTIndex.ID) {
                return true;
            }
            nextMatch = nextMatch.getNextMatch();
        }
        return false;
    }
View Full Code Here

        * in the current node. For example, if the indexed node is &lt;a>abc&lt;b>de&lt;/b></a>
        * and we query for //a[text:ngram-contains(., 'de')]/b, proxy will be a &lt;b> node, but
        * the offsets of the matches are relative to the start of &lt;a>.
        */
        NodeSet ancestors = null;
        Match nextMatch = this.match;
        while (nextMatch != null) {
            if (proxy.getNodeId().isDescendantOf(nextMatch.getNodeId())) {
                if (ancestors == null)
                    {ancestors = new ExtArrayNodeSet();}
                ancestors.add(new NodeProxy(proxy.getDocument(), nextMatch.getNodeId()));
            }
            nextMatch = nextMatch.getNextMatch();
        }
        if (ancestors != null && !ancestors.isEmpty()) {
            for (final Iterator<NodeProxy> i = ancestors.iterator(); i.hasNext();) {
                final NodeProxy p = i.next();
                int startOffset = 0;
View Full Code Here

        }
    }

    @Override
    public void startElement(QName qname, AttrList attribs) throws SAXException {
        Match nextMatch = match;
        // check if there are any matches in the current element
        // if yes, push a NodeOffset object to the stack to track
        // the node contents
        while (nextMatch != null) {
            if (nextMatch.getNodeId().equals(getCurrentNode().getNodeId())) {
                if (offsetStack == null)
                    {offsetStack = new Stack<NodeOffset>();}
                offsetStack.push(new NodeOffset(nextMatch.getNodeId()));
                break;
            }
            nextMatch = nextMatch.getNextMatch();
        }
        super.startElement(qname, attribs);
    }
View Full Code Here

        super.startElement(qname, attribs);
    }

    @Override
    public void endElement(QName qname) throws SAXException {
        Match nextMatch = match;
        // check if we need to pop the stack
        while (nextMatch != null) {
            if (nextMatch.getNodeId().equals(getCurrentNode().getNodeId())) {
                offsetStack.pop();
                break;
            }
            nextMatch = nextMatch.getNextMatch();
        }
        super.endElement(qname);
    }
View Full Code Here

            // the current string of text
            for (int i = 0; i < offsetStack.size(); i++) {
                final NodeOffset no = offsetStack.get(i);
                int end = no.offset + seq.length();
                // scan all matches
                Match next = match;
                while (next != null) {
                    if (next.getIndexId() == FTIndex.ID && next.getNodeId().equals(no.nodeId)) {
                        final int freq = next.getFrequency();
                        for (int j = 0; j < freq; j++) {
                            final Match.Offset offset = next.getOffset(j);
                            if (offset.getOffset() < end &&
                                offset.getOffset() + offset.getLength() > no.offset) {
                                // add it to the list to be processed
                                if (offsets == null) {
                                    offsets = new ArrayList<Match.Offset>(4);
                                }
                                // adjust the offset and add it to the list
                                int start = offset.getOffset() - no.offset;
                                int len = offset.getLength();
                                if (start < 0) {
                                    len = len - Math.abs(start);
                                    start = 0;
                                }
                                if (start + len > seq.length())
                                    {len = seq.length() - start;}
                                offsets.add(new Match.Offset(start, len));
                            }
                        }
                    }
                    next = next.getNextMatch();
                }
                // add the length of the current text to the element content length
                no.offset = end;
            }
        }
        // walk through the matches a second time to find matches in the text node itself
        Match next = match;
        while (next != null) {
            if (next.getIndexId() == FTIndex.ID &&
                next.getNodeId().equals(getCurrentNode().getNodeId())) {
                if (offsets == null)
                    {offsets = new ArrayList<Match.Offset>();}
                final int freq = next.getFrequency();
                for (int i = 0; i < freq; i++) {
                    offsets.add(next.getOffset(i));
                }
            }
            next = next.getNextMatch();
        }
        // now print out the text, marking all matches with a match element
        if (offsets != null) {
            FastQSort.sort(offsets, 0, offsets.size() - 1);
            final String s = seq.toString();
View Full Code Here

        }
        //Iterate on results
        for (final NodeProxy current : result) {
            final Vector<NodeId> matchNodeIDs = new Vector<NodeId>();
            //Get first match
            Match nextMatch = current.getMatches();
            //Remove previously found matches on current
            current.setMatches(null);
            //Iterate on attach matches, with unicity of related nodeproxy gid
            String term;
            while(nextMatch != null) {
                final NodeId nodeId= nextMatch.getNodeId();
                //If current node id has not been previously processed
                if (!matchNodeIDs.contains(nodeId)) {
                    final NodeProxy mcurrent = new NodeProxy(current.getDocument(), nodeId);
                    Match match = null;
                    int firstOffset = -1;
                    matchNodeIDs.add(nodeId);
                    final String value = mcurrent.getNodeValue();
                    tok.setText(value);
                    int j = 0;
                    if (j < terms.length)
                        {term = terms[j];}
                    else
                        {break;}
                    int frequency = 0;
                    while ((token = tok.nextToken()) != null) {
                        final String word = token.getText().toLowerCase();
                        if (word.equalsIgnoreCase(term)) {
                            j++;
                            if (j == terms.length) {
                                //All terms found
                                if (match == null)
                                    {match = nextMatch.createInstance(getExpressionId(),
                                        nodeId, matchTerm);}
                                if (firstOffset < 0)
                                    {firstOffset = token.startOffset();}
                                match.addOffset(firstOffset, token.endOffset() - firstOffset);
                                frequency++;
                                //Start again on fist term
                                j = 0;
                                term = terms[j];
                                continue;
View Full Code Here

        //Walk through hits
        final ExtArrayNodeSet r = new ExtArrayNodeSet();
        final Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
        Matcher matcher;
        for (final NodeProxy current : result) {
            Match nextMatch;
            final Vector<NodeId> matchGid = new Vector<NodeId>();
            //Get first match
            nextMatch = current.getMatches();
            //Remove previously found matches on current
            current.setMatches(null);
            //Iterate on attach matches, with unicity of related nodeproxy gid
            while (nextMatch != null) {
                final Hashtable<String, Match> matchTable = new Hashtable<String, Match>();
                final NodeId nodeId = nextMatch.getNodeId();
                //If current node id has not been previously processed
                if (!matchGid.contains(nodeId)) {
                    final NodeProxy mcurrent = new NodeProxy(current.getDocument(), nodeId);
                    //Add it in node id array
                    matchGid.add(nodeId);
                    final String value = mcurrent.getNodeValue();
                    tok.setText(value);
                    int j = 0;
                    if (j < patterns.length) {
                        matcher = matchers[j];
                    } else
                        {break;}
                    String matchTerm = null;
                    TextToken token;
                    while ((token = tok.nextToken()) != null) {
                        String word = token.getText().toLowerCase();
                        matcher.reset(word);
                        matchers[0].reset(word);
                        if (matcher.matches()) {
                            j++;
                            if (matchTerm == null)
                                {matchTerm=word;}
                            else
                                {matchTerm = matchTerm + "\\W*" + word;}
                            if (j == patterns.length) {
                                //All terms found
                                if (matchTable.containsKey(matchTerm)) {
                                    //Previously found matchTerm
                                    final Match match = matchTable.get(matchTerm);
                                    match.addOffset(token.startOffset(), matchTerm.length());
                                } else {
                                    final Match match = nextMatch.createInstance(getExpressionId(),
                                        nodeId, matchTerm);
                                    match.addOffset(token.startOffset(), matchTerm.length());
                                    matchTable.put(matchTerm,match);
                                }
                                //Start again on fist term
                                j = 0;
                                matcher = matchers[j];
                                matchTerm = null;
                                continue;
                            } else {
                                matcher = matchers[j];
                            }
                        } else if (j > 0 && matchers[0].matches()) {
                            //First search term found: start again
                            j = 1;
                            //Pattern term = patterns[j];
                            matcher = matchers[j];
                            matchTerm = word;
                            continue;
                        } else {
                            //Reset
                            j = 0;
                            matcher = matchers[j];
                            matchTerm = null;
                            continue;
                        }
                    }
                    //One or more match found
                    if (matchTable.size() != 0) {
                        final Enumeration<Match> eMatch = matchTable.elements();
                        while (eMatch.hasMoreElements()){
                            final Match match = eMatch.nextElement();
                            current.addMatch(match);
                        }
                        //Add current to result
                        r.add(current);
                    }
View Full Code Here

            // remember the first node, we need it later
            if (firstProxy == null)
                {firstProxy = proxy;}
            final TextImpl text = (TextImpl) proxy.getNode();
           
            Match next = proxy.getMatches();
            while (next != null) {
                if (next.getNodeId().equals(text.getNodeId())) {
                    if (offsets == null)
                        {offsets = new ArrayList<Match.Offset>();}
                    final int freq = next.getFrequency();
                    for (int j = 0; j < freq; j++) {
                        // translate the relative offset into an absolute offset and add it to the list
                        final Match.Offset offset = next.getOffset(j);
                        offset.setOffset(str.length() + offset.getOffset());
                        offsets.add(offset);
                    }
                }
                next = next.getNextMatch();
            }
           
            // append the string value of the node to the buffer
            str.append(text.getData());
        }
       
        // Second step: output the text
        ValueSequence result = new ValueSequence();
        final DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder);
        int nodeNr;
        int currentWidth = 0;
        if (offsets == null) {
            // no matches: just output the entire text
            if (width > str.length())
                {width = str.length();}
            nodeNr = builder.characters(str.substring(0, width));
            result.add(builder.getDocument().getNode(nodeNr));
            currentWidth += width;
        } else {
            // sort the offsets
            FastQSort.sort(offsets, 0, offsets.size() - 1);
           
            int nextOffset = 0;
            int pos = 0;
            int lastNodeNr = -1;
           
            // prepare array for callback function arguments
            final Sequence params[] = new Sequence[callback.getSignature().getArgumentCount()];
            params[1] = firstProxy;
            params[2] = extraArgs;
           
            // handle the first match: if the text to the left of the match
            // is larger than half of the width, truncate it.
            if (str.length() > width) {
                final Match.Offset firstMatch = offsets.get(nextOffset++);
                if (firstMatch.getOffset() > 0) {
                    int leftWidth = (width - firstMatch.getLength()) / 2;
                    if (firstMatch.getOffset() > leftWidth) {
                        pos = truncateStart(str, firstMatch.getOffset() - leftWidth, firstMatch.getOffset());
                        leftWidth = firstMatch.getOffset() - pos;
                    } else
                        {leftWidth = firstMatch.getOffset();}
                    nodeNr = builder.characters(str.substring(pos, pos + leftWidth));
                    // adjacent chunks of text will be merged into one text node. we may
                    // thus get duplicate nodes here. check the nodeNr to avoid adding
                    // the same node twice.
                    if (lastNodeNr != nodeNr)
                      {result.add(builder.getDocument().getNode(nodeNr));}
                    lastNodeNr = nodeNr;
                    currentWidth += leftWidth;
                    pos += leftWidth;
                }
   
                // put the matching term into argument 0 of the callback function
                params[0] = new StringValue(str.substring(firstMatch.getOffset(), firstMatch.getOffset() + firstMatch.getLength()));
                // if the callback function accepts 4 arguments, the last argument should contain additional
                // information on the match:
                if (callback.getSignature().getArgumentCount() == 4) {
                  params[3] = new ValueSequence();
                  params[3].add(new IntegerValue(nextOffset - 1));
                  params[3].add(new IntegerValue(firstMatch.getOffset()));
                  params[3].add(new IntegerValue(firstMatch.getLength()));
                }
                // now execute the callback func.
                final Sequence callbackResult = callback.evalFunction(null, null, params);
                // iterate through the result of the callback
                for (final SequenceIterator iter = callbackResult.iterate(); iter.hasNext(); ) {
                  final Item next = iter.nextItem();
                  if (Type.subTypeOf(next.getType(), Type.NODE)) {
                    nodeNr = builder.getDocument().getLastNode();
                    try {
              next.copyTo(context.getBroker(), receiver);
              result.add(builder.getDocument().getNode(++nodeNr));
              lastNodeNr = nodeNr;
            } catch (final SAXException e) {
              throw new XPathException(this, "Internal error while copying nodes: " + e.getMessage(), e);
            }
                  }
                }
                currentWidth += firstMatch.getLength();
                pos += firstMatch.getLength();
            } else
                {width = str.length();}
           
            // output the rest of the text and matches
            Match.Offset offset;
            for (int i = nextOffset; i < offsets.size() && currentWidth < width; i++) {
                offset = offsets.get(i);
                if (offset.getOffset() > pos) {
                    int len = offset.getOffset() - pos;
                    if (currentWidth + len > width)
                        {len = width - currentWidth;}
                    nodeNr = builder.characters(str.substring(pos, pos + len));
                    if (lastNodeNr != nodeNr)
                      {result.add(builder.getDocument().getNode(nodeNr));}
                    currentWidth += len;
                    pos += len;
                }
               
                if (currentWidth + offset.getLength() < width) {
                  // put the matching term into argument 0 of the callback function
                    params[0] = new StringValue(str.substring(offset.getOffset(), offset.getOffset() + offset.getLength()));
                    // if the callback function accepts 4 arguments, the last argument should contain additional
                    // information on the match:
                    if (callback.getSignature().getArgumentCount() == 4) {
                      params[3] = new ValueSequence();
                      params[3].add(new IntegerValue(i));
                      params[3].add(new IntegerValue(offset.getOffset()));
                      params[3].add(new IntegerValue(offset.getLength()));
                    }
                    // execute the callback function
                    final Sequence callbackResult = callback.evalFunction(null, null, params);
                    for (final SequenceIterator iter = callbackResult.iterate(); iter.hasNext(); ) {
                      final Item next = iter.nextItem();
                      if (Type.subTypeOf(next.getType(), Type.NODE)) {
                        nodeNr = builder.getDocument().getLastNode();
                        try {
                  next.copyTo(context.getBroker(), receiver);
                  result.add(builder.getDocument().getNode(++nodeNr));
                  lastNodeNr = nodeNr;
                } catch (final SAXException e) {
                  throw new XPathException(this, "Internal error while copying nodes: " + e.getMessage(), e);
                }
View Full Code Here

TOP

Related Classes of org.exist.dom.Match$F

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.