Package org.exist.dom

Examples of org.exist.dom.Match


                // the current string of text
                for (int i = 0; i < offsetStack.size(); i++) {
                    NodeOffset no = offsetStack.get(i);
                    int end = no.offset + seq.length();
                    // scan all matches
                    Match next = match;
                    while (next != null) {
                        if (next.getIndexId() == NGramIndex.ID && next.getNodeId().equals(no.nodeId)) {
                            int freq = next.getFrequency();
                            for (int j = 0; j < freq; j++) {
                                Match.Offset offset = next.getOffset(j);
                                if (offset.getOffset() < end &&
                                    offset.getOffset() + offset.getLength() > no.offset) {
                                    // add it to the list to be processed
                                    if (offsets == null) {
                                        offsets = new ArrayList<Match.Offset>(4);
                                    }
                                    // adjust the offset and add it to the list
                                    int start = offset.getOffset() - no.offset;
                                    int len = offset.getLength();
                                    if (start < 0) {
                                        len = len - Math.abs(start);
                                        start = 0;
                                    }
                                    if (start + len > seq.length())
                                        len = seq.length() - start;
                                    offsets.add(new Match.Offset(start, len));
                                }
                            }
                        }
                        next = next.getNextMatch();
                    }
                    // add the length of the current text to the element content length
                    no.offset = end;
                }
            }
View Full Code Here


        private void readMatches(String current, VariableByteInput is, NodeId nodeId, int freq, NodeProxy parentNode) throws IOException {
            int diff = 0;
            if (current.length() > ngram.length())
                diff = current.lastIndexOf(ngram);
            Match match = new NGramMatch(contextId, nodeId, ngram, freq);
            for (int n = 0; n < freq; n++) {
                int offset = is.readInt();
                if (diff > 0)
                    offset += diff;
                match.addOffset(offset, ngram.length());
            }
            parentNode.addMatch(match);
        }
View Full Code Here

    /**
     * Creates a listener and initialises it by delegating to {@code reset(broker, proxy)}.
     *
     * @param broker passed through unchanged to {@code reset}
     * @param proxy  passed through unchanged to {@code reset}
     */
    public FTMatchListener(DBBroker broker, NodeProxy proxy) {
        reset(broker, proxy);
    }

    public boolean hasMatches(NodeProxy proxy) {
        Match nextMatch = proxy.getMatches();
        while (nextMatch != null) {
            if (nextMatch.getIndexId() == FTIndex.ID) {
                return true;
            }
            nextMatch = nextMatch.getNextMatch();
        }
        return false;
    }
View Full Code Here

        * in the current node. For example, if the indexed node is &lt;a>abc&lt;b>de&lt;/b></a>
        * and we query for //a[text:ngram-contains(., 'de')]/b, proxy will be a &lt;b> node, but
        * the offsets of the matches are relative to the start of &lt;a>.
        */
        NodeSet ancestors = null;
        Match nextMatch = this.match;
        while (nextMatch != null) {
            if (proxy.getNodeId().isDescendantOf(nextMatch.getNodeId())) {
                if (ancestors == null)
                    {ancestors = new ExtArrayNodeSet();}
                ancestors.add(new NodeProxy(proxy.getDocument(), nextMatch.getNodeId()));
            }
            nextMatch = nextMatch.getNextMatch();
        }
        if (ancestors != null && !ancestors.isEmpty()) {
            for (final Iterator<NodeProxy> i = ancestors.iterator(); i.hasNext();) {
                final NodeProxy p = i.next();
                int startOffset = 0;
View Full Code Here

        }
    }

    @Override
    public void startElement(QName qname, AttrList attribs) throws SAXException {
        Match nextMatch = match;
        // check if there are any matches in the current element
        // if yes, push a NodeOffset object to the stack to track
        // the node contents
        while (nextMatch != null) {
            if (nextMatch.getNodeId().equals(getCurrentNode().getNodeId())) {
                if (offsetStack == null)
                    {offsetStack = new Stack<NodeOffset>();}
                offsetStack.push(new NodeOffset(nextMatch.getNodeId()));
                break;
            }
            nextMatch = nextMatch.getNextMatch();
        }
        super.startElement(qname, attribs);
    }
View Full Code Here

        super.startElement(qname, attribs);
    }

    @Override
    public void endElement(QName qname) throws SAXException {
        Match nextMatch = match;
        // check if we need to pop the stack
        while (nextMatch != null) {
            if (nextMatch.getNodeId().equals(getCurrentNode().getNodeId())) {
                offsetStack.pop();
                break;
            }
            nextMatch = nextMatch.getNextMatch();
        }
        super.endElement(qname);
    }
View Full Code Here

            // the current string of text
            for (int i = 0; i < offsetStack.size(); i++) {
                final NodeOffset no = offsetStack.get(i);
                int end = no.offset + seq.length();
                // scan all matches
                Match next = match;
                while (next != null) {
                    if (next.getIndexId() == FTIndex.ID && next.getNodeId().equals(no.nodeId)) {
                        final int freq = next.getFrequency();
                        for (int j = 0; j < freq; j++) {
                            final Match.Offset offset = next.getOffset(j);
                            if (offset.getOffset() < end &&
                                offset.getOffset() + offset.getLength() > no.offset) {
                                // add it to the list to be processed
                                if (offsets == null) {
                                    offsets = new ArrayList<Match.Offset>(4);
                                }
                                // adjust the offset and add it to the list
                                int start = offset.getOffset() - no.offset;
                                int len = offset.getLength();
                                if (start < 0) {
                                    len = len - Math.abs(start);
                                    start = 0;
                                }
                                if (start + len > seq.length())
                                    {len = seq.length() - start;}
                                offsets.add(new Match.Offset(start, len));
                            }
                        }
                    }
                    next = next.getNextMatch();
                }
                // add the length of the current text to the element content length
                no.offset = end;
            }
        }
        // walk through the matches a second time to find matches in the text node itself
        Match next = match;
        while (next != null) {
            if (next.getIndexId() == FTIndex.ID &&
                next.getNodeId().equals(getCurrentNode().getNodeId())) {
                if (offsets == null)
                    {offsets = new ArrayList<Match.Offset>();}
                final int freq = next.getFrequency();
                for (int i = 0; i < freq; i++) {
                    offsets.add(next.getOffset(i));
                }
            }
            next = next.getNextMatch();
        }
        // now print out the text, marking all matches with a match element
        if (offsets != null) {
            FastQSort.sort(offsets, 0, offsets.size() - 1);
            final String s = seq.toString();
View Full Code Here

        }
        //Iterate on results
        for (final NodeProxy current : result) {
            final Vector<NodeId> matchNodeIDs = new Vector<NodeId>();
            //Get first match
            Match nextMatch = current.getMatches();
            //Remove previously found matches on current
            current.setMatches(null);
            //Iterate on attach matches, with unicity of related nodeproxy gid
            String term;
            while(nextMatch != null) {
                final NodeId nodeId= nextMatch.getNodeId();
                //If current node id has not been previously processed
                if (!matchNodeIDs.contains(nodeId)) {
                    final NodeProxy mcurrent = new NodeProxy(current.getDocument(), nodeId);
                    Match match = null;
                    int firstOffset = -1;
                    matchNodeIDs.add(nodeId);
                    final String value = mcurrent.getNodeValue();
                    tok.setText(value);
                    int j = 0;
                    if (j < terms.length)
                        {term = terms[j];}
                    else
                        {break;}
                    int frequency = 0;
                    while ((token = tok.nextToken()) != null) {
                        final String word = token.getText().toLowerCase();
                        if (word.equalsIgnoreCase(term)) {
                            j++;
                            if (j == terms.length) {
                                //All terms found
                                if (match == null)
                                    {match = nextMatch.createInstance(getExpressionId(),
                                        nodeId, matchTerm);}
                                if (firstOffset < 0)
                                    {firstOffset = token.startOffset();}
                                match.addOffset(firstOffset, token.endOffset() - firstOffset);
                                frequency++;
                                //Start again on first term
                                j = 0;
                                term = terms[j];
                                continue;
View Full Code Here

        //Walk through hits
        final ExtArrayNodeSet r = new ExtArrayNodeSet();
        final Tokenizer tok = context.getBroker().getTextEngine().getTokenizer();
        Matcher matcher;
        for (final NodeProxy current : result) {
            Match nextMatch;
            final Vector<NodeId> matchGid = new Vector<NodeId>();
            //Get first match
            nextMatch = current.getMatches();
            //Remove previously found matches on current
            current.setMatches(null);
            //Iterate on attach matches, with unicity of related nodeproxy gid
            while (nextMatch != null) {
                final Hashtable<String, Match> matchTable = new Hashtable<String, Match>();
                final NodeId nodeId = nextMatch.getNodeId();
                //If current node id has not been previously processed
                if (!matchGid.contains(nodeId)) {
                    final NodeProxy mcurrent = new NodeProxy(current.getDocument(), nodeId);
                    //Add it in node id array
                    matchGid.add(nodeId);
                    final String value = mcurrent.getNodeValue();
                    tok.setText(value);
                    int j = 0;
                    if (j < patterns.length) {
                        matcher = matchers[j];
                    } else
                        {break;}
                    String matchTerm = null;
                    TextToken token;
                    while ((token = tok.nextToken()) != null) {
                        String word = token.getText().toLowerCase();
                        matcher.reset(word);
                        matchers[0].reset(word);
                        if (matcher.matches()) {
                            j++;
                            if (matchTerm == null)
                                {matchTerm=word;}
                            else
                                {matchTerm = matchTerm + "\\W*" + word;}
                            if (j == patterns.length) {
                                //All terms found
                                if (matchTable.containsKey(matchTerm)) {
                                    //Previously found matchTerm
                                    final Match match = matchTable.get(matchTerm);
                                    match.addOffset(token.startOffset(), matchTerm.length());
                                } else {
                                    final Match match = nextMatch.createInstance(getExpressionId(),
                                        nodeId, matchTerm);
                                    match.addOffset(token.startOffset(), matchTerm.length());
                                    matchTable.put(matchTerm,match);
                                }
                                //Start again on first term
                                j = 0;
                                matcher = matchers[j];
                                matchTerm = null;
                                continue;
                            } else {
                                matcher = matchers[j];
                            }
                        } else if (j > 0 && matchers[0].matches()) {
                            //First search term found: start again
                            j = 1;
                            //Pattern term = patterns[j];
                            matcher = matchers[j];
                            matchTerm = word;
                            continue;
                        } else {
                            //Reset
                            j = 0;
                            matcher = matchers[j];
                            matchTerm = null;
                            continue;
                        }
                    }
                    //One or more match found
                    if (matchTable.size() != 0) {
                        final Enumeration<Match> eMatch = matchTable.elements();
                        while (eMatch.hasMoreElements()){
                            final Match match = eMatch.nextElement();
                            current.addMatch(match);
                        }
                        //Add current to result
                        r.add(current);
                    }
View Full Code Here

            // remember the first node, we need it later
            if (firstProxy == null)
                {firstProxy = proxy;}
            final TextImpl text = (TextImpl) proxy.getNode();
           
            Match next = proxy.getMatches();
            while (next != null) {
                if (next.getNodeId().equals(text.getNodeId())) {
                    if (offsets == null)
                        {offsets = new ArrayList<Match.Offset>();}
                    final int freq = next.getFrequency();
                    for (int j = 0; j < freq; j++) {
                        // translate the relative offset into an absolute offset and add it to the list
                        final Match.Offset offset = next.getOffset(j);
                        offset.setOffset(str.length() + offset.getOffset());
                        offsets.add(offset);
                    }
                }
                next = next.getNextMatch();
            }
           
            // append the string value of the node to the buffer
            str.append(text.getData());
        }
       
        // Second step: output the text
        ValueSequence result = new ValueSequence();
        final DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder);
        int nodeNr;
        int currentWidth = 0;
        if (offsets == null) {
            // no matches: just output the entire text
            if (width > str.length())
                {width = str.length();}
            nodeNr = builder.characters(str.substring(0, width));
            result.add(builder.getDocument().getNode(nodeNr));
            currentWidth += width;
        } else {
            // sort the offsets
            FastQSort.sort(offsets, 0, offsets.size() - 1);
           
            int nextOffset = 0;
            int pos = 0;
            int lastNodeNr = -1;
           
            // prepare array for callback function arguments
            final Sequence params[] = new Sequence[callback.getSignature().getArgumentCount()];
            params[1] = firstProxy;
            params[2] = extraArgs;
           
            // handle the first match: if the text to the left of the match
            // is larger than half of the width, truncate it.
            if (str.length() > width) {
                final Match.Offset firstMatch = offsets.get(nextOffset++);
                if (firstMatch.getOffset() > 0) {
                    int leftWidth = (width - firstMatch.getLength()) / 2;
                    if (firstMatch.getOffset() > leftWidth) {
                        pos = truncateStart(str, firstMatch.getOffset() - leftWidth, firstMatch.getOffset());
                        leftWidth = firstMatch.getOffset() - pos;
                    } else
                        {leftWidth = firstMatch.getOffset();}
                    nodeNr = builder.characters(str.substring(pos, pos + leftWidth));
                    // adjacent chunks of text will be merged into one text node. we may
                    // thus get duplicate nodes here. check the nodeNr to avoid adding
                    // the same node twice.
                    if (lastNodeNr != nodeNr)
                      {result.add(builder.getDocument().getNode(nodeNr));}
                    lastNodeNr = nodeNr;
                    currentWidth += leftWidth;
                    pos += leftWidth;
                }
   
                // put the matching term into argument 0 of the callback function
                params[0] = new StringValue(str.substring(firstMatch.getOffset(), firstMatch.getOffset() + firstMatch.getLength()));
                // if the callback function accepts 4 arguments, the last argument should contain additional
                // information on the match:
                if (callback.getSignature().getArgumentCount() == 4) {
                  params[3] = new ValueSequence();
                  params[3].add(new IntegerValue(nextOffset - 1));
                  params[3].add(new IntegerValue(firstMatch.getOffset()));
                  params[3].add(new IntegerValue(firstMatch.getLength()));
                }
                // now execute the callback func.
                final Sequence callbackResult = callback.evalFunction(null, null, params);
                // iterate through the result of the callback
                for (final SequenceIterator iter = callbackResult.iterate(); iter.hasNext(); ) {
                  final Item next = iter.nextItem();
                  if (Type.subTypeOf(next.getType(), Type.NODE)) {
                    nodeNr = builder.getDocument().getLastNode();
                    try {
              next.copyTo(context.getBroker(), receiver);
              result.add(builder.getDocument().getNode(++nodeNr));
              lastNodeNr = nodeNr;
            } catch (final SAXException e) {
              throw new XPathException(this, "Internal error while copying nodes: " + e.getMessage(), e);
            }
                  }
                }
                currentWidth += firstMatch.getLength();
                pos += firstMatch.getLength();
            } else
                {width = str.length();}
           
            // output the rest of the text and matches
            Match.Offset offset;
            for (int i = nextOffset; i < offsets.size() && currentWidth < width; i++) {
                offset = offsets.get(i);
                if (offset.getOffset() > pos) {
                    int len = offset.getOffset() - pos;
                    if (currentWidth + len > width)
                        {len = width - currentWidth;}
                    nodeNr = builder.characters(str.substring(pos, pos + len));
                    if (lastNodeNr != nodeNr)
                      {result.add(builder.getDocument().getNode(nodeNr));}
                    currentWidth += len;
                    pos += len;
                }
               
                if (currentWidth + offset.getLength() < width) {
                  // put the matching term into argument 0 of the callback function
                    params[0] = new StringValue(str.substring(offset.getOffset(), offset.getOffset() + offset.getLength()));
                    // if the callback function accepts 4 arguments, the last argument should contain additional
                    // information on the match:
                    if (callback.getSignature().getArgumentCount() == 4) {
                      params[3] = new ValueSequence();
                      params[3].add(new IntegerValue(i));
                      params[3].add(new IntegerValue(offset.getOffset()));
                      params[3].add(new IntegerValue(offset.getLength()));
                    }
                    // execute the callback function
                    final Sequence callbackResult = callback.evalFunction(null, null, params);
                    for (final SequenceIterator iter = callbackResult.iterate(); iter.hasNext(); ) {
                      final Item next = iter.nextItem();
                      if (Type.subTypeOf(next.getType(), Type.NODE)) {
                        nodeNr = builder.getDocument().getLastNode();
                        try {
                  next.copyTo(context.getBroker(), receiver);
                  result.add(builder.getDocument().getNode(++nodeNr));
                  lastNodeNr = nodeNr;
                } catch (final SAXException e) {
                  throw new XPathException(this, "Internal error while copying nodes: " + e.getMessage(), e);
                }
View Full Code Here

TOP

Related Classes of org.exist.dom.Match

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.