Package org.apache.lucene.index

Examples of org.apache.lucene.index.TermVectorOffsetInfo


        public void trim() {
            int end = startOffset + (mergeGap / 2);
            Iterator<TermVectorOffsetInfo> it = offsetInfosList.iterator();
            while (it.hasNext()) {
                TermVectorOffsetInfo tvoi = it.next();
                if (tvoi.getStartOffset() > end) {
                    it.remove();
                }
            }
        }
View Full Code Here


                    Iterator<TermVectorOffsetInfo> intervalIterator = intervalTermOffsetInfo
                            .iterator();

                    int index = 0;
                    while (intervalIterator.hasNext()) {
                        TermVectorOffsetInfo intervalOI = intervalIterator
                                .next();
                        if (index >= termoffsets.length) {
                            intervalIterator.remove();
                            continue;
                        }
                        boolean matchSearch = true;
                        boolean matchFound = false;
                        while (matchSearch) {
                            TermVectorOffsetInfo localOI = termoffsets[index];
                            // check interval match
                            // CJK languages will have the tokens from the PhraseQuery glued together (see LUCENE-2458)
                            int diff = localOI.getStartOffset()
                                    - intervalOI.getEndOffset();
                            // TODO we'll probably have to remove 'diff == 0'
                            // after upgrading to lucene 3.1
                            if (diff == 1 || diff == 0) {
                                intervalOI.setEndOffset(localOI.getEndOffset());
                                matchSearch = false;
                                matchFound = true;
                            }
                            index++;
                            if (index >= termoffsets.length) {
View Full Code Here

            sb.append(escape(new String(cbuf, skippedChars, cbuf.length
                    - skippedChars)));

            // iterate terms
            for (Iterator<TermVectorOffsetInfo> iter = fi.iterator(); iter.hasNext();) {
                TermVectorOffsetInfo ti = iter.next();
                nextStart = ti.getStartOffset();
                if (nextStart - pos > 0) {
                    cbuf = new char[nextStart - pos];
                    int charsRead = reader.read(cbuf, 0, nextStart - pos);
                    pos += (nextStart - pos);
                    sb.append(escape(new String(cbuf, 0, charsRead)));
                }
                sb.append(hlStart);
                nextStart = ti.getEndOffset();
                // print term
                cbuf = new char[nextStart - pos];
                reader.read(cbuf, 0, nextStart - pos);
                pos += (nextStart - pos);
                sb.append(escape(new String(cbuf)));
View Full Code Here

        public void trim() {
            int end = startOffset + (mergeGap / 2);
            Iterator<TermVectorOffsetInfo> it = offsetInfosList.iterator();
            while (it.hasNext()) {
                TermVectorOffsetInfo tvoi = it.next();
                if (tvoi.getStartOffset() > end) {
                    it.remove();
                }
            }
        }
View Full Code Here

                    Iterator<TermVectorOffsetInfo> intervalIterator = intervalTermOffsetInfo
                            .iterator();

                    int index = 0;
                    while (intervalIterator.hasNext()) {
                        TermVectorOffsetInfo intervalOI = intervalIterator
                                .next();
                        if (index >= termoffsets.length) {
                            intervalIterator.remove();
                            continue;
                        }
                        boolean matchSearch = true;
                        boolean matchFound = false;
                        while (matchSearch) {
                            TermVectorOffsetInfo localOI = termoffsets[index];
                            // check interval match
                            // CJK languages will have the tokens from the PhraseQuery glued together (see LUCENE-2458)
                            int diff = localOI.getStartOffset()
                                    - intervalOI.getEndOffset();
                            // TODO we'll probably have to remove 'diff == 0'
                            // after upgrading to lucene 3.1
                            if (diff == 1 || diff == 0) {
                                intervalOI.setEndOffset(localOI.getEndOffset());
                                matchSearch = false;
                                matchFound = true;
                            }
                            index++;
                            if (index >= termoffsets.length) {
View Full Code Here

            sb.append(escape(new String(cbuf, skippedChars, cbuf.length
                    - skippedChars)));

            // iterate terms
            for (Iterator<TermVectorOffsetInfo> iter = fi.iterator(); iter.hasNext();) {
                TermVectorOffsetInfo ti = iter.next();
                nextStart = ti.getStartOffset();
                if (nextStart - pos > 0) {
                    cbuf = new char[nextStart - pos];
                    int charsRead = reader.read(cbuf, 0, nextStart - pos);
                    pos += (nextStart - pos);
                    sb.append(escape(new String(cbuf, 0, charsRead)));
                }
                sb.append(hlStart);
                nextStart = ti.getEndOffset();
                // print term
                cbuf = new char[nextStart - pos];
                reader.read(cbuf, 0, nextStart - pos);
                pos += (nextStart - pos);
                sb.append(escape(new String(cbuf)));
View Full Code Here

        public void trim() {
            int end = startOffset + (mergeGap / 2);
            Iterator<TermVectorOffsetInfo> it = offsetInfosList.iterator();
            while (it.hasNext()) {
                TermVectorOffsetInfo tvoi = it.next();
                if (tvoi.getStartOffset() > end) {
                    it.remove();
                }
            }
        }
View Full Code Here

                } else {
                    TermVectorOffsetInfo[] tmp = info;
                    info = new TermVectorOffsetInfo[tmp.length + 1];
                    System.arraycopy(tmp, 0, info, 0, tmp.length);
                }
                info[info.length - 1] = new TermVectorOffsetInfo(
                    offset.startOffset(), offset.endOffset());
                termMap.put(termText, info);
            }
            ts.end();
            ts.close();
View Full Code Here

        while (it.hasNext()) {
            FragmentInfo fi = it.next();
            boolean overlap = false;
            Iterator<TermVectorOffsetInfo> fit = fi.iterator();
            while (fit.hasNext() && !overlap) {
                TermVectorOffsetInfo oi = fit.next();
                if (offsetInfos.containsKey(oi)) {
                    overlap = true;
                }
            }
            if (overlap) {
                it.remove();
            } else {
                Iterator<TermVectorOffsetInfo> oit = fi.iterator();
                while (oit.hasNext()) {
                    offsetInfos.put(oit.next(), null);
                }
            }
        }

        // create excerpts
        StringBuffer sb = new StringBuffer(excerptStart);
        it = infos.iterator();
        while (it.hasNext()) {
            FragmentInfo fi = it.next();
            sb.append(fragmentStart);
            int limit = Math.max(0, fi.getStartOffset() / 2 + fi.getEndOffset() / 2 - surround);
            int len = startFragment(sb, text, fi.getStartOffset(), limit);
            TermVectorOffsetInfo lastOffsetInfo = null;
            Iterator<TermVectorOffsetInfo> fIt = fi.iterator();
            while (fIt.hasNext()) {
                TermVectorOffsetInfo oi = fIt.next();
                if (lastOffsetInfo != null) {
                    // fill in text between terms
                    sb.append(escape(text.substring(
                            lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
                }
                sb.append(hlStart);
                sb.append(escape(text.substring(oi.getStartOffset(),
                        oi.getEndOffset())));
                sb.append(hlEnd);
                lastOffsetInfo = oi;
            }
            limit = Math.min(text.length(), fi.getStartOffset() - len
                    + (surround * 2));
 
View Full Code Here

            sb.append(Text.encodeIllegalXMLCharacters(
                    new String(cbuf, skippedChars, cbuf.length - skippedChars)));

            // iterate terms
            for (Iterator iter = fi.iterator(); iter.hasNext();) {
                TermVectorOffsetInfo ti = (TermVectorOffsetInfo) iter.next();
                nextStart = ti.getStartOffset();
                if (nextStart - pos > 0) {
                    cbuf = new char[nextStart - pos];
                    int charsRead = reader.read(cbuf, 0, nextStart - pos);
                    pos += (nextStart - pos);
                    sb.append(cbuf, 0, charsRead);
                }
                sb.append(hlStart);
                nextStart = ti.getEndOffset();
                // print term
                cbuf = new char[nextStart - pos];
                reader.read(cbuf, 0, nextStart - pos);
                pos += (nextStart - pos);
                sb.append(cbuf);
View Full Code Here

TOP

Related Classes of org.apache.lucene.index.TermVectorOffsetInfo

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.