Package org.elasticsearch.search.lookup

Source Code of org.elasticsearch.search.lookup.IndexFieldTerm

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.lookup;

import org.apache.lucene.index.*;
import org.apache.lucene.search.TermStatistics;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.lucene.search.EmptyScorer;

import java.io.IOException;
import java.util.Iterator;

/**
* Holds all information on a particular term in a field.
* */
public class IndexFieldTerm implements Iterable<TermPosition> {

    // The posting list for this term. Is null if the term or field does not
    // exist. Can be DocsEnum or DocsAndPositionsEnum.
    DocsEnum docsEnum;

    // Stores if positions, offsets and payloads are requested.
    private final int flags;

    private final String fieldName;

    private final String term;

    private final PositionIterator iterator;

    // for lucene calls
    private final Term identifier;

    private final TermStatistics termStats;

    static private EmptyScorer EMPTY_DOCS_ENUM = new EmptyScorer(null);

    // get the document frequency of the term
    public long df() throws IOException {
        return termStats.docFreq();
    }

    // get the total term frequency of the term, that is, how often does the
    // term appear in any document?
    public long ttf() throws IOException {
        return termStats.totalTermFreq();
    }

    // when the reader changes, we have to get the posting list for this term
    // and reader
    void setNextReader(LeafReader reader) {
        try {
            // Get the posting list for a specific term. Depending on the flags,
            // this
            // will either get a DocsEnum or a DocsAndPositionsEnum if
            // available.

            // get lucene frequency flag
            int luceneFrequencyFlag = getLuceneFrequencyFlag(flags);
            if (shouldRetrieveFrequenciesOnly()) {
                docsEnum = getOnlyDocsEnum(luceneFrequencyFlag, reader);
            } else {
                int lucenePositionsFlags = getLucenePositionsFlags(flags);
                docsEnum = getDocsAndPosEnum(lucenePositionsFlags, reader);
                if (docsEnum == null) {// no pos available
                    docsEnum = getOnlyDocsEnum(luceneFrequencyFlag, reader);
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Unable to get posting list for field " + fieldName + " and term " + term, e);
        }

    }

    private boolean shouldRetrieveFrequenciesOnly() {
        return (flags & ~IndexLookup.FLAG_FREQUENCIES) == 0;
    }

    private int getLuceneFrequencyFlag(int flags) {
        return (flags & IndexLookup.FLAG_FREQUENCIES) > 0 ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE;
    }

    private int getLucenePositionsFlags(int flags) {
        int lucenePositionsFlags = (flags & IndexLookup.FLAG_PAYLOADS) > 0 ? DocsAndPositionsEnum.FLAG_PAYLOADS : 0x0;
        lucenePositionsFlags |= (flags & IndexLookup.FLAG_OFFSETS) > 0 ? DocsAndPositionsEnum.FLAG_OFFSETS : 0x0;
        return lucenePositionsFlags;
    }

    // get the DocsAndPositionsEnum from the reader.
    private DocsEnum getDocsAndPosEnum(int luceneFlags, LeafReader reader) throws IOException {
        assert identifier.field() != null;
        assert identifier.bytes() != null;
        final Fields fields = reader.fields();
        DocsEnum newDocsEnum = null;
        if (fields != null) {
            final Terms terms = fields.terms(identifier.field());
            if (terms != null) {
                if (terms.hasPositions()) {
                    final TermsEnum termsEnum = terms.iterator(null);
                    if (termsEnum.seekExact(identifier.bytes())) {
                        newDocsEnum = termsEnum.docsAndPositions(reader.getLiveDocs(),
                                docsEnum instanceof DocsAndPositionsEnum ? (DocsAndPositionsEnum) docsEnum : null, luceneFlags);
                    }
                }
            }
        }
        return newDocsEnum;
    }

    // get the DocsEnum from the reader.
    private DocsEnum getOnlyDocsEnum(int luceneFlags, LeafReader reader) throws IOException {
        assert identifier.field() != null;
        assert identifier.bytes() != null;
        final Fields fields = reader.fields();
        DocsEnum newDocsEnum = null;
        if (fields != null) {
            final Terms terms = fields.terms(identifier.field());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                if (termsEnum.seekExact(identifier.bytes())) {
                    newDocsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, luceneFlags);
                }
            }
        }
        if (newDocsEnum == null) {
            newDocsEnum = EMPTY_DOCS_ENUM;
        }
        return newDocsEnum;
    }

    private int freq = 0;

    public void setNextDoc(int docId) {
        assert (docsEnum != null);
        try {
            // we try to advance to the current document.
            int currentDocPos = docsEnum.docID();
            if (currentDocPos < docId) {
                currentDocPos = docsEnum.advance(docId);
            }
            if (currentDocPos == docId) {
                freq = docsEnum.freq();
            } else {
                freq = 0;
            }
            iterator.nextDoc();
        } catch (IOException e) {
            throw new ElasticsearchException("While trying to initialize term positions in IndexFieldTerm.setNextDoc() ", e);
        }
    }

    public IndexFieldTerm(String term, String fieldName, IndexLookup indexLookup, int flags) {
        assert fieldName != null;
        this.fieldName = fieldName;
        assert term != null;
        this.term = term;
        assert indexLookup != null;
        identifier = new Term(fieldName, (String) term);
        this.flags = flags;
        boolean doRecord = ((flags & IndexLookup.FLAG_CACHE) > 0);
        if (withPositions()) {
            if (!doRecord) {
                iterator = new PositionIterator(this);
            } else {
                iterator = new CachedPositionIterator(this);
            }
        } else {
            iterator = new PositionIterator(this);
        }
        setNextReader(indexLookup.getReader());
        setNextDoc(indexLookup.getDocId());
        try {
            termStats = indexLookup.getIndexSearcher().termStatistics(identifier,
                    TermContext.build(indexLookup.getReaderContext(), identifier));
        } catch (IOException e) {
            throw new ElasticsearchException("Cannot get term statistics: ", e);
        }
    }

    private boolean withPositions() {
        return shouldRetrievePositions() || shouldRetrieveOffsets() || shouldRetrievePayloads();
    }

    protected boolean shouldRetrievePositions() {
        return (flags & IndexLookup.FLAG_POSITIONS) > 0;
    }

    protected boolean shouldRetrieveOffsets() {
        return (flags & IndexLookup.FLAG_OFFSETS) > 0;
    }

    protected boolean shouldRetrievePayloads() {
        return (flags & IndexLookup.FLAG_PAYLOADS) > 0;
    }

    public int tf() throws IOException {
        return freq;
    }

    @Override
    public Iterator<TermPosition> iterator() {
        return iterator.reset();
    }

    /*
     * A user might decide inside a script to call get with _POSITIONS and then
     * a second time with _PAYLOADS. If the positions were recorded but the
     * payloads were not, the user will not have access to them. Therfore, throw
     * exception here explaining how to call get().
     */
    public void validateFlags(int flags2) {
        if ((this.flags & flags2) < flags2) {
            throw new ElasticsearchException("You must call get with all required flags! Instead of " + getCalledStatement(flags2)
                    + "call " + getCallStatement(flags2 | this.flags) + " once");
        }
    }

    private String getCalledStatement(int flags2) {
        String calledFlagsCall1 = getFlagsString(flags);
        String calledFlagsCall2 = getFlagsString(flags2);
        String callStatement1 = getCallStatement(calledFlagsCall1);
        String callStatement2 = getCallStatement(calledFlagsCall2);
        return " " + callStatement1 + " and " + callStatement2 + " ";
    }

    private String getCallStatement(String calledFlags) {
        return "_index['" + this.fieldName + "'].get('" + this.term + "', " + calledFlags + ")";
    }

    private String getFlagsString(int flags2) {
        String flagsString = null;
        if ((flags2 & IndexLookup.FLAG_FREQUENCIES) != 0) {
            flagsString = anddToFlagsString(flagsString, "_FREQUENCIES");
        }
        if ((flags2 & IndexLookup.FLAG_POSITIONS) != 0) {
            flagsString = anddToFlagsString(flagsString, "_POSITIONS");
        }
        if ((flags2 & IndexLookup.FLAG_OFFSETS) != 0) {
            flagsString = anddToFlagsString(flagsString, "_OFFSETS");
        }
        if ((flags2 & IndexLookup.FLAG_PAYLOADS) != 0) {
            flagsString = anddToFlagsString(flagsString, "_PAYLOADS");
        }
        if ((flags2 & IndexLookup.FLAG_CACHE) != 0) {
            flagsString = anddToFlagsString(flagsString, "_CACHE");
        }
        return flagsString;
    }

    private String anddToFlagsString(String flagsString, String flag) {
        if (flagsString != null) {
            flagsString += " | ";
        } else {
            flagsString = "";
        }
        flagsString += flag;
        return flagsString;
    }

    private String getCallStatement(int flags2) {
        String calledFlags = getFlagsString(flags2);
        String callStatement = getCallStatement(calledFlags);
        return " " + callStatement + " ";

    }

}
TOP

Related Classes of org.elasticsearch.search.lookup.IndexFieldTerm

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.