Package be.bagofwords.db.data

Source Code of be.bagofwords.db.data.CompactIndex

package be.bagofwords.db.data;

import be.bagofwords.db.bloomfilter.LongBloomFilter;
import org.codehaus.jackson.annotate.JsonIgnore;

import java.util.ArrayList;
import java.util.List;

public class CompactIndex {

    public static final double FPP = 0.01;

    private int numberOfCounts;
    private long numberOfFeatures;
    private List<Long> cachedKeys;
    private LongBloomFilter filterCounts;

    public CompactIndex(long numberOfFeatures, int numberOfCounts) {
        this.cachedKeys = new ArrayList<>();
        this.numberOfFeatures = numberOfFeatures;
        this.numberOfCounts = numberOfCounts;
    }

    public CompactIndex(long numberOfFeatures, LongBloomFilter filterCounts, int numberOfCounts) {
        this.numberOfFeatures = numberOfFeatures;
        this.filterCounts = filterCounts;
        this.numberOfCounts = numberOfCounts;
    }

    public CompactIndex() {

    }

    @JsonIgnore
    public boolean isSparse() {
        return filterCounts == null;
    }

    public void addKey(long key) {
        numberOfCounts++;
        if (isSparse()) {
            cachedKeys.add(key);
        } else {
            filterCounts.put(key);
        }
    }

    public long getNumberOfFeatures() {
        return numberOfFeatures;
    }

    public void setNumberOfFeatures(long numberOfFeatures) {
        this.numberOfFeatures = numberOfFeatures;
    }

    public List<Long> getCachedKeys() {
        return cachedKeys;
    }

    public void setCachedKeys(List<Long> cachedKeys) {
        this.cachedKeys = cachedKeys;
    }

    public LongBloomFilter getFilterCounts() {
        return filterCounts;
    }

    public void setFilterCounts(LongBloomFilter filterCounts) {
        this.filterCounts = filterCounts;
    }

    public int getNumberOfCounts() {
        return numberOfCounts;
    }

    public void setNumberOfCounts(int numberOfCounts) {
        this.numberOfCounts = numberOfCounts;
    }

    public boolean mightContain(long key) {
        if (isSparse()) {
            return cachedKeys.contains(key);
        } else {
            return filterCounts.mightContain(key);
        }
    }

    public void makeNonSparse() {
        this.filterCounts = new LongBloomFilter(numberOfFeatures, FPP);
        for (Long key : cachedKeys) {
            filterCounts.put(key);
        }
        this.cachedKeys = null;
    }

    public CompactIndex mergeWith(CompactIndex second) {
        CompactIndex result;
        long maxSizeForSparse = this.getNumberOfFeatures() / 10;
        boolean makeSparse = this.isSparse() && second.isSparse() && this.getCachedKeys().size() + second.getCachedKeys().size() < maxSizeForSparse;
        if (makeSparse) {
            result = new CompactIndex(this.getNumberOfFeatures(), this.getNumberOfCounts() + second.getNumberOfCounts());
            result.setCachedKeys(new ArrayList<>(this.getCachedKeys()));
            for (Long key : second.getCachedKeys()) {
                result.addKey(key);
            }
        } else {
            LongBloomFilter mergedCounts;
            if (!this.isSparse() && !second.isSparse()) {
                mergedCounts = ApproximateCountsUtils.mergeBloomFilters(this.getFilterCounts(), second.getFilterCounts());
            } else {
                if (!this.isSparse()) {
                    mergedCounts = this.getFilterCounts().clone();
                } else if (!second.isSparse()) {
                    mergedCounts = second.getFilterCounts().clone();
                } else {
                    //Both sparse
                    mergedCounts = new LongBloomFilter(this.getNumberOfFeatures(), CompactIndex.FPP);
                }
            }
            if (this.isSparse()) {
                for (Long key : this.getCachedKeys()) {
                    mergedCounts.put(key);
                }
            }
            if (second.isSparse()) {
                for (Long key : second.getCachedKeys()) {
                    mergedCounts.put(key);
                }
            }
            result = new CompactIndex(this.getNumberOfFeatures(), mergedCounts, this.getNumberOfCounts() + second.getNumberOfCounts());
        }
        return result;
    }

}
TOP

Related Classes of be.bagofwords.db.data.CompactIndex

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.