Package org.elasticsearch.index.fielddata.ordinals

Examples of org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder


        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
        FloatArray values = BigArrays.NON_RECYCLING_INSTANCE.newFloatArray(128);

        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
        boolean success = false;
        try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
            BytesRef term;
            long numTerms = 0;
            while ((term = iter.next()) != null) {
                values = BigArrays.NON_RECYCLING_INSTANCE.grow(values, numTerms + 1);
                values.set(numTerms++, NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)));
            }
            values = BigArrays.NON_RECYCLING_INSTANCE.resize(values, numTerms);
            final FloatArray finalValues = values;
            final Ordinals build = builder.build(fieldDataType.getSettings());
            RandomAccessOrds ordinals = build.ordinals();
            if (FieldData.isMultiValued(ordinals) || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS) {
                final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
                data = new AtomicDoubleFieldData(ramBytesUsed) {

                    @Override
                    public SortedNumericDoubleValues getDoubleValues() {
                        return withOrdinals(build, finalValues, reader.maxDoc());
                    }
                   
                    @Override
                    public Iterable<? extends Accountable> getChildResources() {
                        List<Accountable> resources = new ArrayList<>();
                        resources.add(Accountables.namedAccountable("ordinals", build));
                        resources.add(Accountables.namedAccountable("values", finalValues));
                        return Collections.unmodifiableList(resources);
                    }

                };
            } else {
                final BitSet set = builder.buildDocsWithValuesSet();

                // there's sweet spot where due to low unique value count, using ordinals will consume less memory
                long singleValuesArraySize = reader.maxDoc() * RamUsageEstimator.NUM_BYTES_FLOAT + (set == null ? 0 : set.ramBytesUsed());
                long uniqueValuesArraySize = values.ramBytesUsed();
                long ordinalsSize = build.ramBytesUsed();
View Full Code Here


        final int pageSize = Integer.highestOneBit(BigArrays.PAGE_SIZE_IN_BYTES * 8 / encoding.numBitsPerCoordinate() - 1) << 1;
        PagedMutable lat = new PagedMutable(initialSize, pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
        PagedMutable lon = new PagedMutable(initialSize, pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
        boolean success = false;
        try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(), acceptableTransientOverheadRatio)) {
            final GeoPointEnum iter = new GeoPointEnum(builder.buildFromTerms(terms.iterator(null)));
            GeoPoint point;
            while ((point = iter.next()) != null) {
                final long ord = builder.currentOrdinal();
                if (lat.size() <= ord) {
                    final long newSize = BigArrays.overSize(ord + 1);
                    lat = lat.resize(newSize);
                    lon = lon.resize(newSize);
                }
                lat.set(ord, encoding.encodeCoordinate(point.getLat()));
                lon.set(ord, encoding.encodeCoordinate(point.getLon()));
            }

            Ordinals build = builder.build(fieldDataType.getSettings());
            RandomAccessOrds ordinals = build.ordinals();
            if (FieldData.isMultiValued(ordinals) || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS) {
                if (lat.size() != ordinals.getValueCount()) {
                    lat = lat.resize(ordinals.getValueCount());
                    lon = lon.resize(ordinals.getValueCount());
                }
                data = new GeoPointCompressedAtomicFieldData.WithOrdinals(encoding, lon, lat, build, reader.maxDoc());
            } else {
                int maxDoc = reader.maxDoc();
                PagedMutable sLat = new PagedMutable(reader.maxDoc(), pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
                PagedMutable sLon = new PagedMutable(reader.maxDoc(), pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
                final long missing = encoding.encodeCoordinate(0);
                for (int i = 0; i < maxDoc; i++) {
                    ordinals.setDocument(i);
                    final long nativeOrdinal = ordinals.nextOrd();
                    if (nativeOrdinal >= 0) {
                        sLat.set(i, lat.get(nativeOrdinal));
                        sLon.set(i, lon.get(nativeOrdinal));
                    } else {
                        sLat.set(i, missing);
                        sLon.set(i, missing);
                    }
                }
                BitSet set = builder.buildDocsWithValuesSet();
                data = new GeoPointCompressedAtomicFieldData.Single(encoding, sLon, sLat, set);
            }
            success = true;
            return data;
        } finally {
View Full Code Here

        }
        DoubleArray lat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
        DoubleArray lon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);
        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
        boolean success = false;
        try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(), acceptableTransientOverheadRatio)) {
            final GeoPointEnum iter = new GeoPointEnum(builder.buildFromTerms(terms.iterator(null)));
            GeoPoint point;
            long numTerms = 0;
            while ((point = iter.next()) != null) {
                lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms + 1);
                lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms + 1);
                lat.set(numTerms, point.getLat());
                lon.set(numTerms, point.getLon());
                ++numTerms;
            }
            lat = BigArrays.NON_RECYCLING_INSTANCE.resize(lat, numTerms);
            lon = BigArrays.NON_RECYCLING_INSTANCE.resize(lon, numTerms);

            Ordinals build = builder.build(fieldDataType.getSettings());
            RandomAccessOrds ordinals = build.ordinals();
            if (!(FieldData.isMultiValued(ordinals) || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS)) {
                int maxDoc = reader.maxDoc();
                DoubleArray sLat = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
                DoubleArray sLon = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(reader.maxDoc());
                for (int i = 0; i < maxDoc; i++) {
                    ordinals.setDocument(i);
                    long nativeOrdinal = ordinals.nextOrd();
                    if (nativeOrdinal != RandomAccessOrds.NO_MORE_ORDS) {
                        sLat.set(i, lat.get(nativeOrdinal));
                        sLon.set(i, lon.get(nativeOrdinal));
                    }
                }
                BitSet set = builder.buildDocsWithValuesSet();
                data = new GeoPointDoubleArrayAtomicFieldData.Single(sLon, sLat, set);
            } else {
                data = new GeoPointDoubleArrayAtomicFieldData.WithOrdinals(lon, lat, build, reader.maxDoc());
            }
            success = true;
View Full Code Here

        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
        DoubleArray values = BigArrays.NON_RECYCLING_INSTANCE.newDoubleArray(128);

        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
        boolean success = false;
        try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
            final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
            BytesRef term;
            long numTerms = 0;
            while ((term = iter.next()) != null) {
                values = BigArrays.NON_RECYCLING_INSTANCE.grow(values, numTerms + 1);
                values.set(numTerms++, NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)));
            }
            values = BigArrays.NON_RECYCLING_INSTANCE.resize(values, numTerms);
            final DoubleArray finalValues = values;
            final Ordinals build = builder.build(fieldDataType.getSettings());
            RandomAccessOrds ordinals = build.ordinals();
            if (FieldData.isMultiValued(ordinals) || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS) {
                final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
                data = new AtomicDoubleFieldData(ramBytesUsed) {

                    @Override
                    public SortedNumericDoubleValues getDoubleValues() {
                        return withOrdinals(build, finalValues, reader.maxDoc());
                    }
                   
                    @Override
                    public Iterable<? extends Accountable> getChildResources() {
                        List<Accountable> resources = new ArrayList<>();
                        resources.add(Accountables.namedAccountable("ordinals", build));
                        resources.add(Accountables.namedAccountable("values", finalValues));
                        return Collections.unmodifiableList(resources);
                    }

                };
            } else {
                final BitSet set = builder.buildDocsWithValuesSet();

                // there's sweet spot where due to low unique value count, using ordinals will consume less memory
                long singleValuesArraySize = reader.maxDoc() * RamUsageEstimator.NUM_BYTES_DOUBLE + (set == null ? 0 : set.ramBytesUsed());
                long uniqueValuesArraySize = values.ramBytesUsed();
                long ordinalsSize = build.ramBytesUsed();
View Full Code Here

        } else {
            numTerms = -1;
        }
        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
        boolean success = false;
        try (OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio)) {

            // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
            // empty strings twice. ie. them merge fails for long output.
            TermsEnum termsEnum = filter(terms, reader);
            DocsEnum docsEnum = null;
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                final long termOrd = builder.nextOrdinal();
                fstBuilder.add(Util.toIntsRef(term, scratch), (long) termOrd);
                docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                    builder.addDoc(docId);
                }
            }

            FST<Long> fst = fstBuilder.finish();

            final Ordinals ordinals = builder.build(fieldDataType.getSettings());

            data = new FSTBytesAtomicFieldData(fst, ordinals);
            success = true;
            return data;
        } finally {
View Full Code Here

        final OrdinalsBuilder builder;

        TypeBuilder(float acceptableTransientOverheadRatio, LeafReader reader) throws IOException {
            bytes = new PagedBytes(15);
            termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
            builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
        }
View Full Code Here

        final PackedLongValues.Builder valuesBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
        TermsEnum termsEnum = estimator.beforeLoad(terms);
        boolean success = false;
        try (OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio)) {
            BytesRefIterator iter = builder.buildFromTerms(termsEnum);
            BytesRef term;
            assert !getNumericType().isFloatingPoint();
            final boolean indexedAsLong = getNumericType().requiredBits() > 32;
            while ((term = iter.next()) != null) {
                final long value = indexedAsLong
                        ? NumericUtils.prefixCodedToLong(term)
                        : NumericUtils.prefixCodedToInt(term);
                valuesBuilder.add(value);
            }
            final PackedLongValues values = valuesBuilder.build();
            final Ordinals build = builder.build(fieldDataType.getSettings());
            CommonSettings.MemoryStorageFormat formatHint = CommonSettings.getMemoryStorageHint(fieldDataType);

            RandomAccessOrds ordinals = build.ordinals();
            if (FieldData.isMultiValued(ordinals) || formatHint == CommonSettings.MemoryStorageFormat.ORDINALS) {
                final long ramBytesUsed = build.ramBytesUsed() + values.ramBytesUsed();
                data = new AtomicLongFieldData(ramBytesUsed) {

                    @Override
                    public SortedNumericDocValues getLongValues() {
                        return withOrdinals(build, values, reader.maxDoc());
                    }

                    @Override
                    public Iterable<? extends Accountable> getChildResources() {
                        List<Accountable> resources = new ArrayList<>();
                        resources.add(Accountables.namedAccountable("ordinals", build));
                        resources.add(Accountables.namedAccountable("values", values));
                        return Collections.unmodifiableList(resources);
                    }
                };
            } else {
                final BitSet docsWithValues = builder.buildDocsWithValuesSet();

                long minV, maxV;
                minV = maxV = 0;
                if (values.size() > 0) {
                    minV = values.get(0);
View Full Code Here

        // per-term

        TermsEnum termsEnum = estimator.beforeLoad(terms);
        boolean success = false;

        try (OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio)) {
            DocsEnum docsEnum = null;
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                final long termOrd = builder.nextOrdinal();
                assert termOrd == termOrdToBytesOffset.size();
                termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
                docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                    builder.addDoc(docId);
                }
            }
            PagedBytes.Reader bytesReader = bytes.freeze(true);
            final Ordinals ordinals = builder.build(fieldDataType.getSettings());

            data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
            success = true;
            return data;
        } finally {
View Full Code Here

TOP

Related Classes of org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.