Package com.facebook.presto.serde

Source Code of com.facebook.presto.serde.BlocksFileWriter$StatsBuilder

/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.serde;

import com.facebook.presto.block.Block;
import com.facebook.presto.tuple.Tuple;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.io.OutputSupplier;
import io.airlift.slice.OutputStreamSliceOutput;
import io.airlift.slice.SliceOutput;

import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import static com.facebook.presto.block.BlockUtils.toTupleIterable;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;

public class BlocksFileWriter
        implements Closeable
{
    public static void writeBlocks(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> sliceOutput, Block... blocks)
    {
        writeBlocks(encoding, sliceOutput, ImmutableList.copyOf(blocks));
    }

    public static void writeBlocks(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> sliceOutput, Iterable<? extends Block> blocks)
    {
        writeBlocks(encoding, sliceOutput, blocks.iterator());
    }

    public static void writeBlocks(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> sliceOutput, Iterator<? extends Block> blocks)
    {
        checkNotNull(sliceOutput, "sliceOutput is null");
        BlocksFileWriter fileWriter = new BlocksFileWriter(encoding, sliceOutput);
        while (blocks.hasNext()) {
            fileWriter.append(toTupleIterable(blocks.next()));
        }
        fileWriter.close();
    }

    private final BlocksFileEncoding encoding;
    private final OutputSupplier<? extends OutputStream> outputSupplier;
    private final StatsBuilder statsBuilder = new StatsBuilder();
    private Encoder encoder;
    private SliceOutput sliceOutput;
    private boolean closed;

    public BlocksFileWriter(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> outputSupplier)
    {
        checkNotNull(encoding, "encoding is null");
        checkNotNull(outputSupplier, "outputSupplier is null");

        this.encoding = encoding;
        this.outputSupplier = outputSupplier;
    }

    public BlocksFileWriter append(Iterable<Tuple> tuples)
    {
        Preconditions.checkNotNull(tuples, "tuples is null");
        if (!Iterables.isEmpty(tuples)) {
            if (encoder == null) {
                open();
            }
            statsBuilder.process(tuples);
            encoder.append(tuples);
        }
        return this;
    }

    private void open()
    {
        try {
            OutputStream outputStream = outputSupplier.getOutput();
            if (outputStream instanceof SliceOutput) {
                sliceOutput = (SliceOutput) outputStream;
            }
            else {
                sliceOutput = new OutputStreamSliceOutput(outputStream);
            }
            encoder = encoding.createBlocksWriter(sliceOutput);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    public void close()
    {
        if (!closed && encoder != null) {
            BlockEncoding blockEncoding = encoder.finish();

            int startingIndex = sliceOutput.size();

            // write file encoding
            BlockEncodings.writeBlockEncoding(sliceOutput, blockEncoding);

            // write stats
            BlocksFileStats.serialize(statsBuilder.build(), sliceOutput);

            // write footer size
            int footerSize = sliceOutput.size() - startingIndex;
            checkState(footerSize > 0);
            sliceOutput.writeInt(footerSize);

            try {
                sliceOutput.close();
            }
            catch (IOException e) {
                throw Throwables.propagate(e);
            }

            closed = true;
        }
    }

    private static class StatsBuilder
    {
        private static final int MAX_UNIQUE_COUNT = 1000;

        private long rowCount;
        private long runsCount;
        private Tuple lastTuple;
        private final Set<Tuple> set = new HashSet<>(MAX_UNIQUE_COUNT);

        public void process(Iterable<Tuple> tuples)
        {
            Preconditions.checkNotNull(tuples, "tuples is null");

            for (Tuple tuple : tuples) {
                if (lastTuple == null) {
                    lastTuple = tuple;
                    if (set.size() < MAX_UNIQUE_COUNT) {
                        set.add(lastTuple);
                    }
                }
                else if (!tuple.equals(lastTuple)) {
                    runsCount++;
                    lastTuple = tuple;
                    if (set.size() < MAX_UNIQUE_COUNT) {
                        set.add(lastTuple);
                    }
                }
                rowCount++;
            }
        }

        public BlocksFileStats build()
        {
            // TODO: expose a way to indicate whether the unique count is EXACT or APPROXIMATE
            return new BlocksFileStats(rowCount, runsCount + 1, rowCount / (runsCount + 1), (set.size() == MAX_UNIQUE_COUNT) ? Integer.MAX_VALUE : set.size());
        }
    }
}
TOP

Related Classes of com.facebook.presto.serde.BlocksFileWriter$StatsBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.