/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.serde;
import com.facebook.presto.block.Block;
import com.facebook.presto.tuple.Tuple;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.io.OutputSupplier;
import io.airlift.slice.OutputStreamSliceOutput;
import io.airlift.slice.SliceOutput;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import static com.facebook.presto.block.BlockUtils.toTupleIterable;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
/**
 * Writes tuples to a blocks file using a {@link BlocksFileEncoding}.
 * <p>
 * The underlying output is obtained lazily from the {@link OutputSupplier} the first time
 * non-empty data is appended, so a writer that never receives data never opens the output.
 * On {@link #close()} the writer finishes the encoder and appends a footer consisting of
 * the block encoding, the collected statistics, and a trailing int holding the footer size.
 */
public class BlocksFileWriter
        implements Closeable
{
    /**
     * Writes the given blocks to the supplied output and closes the writer.
     *
     * @param encoding encoding used to create the blocks writer
     * @param sliceOutput supplier of the destination stream
     * @param blocks blocks to write, in order
     */
    public static void writeBlocks(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> sliceOutput, Block... blocks)
    {
        writeBlocks(encoding, sliceOutput, ImmutableList.copyOf(blocks));
    }

    /**
     * Writes the given blocks to the supplied output and closes the writer.
     *
     * @param encoding encoding used to create the blocks writer
     * @param sliceOutput supplier of the destination stream
     * @param blocks blocks to write, in iteration order
     */
    public static void writeBlocks(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> sliceOutput, Iterable<? extends Block> blocks)
    {
        writeBlocks(encoding, sliceOutput, blocks.iterator());
    }

    /**
     * Writes every block produced by the iterator to the supplied output and closes the writer.
     * <p>
     * If appending fails, the output stream (when one was opened) is closed without a footer
     * being written and the original failure is rethrown; any failure while closing the stream
     * is attached to it as a suppressed exception.
     *
     * @param encoding encoding used to create the blocks writer
     * @param sliceOutput supplier of the destination stream
     * @param blocks blocks to write, in iteration order
     */
    public static void writeBlocks(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> sliceOutput, Iterator<? extends Block> blocks)
    {
        checkNotNull(sliceOutput, "sliceOutput is null");
        BlocksFileWriter fileWriter = new BlocksFileWriter(encoding, sliceOutput);
        try {
            while (blocks.hasNext()) {
                fileWriter.append(toTupleIterable(blocks.next()));
            }
        }
        catch (RuntimeException | Error e) {
            // Do not leak the opened stream when the write fails part-way through
            fileWriter.abort(e);
            throw e;
        }
        fileWriter.close();
    }

    private final BlocksFileEncoding encoding;
    private final OutputSupplier<? extends OutputStream> outputSupplier;
    private final StatsBuilder statsBuilder = new StatsBuilder();

    // Both are created lazily by open() on the first non-empty append
    private Encoder encoder;
    private SliceOutput sliceOutput;

    private boolean closed;

    /**
     * Creates a writer; the output is not opened until data is appended.
     *
     * @param encoding encoding used to create the blocks writer; must not be null
     * @param outputSupplier supplier of the destination stream; must not be null
     */
    public BlocksFileWriter(BlocksFileEncoding encoding, OutputSupplier<? extends OutputStream> outputSupplier)
    {
        checkNotNull(encoding, "encoding is null");
        checkNotNull(outputSupplier, "outputSupplier is null");
        this.encoding = encoding;
        this.outputSupplier = outputSupplier;
    }

    /**
     * Appends the tuples to the file and folds them into the file statistics.
     * An empty iterable is ignored and does not cause the output to be opened.
     *
     * @param tuples tuples to append; must not be null
     * @return this writer, to allow chaining
     * @throws IllegalStateException if there is data to write but the writer was already closed
     */
    public BlocksFileWriter append(Iterable<Tuple> tuples)
    {
        Preconditions.checkNotNull(tuples, "tuples is null");
        if (!Iterables.isEmpty(tuples)) {
            // The footer has already been written and the stream closed; more data would corrupt the file
            checkState(!closed, "writer is closed");
            if (encoder == null) {
                open();
            }
            statsBuilder.process(tuples);
            encoder.append(tuples);
        }
        return this;
    }

    /**
     * Obtains the output stream from the supplier, adapts it to a {@link SliceOutput}
     * if necessary, and creates the encoder on top of it.
     */
    private void open()
    {
        try {
            OutputStream outputStream = outputSupplier.getOutput();
            if (outputStream instanceof SliceOutput) {
                sliceOutput = (SliceOutput) outputStream;
            }
            else {
                sliceOutput = new OutputStreamSliceOutput(outputStream);
            }
            encoder = encoding.createBlocksWriter(sliceOutput);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Finishes the encoder, writes the footer (block encoding, statistics, footer size)
     * and closes the output.
     * <p>
     * Does nothing if no data was ever appended or if the writer has already been closed.
     * The output is closed even when finishing the encoder or writing the footer fails,
     * and a failed close is not retried by later calls.
     */
    @Override
    public void close()
    {
        if (closed || encoder == null) {
            return;
        }
        // Mark closed before doing any work so that a failure below cannot lead to
        // the encoder being finished, or the footer being written, a second time
        closed = true;

        // try-with-resources guarantees the stream is released even if the footer cannot be written
        try (SliceOutput output = sliceOutput) {
            BlockEncoding blockEncoding = encoder.finish();

            int startingIndex = output.size();

            // write file encoding
            BlockEncodings.writeBlockEncoding(output, blockEncoding);

            // write stats
            BlocksFileStats.serialize(statsBuilder.build(), output);

            // write footer size
            int footerSize = output.size() - startingIndex;
            checkState(footerSize > 0);
            output.writeInt(footerSize);
        }
        catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Releases the output after a failed write without finishing the encoder or writing a footer.
     * Any failure while closing is recorded on {@code cause} as a suppressed exception so that
     * it does not mask the original failure.
     */
    private void abort(Throwable cause)
    {
        closed = true;
        if (sliceOutput == null) {
            return;
        }
        try {
            sliceOutput.close();
        }
        catch (IOException | RuntimeException e) {
            cause.addSuppressed(e);
        }
    }

    /**
     * Accumulates row count, run count and a bounded set of distinct tuples
     * for the tuples passed through the writer.
     */
    private static class StatsBuilder
    {
        // Upper bound on the number of distinct tuples tracked
        private static final int MAX_UNIQUE_COUNT = 1000;

        private long rowCount;
        // Number of transitions between consecutive non-equal tuples (runs - 1 once any row was seen)
        private long runsCount;
        private Tuple lastTuple;
        private final Set<Tuple> set = new HashSet<>(MAX_UNIQUE_COUNT);

        /**
         * Folds the tuples into the statistics. Runs are tracked across calls, so a tuple
         * equal to the last tuple of the previous call continues that run.
         */
        public void process(Iterable<Tuple> tuples)
        {
            Preconditions.checkNotNull(tuples, "tuples is null");
            for (Tuple tuple : tuples) {
                if (lastTuple == null || !tuple.equals(lastTuple)) {
                    // Only a change from an existing tuple is a new transition; the very first tuple is not
                    if (lastTuple != null) {
                        runsCount++;
                    }
                    lastTuple = tuple;
                    // A repeated tuple is already in the set, so only run starts need to be recorded
                    if (set.size() < MAX_UNIQUE_COUNT) {
                        set.add(lastTuple);
                    }
                }
                rowCount++;
            }
        }

        /**
         * Builds the statistics from: the row count, the number of runs, the integer average
         * run length, and the number of distinct tuples. Once the distinct set has reached
         * {@code MAX_UNIQUE_COUNT} the distinct count is reported as {@code Integer.MAX_VALUE}.
         */
        public BlocksFileStats build()
        {
            // TODO: expose a way to indicate whether the unique count is EXACT or APPROXIMATE
            long runs = runsCount + 1;
            int uniqueCount = (set.size() == MAX_UNIQUE_COUNT) ? Integer.MAX_VALUE : set.size();
            return new BlocksFileStats(rowCount, runs, rowCount / runs, uniqueCount);
        }
    }
}