Package com.metamx.common.io.smoosh

Examples of com.metamx.common.io.smoosh.FileSmoosher


      }

      SmooshedFileMapper v8SmooshedFiles = Smoosh.map(v8Dir);

      v9Dir.mkdirs();
      final FileSmoosher v9Smoosher = new FileSmoosher(v9Dir);

      ByteStreams.write(Ints.toByteArray(9), Files.newOutputStreamSupplier(new File(v9Dir, "version.bin")));

      Map<String, GenericIndexed<ImmutableBitmap>> bitmapIndexes = Maps.newHashMap();
      final ByteBuffer invertedBuffer = v8SmooshedFiles.mapFile("inverted.drd");
      while (invertedBuffer.hasRemaining()) {
        final String dimName = serializerUtils.readString(invertedBuffer);
        bitmapIndexes.put(
            dimName,
            GenericIndexed.read(invertedBuffer, bitmapSerdeFactory.getObjectStrategy())
        );
      }

      Map<String, ImmutableRTree> spatialIndexes = Maps.newHashMap();
      final ByteBuffer spatialBuffer = v8SmooshedFiles.mapFile("spatial.drd");
      while (spatialBuffer != null && spatialBuffer.hasRemaining()) {
        spatialIndexes.put(
            serializerUtils.readString(spatialBuffer),
            ByteBufferSerializer.read(
                spatialBuffer, new IndexedRTree.ImmutableRTreeObjectStrategy(
                    bitmapSerdeFactory.getBitmapFactory()
                )
            )
        );
      }

      final LinkedHashSet<String> skippedFiles = Sets.newLinkedHashSet();
      final Set<String> skippedDimensions = Sets.newLinkedHashSet();
      for (String filename : v8SmooshedFiles.getInternalFilenames()) {
        log.info("Processing file[%s]", filename);
        if (filename.startsWith("dim_")) {
          final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();
          builder.setValueType(ValueType.STRING);

          final List<ByteBuffer> outParts = Lists.newArrayList();

          ByteBuffer dimBuffer = v8SmooshedFiles.mapFile(filename);
          String dimension = serializerUtils.readString(dimBuffer);
          if (!filename.equals(String.format("dim_%s.drd", dimension))) {
            throw new ISE("loaded dimension[%s] from file[%s]", dimension, filename);
          }

          ByteArrayOutputStream nameBAOS = new ByteArrayOutputStream();
          serializerUtils.writeString(nameBAOS, dimension);
          outParts.add(ByteBuffer.wrap(nameBAOS.toByteArray()));

          GenericIndexed<String> dictionary = GenericIndexed.read(
              dimBuffer, GenericIndexed.stringStrategy
          );

          if (dictionary.size() == 0) {
            log.info("Dimension[%s] had cardinality 0, equivalent to no column, so skipping.", dimension);
            skippedDimensions.add(dimension);
            continue;
          }

          VSizeIndexedInts singleValCol = null;
          VSizeIndexed multiValCol = VSizeIndexed.readFromByteBuffer(dimBuffer.asReadOnlyBuffer());
          GenericIndexed<ImmutableBitmap> bitmaps = bitmapIndexes.get(dimension);
          ImmutableRTree spatialIndex = spatialIndexes.get(dimension);

          final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
          boolean onlyOneValue = true;
          MutableBitmap nullsSet = null;
          for (int i = 0; i < multiValCol.size(); ++i) {
            VSizeIndexedInts rowValue = multiValCol.get(i);
            if (!onlyOneValue) {
              break;
            }
            if (rowValue.size() > 1) {
              onlyOneValue = false;
            }
            if (rowValue.size() == 0) {
              if (nullsSet == null) {
                nullsSet = bitmapFactory.makeEmptyMutableBitmap();
              }
              nullsSet.add(i);
            }
          }

          if (onlyOneValue) {
            log.info("Dimension[%s] is single value, converting...", dimension);
            final boolean bumpedDictionary;
            if (nullsSet != null) {
              log.info("Dimension[%s] has null rows.", dimension);
              final ImmutableBitmap theNullSet = bitmapFactory.makeImmutableBitmap(nullsSet);

              if (dictionary.get(0) != null) {
                log.info("Dimension[%s] has no null value in the dictionary, expanding...", dimension);
                bumpedDictionary = true;
                final List<String> nullList = Lists.newArrayList();
                nullList.add(null);

                dictionary = GenericIndexed.fromIterable(
                    Iterables.concat(nullList, dictionary),
                    GenericIndexed.stringStrategy
                );

                bitmaps = GenericIndexed.fromIterable(
                    Iterables.concat(Arrays.asList(theNullSet), bitmaps),
                    bitmapSerdeFactory.getObjectStrategy()
                );
              } else {
                bumpedDictionary = false;
                bitmaps = GenericIndexed.fromIterable(
                    Iterables.concat(
                        Arrays.asList(
                            bitmapFactory
                                .union(Arrays.asList(theNullSet, bitmaps.get(0)))
                        ),
                        Iterables.skip(bitmaps, 1)
                    ),
                    bitmapSerdeFactory.getObjectStrategy()
                );
              }
            } else {
              bumpedDictionary = false;
            }

            final VSizeIndexed finalMultiValCol = multiValCol;
            singleValCol = VSizeIndexedInts.fromList(
                new AbstractList<Integer>()
                {
                  @Override
                  public Integer get(int index)
                  {
                    final VSizeIndexedInts ints = finalMultiValCol.get(index);
                    return ints.size() == 0 ? 0 : ints.get(0) + (bumpedDictionary ? 1 : 0);
                  }

                  @Override
                  public int size()
                  {
                    return finalMultiValCol.size();
                  }
                },
                dictionary.size()
            );
            multiValCol = null;
          } else {
            builder.setHasMultipleValues(true);
          }

          builder.addSerde(
              new DictionaryEncodedColumnPartSerde(
                  dictionary,
                  singleValCol,
                  multiValCol,
                  bitmapSerdeFactory,
                  bitmaps,
                  spatialIndex
              )
          );

          final ColumnDescriptor serdeficator = builder.build();

          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
          byte[] specBytes = baos.toByteArray();

          final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(
              dimension, serdeficator.numBytes() + specBytes.length
          );
          channel.write(ByteBuffer.wrap(specBytes));
          serdeficator.write(channel);
          channel.close();
        } else if (filename.startsWith("met_")) {
          if (!filename.endsWith(String.format("%s.drd", BYTE_ORDER))) {
            skippedFiles.add(filename);
            continue;
          }

          MetricHolder holder = MetricHolder.fromByteBuffer(v8SmooshedFiles.mapFile(filename));
          final String metric = holder.getName();

          final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();

          switch (holder.getType()) {
            case LONG:
              builder.setValueType(ValueType.LONG);
              builder.addSerde(new LongGenericColumnPartSerde(holder.longType, BYTE_ORDER));
              break;
            case FLOAT:
              builder.setValueType(ValueType.FLOAT);
              builder.addSerde(new FloatGenericColumnPartSerde(holder.floatType, BYTE_ORDER));
              break;
            case COMPLEX:
              if (!(holder.complexType instanceof GenericIndexed)) {
                throw new ISE("Serialized complex types must be GenericIndexed objects.");
              }
              final GenericIndexed column = (GenericIndexed) holder.complexType;
              final String complexType = holder.getTypeName();

              builder.setValueType(ValueType.COMPLEX);
              builder.addSerde(new ComplexColumnPartSerde(column, complexType));
              break;
            default:
              throw new ISE("Unknown type[%s]", holder.getType());
          }

          final ColumnDescriptor serdeficator = builder.build();

          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
          byte[] specBytes = baos.toByteArray();

          final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(
              metric, serdeficator.numBytes() + specBytes.length
          );
          channel.write(ByteBuffer.wrap(specBytes));
          serdeficator.write(channel);
          channel.close();
        } else if (String.format("time_%s.drd", BYTE_ORDER).equals(filename)) {
          CompressedLongsIndexedSupplier timestamps = CompressedLongsIndexedSupplier.fromByteBuffer(
              v8SmooshedFiles.mapFile(filename), BYTE_ORDER
          );

          final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();
          builder.setValueType(ValueType.LONG);
          builder.addSerde(new LongGenericColumnPartSerde(timestamps, BYTE_ORDER));

          final ColumnDescriptor serdeficator = builder.build();

          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
          byte[] specBytes = baos.toByteArray();

          final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(
              "__time", serdeficator.numBytes() + specBytes.length
          );
          channel.write(ByteBuffer.wrap(specBytes));
          serdeficator.write(channel);
          channel.close();
        } else {
          skippedFiles.add(filename);
        }
      }

      final ByteBuffer indexBuffer = v8SmooshedFiles.mapFile("index.drd");

      indexBuffer.get(); // Skip the version byte
      final GenericIndexed<String> dims8 = GenericIndexed.read(
          indexBuffer, GenericIndexed.stringStrategy
      );
      final GenericIndexed<String> dims9 = GenericIndexed.fromIterable(
          Iterables.filter(
              dims8, new Predicate<String>()
              {
                @Override
                public boolean apply(String s)
                {
                  return !skippedDimensions.contains(s);
                }
              }
          ),
          GenericIndexed.stringStrategy
      );
      final GenericIndexed<String> availableMetrics = GenericIndexed.read(
          indexBuffer, GenericIndexed.stringStrategy
      );
      final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer));
      final BitmapSerdeFactory segmentBitmapSerdeFactory = mapper.readValue(
          serializerUtils.readString(indexBuffer),
          BitmapSerdeFactory.class
      );

      Set<String> columns = Sets.newTreeSet();
      columns.addAll(Lists.newArrayList(dims9));
      columns.addAll(Lists.newArrayList(availableMetrics));

      GenericIndexed<String> cols = GenericIndexed.fromIterable(columns, GenericIndexed.stringStrategy);

      final String segmentBitmapSerdeFactoryString = mapper.writeValueAsString(segmentBitmapSerdeFactory);

      final long numBytes = cols.getSerializedSize() + dims9.getSerializedSize() + 16
                            + serializerUtils.getSerializedStringByteSize(segmentBitmapSerdeFactoryString);

      final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
      cols.writeToChannel(writer);
      dims9.writeToChannel(writer);
      serializerUtils.writeLong(writer, dataInterval.getStartMillis());
      serializerUtils.writeLong(writer, dataInterval.getEndMillis());
      serializerUtils.writeString(writer, segmentBitmapSerdeFactoryString);
      writer.close();

      log.info("Skipped files[%s]", skippedFiles);

      v9Smoosher.close();
    }
View Full Code Here


        metricTypeNames.put(metric, adapter.getMetricType(metric));
      }
    }

    outDir.mkdirs();
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);

    ByteStreams.write(
        Ints.toByteArray(IndexIO.V9_VERSION),
        Files.newOutputStreamSupplier(new File(outDir, "version.bin"))
    );

    final Map<String, Integer> dimIndexes = Maps.newHashMap();
    final Map<String, Iterable<String>> dimensionValuesLookup = Maps.newHashMap();
    final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(adapters.size());
    final Set<String> skippedDimensions = Sets.newHashSet();
    final List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(adapters.size());

    progress.progress();
    setupDimConversion(
        adapters,
        progress,
        mergedDimensions,
        dimConversions,
        dimIndexes,
        skippedDimensions,
        dimensionValuesLookup
    );

    progress.progress();
    final Iterable<Rowboat> theRows = makeRowIterable(
        adapters,
        mergedDimensions,
        mergedMetrics,
        dimConversions,
        rowMergerFn
    );

    progress.progress();
    final int rowCount = convertDims(adapters, progress, theRows, rowNumConversions);

    progress.progress();
    makeTimeColumn(v9Smoosher, progress, theRows, rowCount);

    progress.progress();
    makeDimColumns(
        v9Smoosher,
        adapters,
        progress,
        mergedDimensions,
        skippedDimensions,
        theRows,
        columnCapabilities,
        dimensionValuesLookup,
        rowNumConversions
    );

    progress.progress();
    makeMetricColumns(v9Smoosher, progress, theRows, mergedMetrics, valueTypes, metricTypeNames, rowCount);

    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, skippedDimensions, progress);

    v9Smoosher.close();

    progress.stop();

    return outDir;
  }
View Full Code Here

TOP

Related Classes of com.metamx.common.io.smoosh.FileSmoosher

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.