Package io.druid.segment

Source Code of io.druid.segment.SchemalessIndex

/*
* Druid - a distributed column store.
* Copyright (C) 2012, 2013  Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
*/

package io.druid.segment;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.hash.Hashing;
import com.metamx.common.Pair;
import com.metamx.common.logger.Logger;
import io.druid.data.input.MapBasedInputRow;
import io.druid.granularity.QueryGranularity;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.TestQueryRunners;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.serde.ComplexMetrics;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.ShardSpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Test helper that builds Druid segments from "schemaless" sample data: JSON events whose
 * rows do not all share the same set of dimensions. Provides lazily built, cached
 * incremental, persisted, merged, and appended indexes for segment tests.
 */
public class SchemalessIndex
{
  private static final Logger log = new Logger(SchemalessIndex.class);
  private static final ObjectMapper jsonMapper = new DefaultObjectMapper();

  private static final String testFile = "druid.sample.json";
  private static final String TIMESTAMP = "timestamp";
  private static final List<String> METRICS = Arrays.asList("index");
  private static final AggregatorFactory[] METRIC_AGGS = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory("index", "index"),
      new CountAggregatorFactory("count"),
      new HyperUniquesAggregatorFactory("quality_uniques", "quality")
  };
  private static final AggregatorFactory[] METRIC_AGGS_NO_UNIQ = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory("index", "index"),
      new CountAggregatorFactory("count")
  };

  private static final List<Map<String, Object>> events = Lists.newArrayList();

  private static final Map<Integer, Map<Integer, QueryableIndex>> incrementalIndexes = Maps.newHashMap();
  private static final Map<Integer, Map<Integer, QueryableIndex>> mergedIndexes = Maps.newHashMap();
  private static final List<QueryableIndex> rowPersistedIndexes = Lists.newArrayList();

  private static IncrementalIndex index = null;
  private static QueryableIndex mergedIndex = null;

  static {
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
      ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(Hashing.murmur3_128()));
    }
  }

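  /**
   * Lazily builds and caches a single {@link IncrementalIndex} over the full sample file,
   * using the full aggregator set (including the hyperUnique metric).
   */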
  public static IncrementalIndex getIncrementalIndex()
  {
    synchronized (log) {
      if (index != null) {
        return index;
      }

      index = makeIncrementalIndex(testFile, METRIC_AGGS);

      return index;
    }
  }

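  /**
   * Builds a memory-mapped {@link QueryableIndex} containing only the sample events at
   * positions {@code index1} and {@code index2}, caching the result keyed by that pair.
   */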
  public static QueryableIndex getIncrementalIndex(int index1, int index2)
  {
    synchronized (log) {
      if (events.isEmpty()) {
        makeEvents();
      }

      Map<Integer, QueryableIndex> entry = incrementalIndexes.get(index1);
      if (entry != null) {
        QueryableIndex index = entry.get(index2);
        if (index != null) {
          return index;
        }
      } else {
        entry = Maps.<Integer, QueryableIndex>newHashMap();
        incrementalIndexes.put(index1, entry);
      }

      IncrementalIndex theIndex = null;

      int count = 0;
      for (final Map<String, Object> event : events) {
        if (count != index1 && count != index2) {
          count++;
          continue;
        }

        final long timestamp = new DateTime(event.get(TIMESTAMP)).getMillis();

        if (theIndex == null) {
          theIndex = new IncrementalIndex(timestamp, QueryGranularity.MINUTE, METRIC_AGGS, TestQueryRunners.pool);
        }

        final List<String> dims = Lists.newArrayList();
        for (final Map.Entry<String, Object> val : event.entrySet()) {
          if (!val.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(val.getKey())) {
            dims.add(val.getKey());
          }
        }

        theIndex.add(new MapBasedInputRow(timestamp, dims, event));

        count++;
      }
      QueryableIndex retVal = TestIndex.persistRealtimeAndLoadMMapped(theIndex);
      entry.put(index2, retVal);
      return retVal;
    }
  }

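  /**
   * Persists the "top" and "bottom" halves of the sample data as separate segments, merges
   * them on disk, and caches the resulting memory-mapped {@link QueryableIndex}.
   */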
  public static QueryableIndex getMergedIncrementalIndex()
  {
    synchronized (log) {
      if (mergedIndex != null) {
        return mergedIndex;
      }

      try {
        IncrementalIndex top = makeIncrementalIndex("druid.sample.json.top", METRIC_AGGS);
        IncrementalIndex bottom = makeIncrementalIndex("druid.sample.json.bottom", METRIC_AGGS);

        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();

        File topFile = new File(tmpFile, "top");
        File bottomFile = new File(tmpFile, "bottom");
        File mergedFile = new File(tmpFile, "merged");

        topFile.mkdirs();
        topFile.deleteOnExit();
        bottomFile.mkdirs();
        bottomFile.deleteOnExit();
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();

        IndexMerger.persist(top, topFile);
        IndexMerger.persist(bottom, bottomFile);

        mergedIndex = IndexIO.loadIndex(
            IndexMerger.mergeQueryableIndex(
                Arrays.asList(IndexIO.loadIndex(topFile), IndexIO.loadIndex(bottomFile)), METRIC_AGGS, mergedFile
            )
        );

        return mergedIndex;
      }
      catch (IOException e) {
        mergedIndex = null;
        throw Throwables.propagate(e);
      }
    }
  }

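  /**
   * Merges two of the single-row persisted segments (built on demand by
   * {@link #makeRowPersistedIndexes()}), caching the result keyed by the index pair.
   */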
  public static QueryableIndex getMergedIncrementalIndex(int index1, int index2)
  {
    synchronized (log) {
      if (rowPersistedIndexes.isEmpty()) {
        makeRowPersistedIndexes();
      }

      Map<Integer, QueryableIndex> entry = mergedIndexes.get(index1);
      if (entry != null) {
        QueryableIndex index = entry.get(index2);
        if (index != null) {
          return index;
        }
      } else {
        entry = Maps.<Integer, QueryableIndex>newHashMap();
        mergedIndexes.put(index1, entry);
      }

      try {
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();

        File mergedFile = new File(tmpFile, "merged");

        mergedFile.mkdirs();
        mergedFile.deleteOnExit();

        QueryableIndex index = IndexIO.loadIndex(
            IndexMerger.mergeQueryableIndex(
                Arrays.asList(rowPersistedIndexes.get(index1), rowPersistedIndexes.get(index2)), METRIC_AGGS, mergedFile
            )
        );

        entry.put(index2, index);

        return index;
      }
      catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

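  /**
   * Merges an arbitrary set of the single-row persisted segments. Unlike the two-argument
   * overload, the result is not cached.
   */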
  public static QueryableIndex getMergedIncrementalIndex(int[] indexes)
  {
    synchronized (log) {
      if (rowPersistedIndexes.isEmpty()) {
        makeRowPersistedIndexes();
      }

      try {
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();

        File mergedFile = new File(tmpFile, "merged");

        mergedFile.mkdirs();
        mergedFile.deleteOnExit();

        List<QueryableIndex> indexesToMerge = Lists.newArrayList();
        for (int i = 0; i < indexes.length; i++) {
          indexesToMerge.add(rowPersistedIndexes.get(indexes[i]));
        }

        QueryableIndex index = IndexIO.loadIndex(
            IndexMerger.mergeQueryableIndex(indexesToMerge, METRIC_AGGS, mergedFile)
        );

        return index;
      }
      catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

  public static QueryableIndex getAppendedIncrementalIndex(
      Iterable<Pair<String, AggregatorFactory[]>> files,
      List<Interval> intervals
  )
  {
    return makeAppendedMMappedIndex(files, intervals);
  }

  public static QueryableIndex getMergedIncrementalIndexDiffMetrics()
  {
    return getMergedIncrementalIndex(
        Arrays.<Pair<String, AggregatorFactory[]>>asList(
            new Pair<String, AggregatorFactory[]>("druid.sample.json.top", METRIC_AGGS_NO_UNIQ),
            new Pair<String, AggregatorFactory[]>("druid.sample.json.bottom", METRIC_AGGS)
        )
    );
  }

  public static QueryableIndex getMergedIncrementalIndex(Iterable<Pair<String, AggregatorFactory[]>> files)
  {
    return makeMergedMMappedIndex(files);
  }

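  /**
   * Reads the sample JSON file from the classpath into the shared {@code events} list.
   */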
  private static void makeEvents()
  {
    URL resource = TestIndex.class.getClassLoader().getResource(testFile);
    String filename = resource.getFile();
    log.info("Realtime loading index file[%s]", filename);
    try {
      for (Object obj : jsonMapper.readValue(new File(filename), List.class)) {
        final Map<String, Object> event = jsonMapper.convertValue(obj, Map.class);
        events.add(event);
      }
    }
    catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }

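  /**
   * Persists each sample event as its own single-row segment and memory-maps it into
   * {@code rowPersistedIndexes}.
   */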
  private static void makeRowPersistedIndexes()
  {
    synchronized (log) {
      try {
        if (events.isEmpty()) {
          makeEvents();
        }

        for (final Map<String, Object> event : events) {

          final long timestamp = new DateTime(event.get(TIMESTAMP)).getMillis();
          final List<String> dims = Lists.newArrayList();
          for (Map.Entry<String, Object> entry : event.entrySet()) {
            if (!entry.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(entry.getKey())) {
              dims.add(entry.getKey());
            }
          }

          final IncrementalIndex rowIndex = new IncrementalIndex(
              timestamp, QueryGranularity.MINUTE, METRIC_AGGS, TestQueryRunners.pool
          );

          rowIndex.add(
              new MapBasedInputRow(timestamp, dims, event)
          );

          File tmpFile = File.createTempFile("billy", "yay");
          tmpFile.delete();
          tmpFile.mkdirs();
          tmpFile.deleteOnExit();

          IndexMerger.persist(rowIndex, tmpFile);
          rowPersistedIndexes.add(IndexIO.loadIndex(tmpFile));
        }
      }
      catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

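  /**
   * Builds an {@link IncrementalIndex} from the given classpath resource, treating every
   * key other than the timestamp and the known metrics as a dimension.
   */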
  private static IncrementalIndex makeIncrementalIndex(final String resourceFilename, AggregatorFactory[] aggs)
  {
    URL resource = TestIndex.class.getClassLoader().getResource(resourceFilename);
    log.info("Realtime loading resource[%s]", resource);
    String filename = resource.getFile();
    log.info("Realtime loading index file[%s]", filename);

    final IncrementalIndex retVal = new IncrementalIndex(
        new DateTime("2011-01-12T00:00:00.000Z").getMillis(), QueryGranularity.MINUTE, aggs, TestQueryRunners.pool
    );

    try {
      final List<Object> events = jsonMapper.readValue(new File(filename), List.class);
      for (Object obj : events) {
        final Map<String, Object> event = jsonMapper.convertValue(obj, Map.class);

        final List<String> dims = Lists.newArrayList();
        for (Map.Entry<String, Object> entry : event.entrySet()) {
          if (!entry.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(entry.getKey())) {
            dims.add(entry.getKey());
          }
        }

        retVal.add(
            new MapBasedInputRow(
                new DateTime(event.get(TIMESTAMP)).getMillis(),
                dims,
                event
            )
        );
      }
    }
    catch (IOException e) {
      index = null;
      throw Throwables.propagate(e);
    }

    return retVal;
  }

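  /**
   * Persists each named resource as its own segment directory under {@code tmpFile} and
   * returns the directories, ready to be memory-mapped.
   */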
  private static List<File> makeFilesToMap(File tmpFile, Iterable<Pair<String, AggregatorFactory[]>> files)
      throws IOException
  {
    List<File> filesToMap = Lists.newArrayList();
    for (Pair<String, AggregatorFactory[]> file : files) {
      IncrementalIndex index = makeIncrementalIndex(file.lhs, file.rhs);
      File theFile = new File(tmpFile, file.lhs);
      theFile.mkdirs();
      theFile.deleteOnExit();
      filesToMap.add(theFile);
      IndexMerger.persist(index, theFile);
    }

    return filesToMap;
  }

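  /**
   * Persists each resource as a segment, registers each segment under its interval in a
   * {@link VersionedIntervalTimeline}, filters each segment's rows down to that interval,
   * and appends the filtered adapters into a single memory-mapped segment.
   */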
  private static QueryableIndex makeAppendedMMappedIndex(
      Iterable<Pair<String, AggregatorFactory[]>> files,
      final List<Interval> intervals
  )
  {
    try {
      File tmpFile = File.createTempFile("yay", "boo");
      tmpFile.delete();
      File mergedFile = new File(tmpFile, "merged");
      mergedFile.mkdirs();
      mergedFile.deleteOnExit();

      List<File> filesToMap = makeFilesToMap(tmpFile, files);

      List<IndexableAdapter> adapters = Lists.newArrayList();

      VersionedIntervalTimeline<Integer, File> timeline = new VersionedIntervalTimeline<Integer, File>(
          Ordering.natural().nullsFirst()
      );

      ShardSpec noneShardSpec = new NoneShardSpec();

      for (int i = 0; i < intervals.size(); i++) {
        timeline.add(intervals.get(i), i, noneShardSpec.createChunk(filesToMap.get(i)));
      }

      List<Pair<File, Interval>> intervalsToMerge = Lists.transform(
          timeline.lookup(new Interval("1000-01-01/3000-01-01")),
          new Function<TimelineObjectHolder<Integer, File>, Pair<File, Interval>>()
          {
            @Override
            public Pair<File, Interval> apply(@Nullable TimelineObjectHolder<Integer, File> input)
            {
              return new Pair<File, Interval>(input.getObject().getChunk(0).getObject(), input.getInterval());
            }
          }
      );

      for (final Pair<File, Interval> pair : intervalsToMerge) {
        adapters.add(
            new RowboatFilteringIndexAdapter(
                new QueryableIndexIndexableAdapter(IndexIO.loadIndex(pair.lhs)),
                new Predicate<Rowboat>()
                {
                  @Override
                  public boolean apply(@Nullable Rowboat input)
                  {
                    return pair.rhs.contains(input.getTimestamp());
                  }
                }
            )
        );
      }

      return IndexIO.loadIndex(IndexMerger.append(adapters, mergedFile));
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

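  /**
   * Persists each named resource as a segment and merges the resulting segments into a
   * single memory-mapped {@link QueryableIndex}.
   */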
  private static QueryableIndex makeMergedMMappedIndex(Iterable<Pair<String, AggregatorFactory[]>> files)
  {
    try {
      File tmpFile = File.createTempFile("yay", "who");
      tmpFile.delete();
      File mergedFile = new File(tmpFile, "merged");
      mergedFile.mkdirs();
      mergedFile.deleteOnExit();

      List<File> filesToMap = makeFilesToMap(tmpFile, files);

      return IndexIO.loadIndex(
          IndexMerger.mergeQueryableIndex(
              Lists.newArrayList(
                  Iterables.transform(
                      filesToMap,
                      new Function<File, QueryableIndex>()
                      {
                        @Override
                        public QueryableIndex apply(@Nullable File input)
                        {
                          try {
                            return IndexIO.loadIndex(input);
                          }
                          catch (IOException e) {
                            throw Throwables.propagate(e);
                          }
                        }
                      }
                  )
              ),
              METRIC_AGGS,
              mergedFile
          )
      );
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }
}
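
Example usage (a minimal sketch, not from the Druid sources; it assumes this class and the
sample resources such as druid.sample.json are on the test classpath, and the class name
SchemalessIndexUsageExample is hypothetical):

package io.druid.segment;

import io.druid.segment.incremental.IncrementalIndex;

public class SchemalessIndexUsageExample
{
  public static void main(String[] args)
  {
    // Lazily build (and cache) the in-memory index over the full sample file.
    IncrementalIndex incremental = SchemalessIndex.getIncrementalIndex();

    // A memory-mapped segment containing only sample events 1 and 5.
    QueryableIndex twoRows = SchemalessIndex.getIncrementalIndex(1, 5);

    // A segment produced by merging the single-row segments for the same two events.
    QueryableIndex merged = SchemalessIndex.getMergedIncrementalIndex(1, 5);

    // Merge an arbitrary set of single-row segments; this overload does not cache.
    QueryableIndex multi = SchemalessIndex.getMergedIncrementalIndex(new int[]{0, 2, 4});
  }
}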