package com.codecademy.eventhub;
import com.google.common.collect.ArrayTable;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.Lists;
import com.google.common.collect.Range;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.codecademy.eventhub.index.DatedEventIndex;
import com.codecademy.eventhub.index.EventIndex;
import com.codecademy.eventhub.index.PropertiesIndex;
import com.codecademy.eventhub.index.ShardedEventIndex;
import com.codecademy.eventhub.index.UserEventIndex;
import com.codecademy.eventhub.list.DummyIdList;
import com.codecademy.eventhub.list.IdList;
import com.codecademy.eventhub.list.MemIdList;
import com.codecademy.eventhub.model.Event;
import com.codecademy.eventhub.model.User;
import com.codecademy.eventhub.storage.EventStorage;
import com.codecademy.eventhub.storage.UserStorage;
import com.codecademy.eventhub.storage.filter.Filter;
import com.codecademy.eventhub.storage.filter.TrueFilter;
import org.joda.time.DateTime;
import org.joda.time.Days;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Set;
// TODO: query language
// TODO: integrate com.codahale.metrics
// TODO: refactor to have IdManager & Id class
// TODO: consider column oriented storage
// TODO: separate cache for previously computed result? same binary or redis?
// TODO: move synchronization responsibility to low level
// TODO: compression of DmaIdList
// TODO: native byte order for performance
public class EventHub implements Closeable {
/** Formatter for the {@code yyyyMMdd} date strings parsed and produced by this class. */
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormat.forPattern("yyyyMMdd");

// Directory that close() makes sure exists before the collaborators are closed.
private final String directory;

// Backing stores for raw events and users.
private final EventStorage eventStorage;
private final UserStorage userStorage;

// Indices that are updated on every write and consulted by the query methods.
private final ShardedEventIndex shardedEventIndex;
private final DatedEventIndex datedEventIndex;
private final PropertiesIndex propertiesIndex;
private final UserEventIndex userEventIndex;

/**
 * Wires an event hub from already-constructed collaborators. The references are stored as
 * given; nothing is opened, copied or validated here.
 *
 * @param directory         directory that {@link #close()} creates if it is missing
 * @param shardedEventIndex index used to enumerate events by type and date range
 * @param datedEventIndex   index used to look up event ids by date
 * @param propertiesIndex   index of event and user property keys and values
 * @param userEventIndex    index used to enumerate the events of a single user
 * @param eventStorage      storage for events
 * @param userStorage       storage for users
 */
public EventHub(
    String directory,
    ShardedEventIndex shardedEventIndex,
    DatedEventIndex datedEventIndex,
    PropertiesIndex propertiesIndex,
    UserEventIndex userEventIndex,
    EventStorage eventStorage,
    UserStorage userStorage) {
  this.directory = directory;
  this.eventStorage = eventStorage;
  this.userStorage = userStorage;
  this.shardedEventIndex = shardedEventIndex;
  this.datedEventIndex = datedEventIndex;
  this.propertiesIndex = propertiesIndex;
  this.userEventIndex = userEventIndex;
}
/**
 * Computes a cohort retention table.
 *
 * <p>The range from {@code startDateString} to {@code endDateString} is cut into
 * {@code ((days between) + 1) / numDaysPerCohort} consecutive cohorts (integer division, so a
 * trailing partial cohort is dropped). Row {@code i} of the result describes cohort {@code i}:
 * <ul>
 *   <li>column {@code 0} is the number of distinct users with an accepted
 *       {@code rowEventType} event in that cohort's period;</li>
 *   <li>column {@code j + 1} is how many of those users also have an accepted
 *       {@code columnEventType} event in period {@code i + j}.</li>
 * </ul>
 *
 * @param startDateString   first day of the first cohort, formatted {@code yyyyMMdd}
 * @param endDateString     last day of the range, formatted {@code yyyyMMdd}
 * @param numDaysPerCohort  length of each cohort period in days; must be positive
 * @param numColumns        number of follow-up periods per row; must not be negative
 * @param rowEventType      event type that defines cohort membership
 * @param columnEventType   event type that counts as "retained"
 * @param rowEventFilter    filter applied to row events
 * @param columnEventFilter filter applied to column events
 * @return a {@code numRows x (numColumns + 1)} table; it has zero rows when the range is
 *     shorter than one cohort
 * @throws IllegalArgumentException if {@code numDaysPerCohort} is not positive,
 *     {@code numColumns} is negative, or the end date is before the start date
 */
public int[][] getRetentionTable(String startDateString,
    String endDateString, int numDaysPerCohort, int numColumns, String rowEventType,
    String columnEventType, Filter rowEventFilter, Filter columnEventFilter) {
  if (numDaysPerCohort <= 0) {
    throw new IllegalArgumentException(
        String.format("numDaysPerCohort must be positive, got %d", numDaysPerCohort));
  }
  if (numColumns < 0) {
    throw new IllegalArgumentException(
        String.format("numColumns must not be negative, got %d", numColumns));
  }
  DateTime startDate = DATE_TIME_FORMATTER.parseDateTime(startDateString);
  DateTime endDate = DATE_TIME_FORMATTER.parseDateTime(endDateString);
  if (endDate.isBefore(startDate)) {
    throw new IllegalArgumentException(String.format(
        "endDate %s is before startDate %s", endDateString, startDateString));
  }

  int numRows = (Days.daysBetween(startDate, endDate).getDays() + 1) / numDaysPerCohort;
  List<Set<Integer>> rowIdSets = getUserIdsSets(rowEventType, startDate, rowEventFilter,
      numDaysPerCohort, numRows);
  // Row i is compared against periods i .. i + numColumns - 1, hence numColumns + numRows sets.
  List<Set<Integer>> columnIdSets = getUserIdsSets(columnEventType, startDate, columnEventFilter,
      numDaysPerCohort, numColumns + numRows);

  // Fill the primitive result directly; no boxed intermediate table is needed.
  int[][] result = new int[numRows][numColumns + 1];
  for (int i = 0; i < numRows; i++) {
    Set<Integer> rowSet = rowIdSets.get(i);
    result[i][0] = rowSet.size();
    for (int j = 0; j < numColumns; j++) {
      result[i][j + 1] = Sets.intersection(columnIdSets.get(i + j), rowSet).size();
    }
  }
  return result;
}
/**
 * Counts how many users progressed through each step of a funnel.
 *
 * <p>Users are first collected from events of the first step's type between
 * {@code startDate} and {@code endDate} that pass {@code eventFilters.get(0)} and
 * {@code userFilter}; each user is recorded once, together with the first such event seen.
 * For funnels with more than one step, that user's events are then walked starting at the
 * recorded event and matched against the remaining steps in order, stopping at the event id
 * returned by {@code datedEventIndex.findFirstEventIdOnDate(firstStepEventId,
 * numDaysToCompleteFunnel)}.
 *
 * @param startDate               start of the range for first-step events
 * @param endDate                 end of the range for first-step events
 * @param funnelStepsEventTypes   event type of each funnel step, in order
 * @param numDaysToCompleteFunnel passed to the dated index to bound the search window
 * @param eventFilters            one filter per funnel step, indexed like the steps
 * @param userFilter              filter every counted user must pass
 * @return an array with one entry per step; entry {@code i} is the number of users who
 *     matched at least steps {@code 0..i}. Empty when no steps are given.
 */
public synchronized int[] getFunnelCounts(String startDate, String endDate, String[] funnelStepsEventTypes,
    int numDaysToCompleteFunnel, List<Filter> eventFilters, Filter userFilter) {
  int[] numFunnelStepsMatched = new int[funnelStepsEventTypes.length];
  if (funnelStepsEventTypes.length == 0) {
    // Nothing to match; avoids indexing step 0 of an empty funnel below.
    return numFunnelStepsMatched;
  }

  IdList firstStepEventIdList = new MemIdList(new long[10000], 0);
  int[] funnelStepsEventTypeIds = getEventTypeIds(funnelStepsEventTypes);
  List<Integer> userIdsList = Lists.newArrayList();
  Set<Integer> userIdsSet = Sets.newHashSet();
  EventIndex.Callback aggregateUserIdsCallback = new AggregateUserIds(eventStorage, userStorage,
      firstStepEventIdList, eventFilters.get(0), userFilter, userIdsList, userIdsSet);
  shardedEventIndex.enumerateEventIds(funnelStepsEventTypes[0], startDate, endDate,
      aggregateUserIdsCallback);

  if (funnelStepsEventTypes.length == 1) {
    // Every collected user matched the only step.
    numFunnelStepsMatched[0] = userIdsList.size();
    return numFunnelStepsMatched;
  }

  // userIdsList and firstStepEventIdList were appended to in lockstep by the callback, so
  // advancing the iterator once per user pairs each user with their first-step event.
  IdList.Iterator firstStepEventIdIterator = firstStepEventIdList.iterator();
  for (int userId : userIdsList) {
    long firstStepEventId = firstStepEventIdIterator.next();
    long maxLastStepEventId =
        datedEventIndex.findFirstEventIdOnDate(firstStepEventId, numDaysToCompleteFunnel);
    CountMatchedFunnelSteps countMatchedFunnelSteps = new CountMatchedFunnelSteps(
        eventStorage, userStorage, funnelStepsEventTypeIds, 1 /* first step already matched */,
        maxLastStepEventId, eventFilters, userFilter);
    userEventIndex.enumerateEventIds(userId, userEventIndex.getEventOffset(userId, firstStepEventId),
        Integer.MAX_VALUE, countMatchedFunnelSteps);
    int numMatchedSteps = countMatchedFunnelSteps.getNumMatchedSteps();
    for (int i = 0; i < numMatchedSteps; i++) {
      numFunnelStepsMatched[i]++;
    }
  }
  return numFunnelStepsMatched;
}
/**
 * Registers {@code fromExternalUserId} as an alias of the user known as
 * {@code toExternalUserId}. The target user is ensured in user storage first and its id is
 * then looked up.
 *
 * @param fromExternalUserId external id to register as the alias
 * @param toExternalUserId   external id of the user being aliased
 * @throws IllegalArgumentException if the lookup of {@code toExternalUserId} still reports
 *     {@code UserStorage.USER_NOT_FOUND}
 */
public synchronized void aliasUser(String fromExternalUserId, String toExternalUserId) {
  userStorage.ensureUser(toExternalUserId);
  final int targetUserId = userStorage.getId(toExternalUserId);
  if (targetUserId != UserStorage.USER_NOT_FOUND) {
    userStorage.alias(fromExternalUserId, targetUserId);
    return;
  }
  throw new IllegalArgumentException(
      String.format("User: %s does not exist!!!", toExternalUserId));
}
/**
 * Ensures the user's external id is known to user storage, writes the user through
 * {@code userStorage.updateUser}, and registers the user with the properties index.
 *
 * @param user the user to store
 * @return the value returned by {@code userStorage.updateUser(user)}
 */
public synchronized int addOrUpdateUser(User user) {
  final String externalId = user.getExternalId();
  userStorage.ensureUser(externalId);
  final int storedUserId = userStorage.updateUser(user);
  propertiesIndex.addUser(user);
  return storedUserId;
}
/**
 * Looks up a user in user storage.
 *
 * @param userId internal id of the user
 * @return whatever {@code userStorage.getUser(userId)} returns
 */
public User getUser(int userId) {
  final User storedUser = userStorage.getUser(userId);
  return storedUser;
}
/**
 * Looks up an event in event storage.
 *
 * @param eventId id of the event
 * @return whatever {@code eventStorage.getEvent(eventId)} returns
 */
public Event getEvent(long eventId) {
  final Event storedEvent = eventStorage.getEvent(eventId);
  return storedEvent;
}
/**
 * Stores an event and registers it with every index.
 *
 * <p>The event type and the user are ensured first so both have ids; the event is then
 * written to event storage and the resulting event id is added to the dated, sharded,
 * per-user and properties indices, in that order.
 *
 * @param event the event to record
 * @return the id event storage assigned to the event
 */
public synchronized long addEvent(Event event) {
  final String eventType = event.getEventType();

  // Resolve (creating if needed) the ids the stored record refers to.
  final int typeId = shardedEventIndex.ensureEventType(eventType);
  final int ownerId = userStorage.ensureUser(event.getExternalUserId());

  final long newEventId = eventStorage.addEvent(event, ownerId, typeId);

  // Make the stored event reachable through each index.
  final String eventDate = event.getDate();
  datedEventIndex.addEvent(newEventId, eventDate);
  shardedEventIndex.addEvent(newEventId, eventType, eventDate);
  userEventIndex.addEvent(ownerId, newEventId);
  propertiesIndex.addEvent(event);

  return newEventId;
}
/**
 * Lists the event types reported by the sharded event index.
 *
 * @return the list returned by {@code shardedEventIndex.getEventTypes()}
 */
public List<String> getEventTypes() {
  final List<String> knownEventTypes = shardedEventIndex.getEventTypes();
  return knownEventTypes;
}
/**
 * Collects events belonging to one user.
 *
 * @param externalUserId external id of the user
 * @param offset         passed to the per-user index as the position to start from
 * @param numRecords     passed to the per-user index as the number of records to enumerate
 * @return the events loaded from event storage for each enumerated event id; an empty list
 *     when {@code externalUserId} is not known to user storage
 */
public List<Event> getUserEvents(String externalUserId, int offset, int numRecords) {
  List<Event> events = Lists.newArrayList();
  int userId = userStorage.getId(externalUserId);
  if (userId == UserStorage.USER_NOT_FOUND) {
    // An unknown user has no events; do not hand the sentinel id to the index.
    return events;
  }
  userEventIndex.enumerateEventIds(userId, offset, numRecords,
      new CollectEvents(events, eventStorage));
  return events;
}
/**
 * Makes sure the data directory exists, then closes both storages and all four indices.
 *
 * <p>Every component is closed even if an earlier one fails. When more than one
 * {@code close()} throws, the exception from the component closed last is the one that
 * propagates.
 *
 * @throws IOException if closing a component fails
 */
@Override
public void close() throws IOException {
  //noinspection ResultOfMethodCallIgnored
  new File(directory).mkdirs();
  // Chained try/finally so one failing close() cannot leave later components open.
  try {
    eventStorage.close();
  } finally {
    try {
      userStorage.close();
    } finally {
      try {
        shardedEventIndex.close();
      } finally {
        try {
          propertiesIndex.close();
        } finally {
          try {
            datedEventIndex.close();
          } finally {
            userEventIndex.close();
          }
        }
      }
    }
  }
}
/**
 * Renders a plain-text status report: the dated index's current date followed by the
 * {@code getVarz(1)} output of event storage, user storage, the sharded event index and the
 * user event index, each under its own heading.
 *
 * @return the formatted report
 */
public String getVarz() {
  final String template =
      "current date: %s\n"
          + "Event Storage:\n==============\n%s\n\n"
          + "User Storage:\n==============\n%s\n\n"
          + "Event Index:\n==============\n%s\n\n"
          + "User Event Index:\n==============\n%s";
  return String.format(
      template,
      datedEventIndex.getCurrentDate(),
      eventStorage.getVarz(1),
      userStorage.getVarz(1),
      shardedEventIndex.getVarz(1),
      userEventIndex.getVarz(1));
}
/**
 * Translates event type names to the ids the sharded event index reports for them.
 *
 * @param eventTypes event type names
 * @return an array of the same length holding the id for each name, in the same order
 */
private int[] getEventTypeIds(String[] eventTypes) {
  final int[] ids = new int[eventTypes.length];
  int position = 0;
  for (String eventType : eventTypes) {
    ids[position++] = shardedEventIndex.getEventTypeId(eventType);
  }
  return ids;
}
/**
 * Collects, for each of {@code numCohorts} consecutive periods, the distinct ids of users
 * who have an accepted event of the given type in that period.
 *
 * <p>Period {@code k} is enumerated from {@code startDate + k * numDaysPerCohort} days to
 * {@code startDate + (k + 1) * numDaysPerCohort} days, both formatted as {@code yyyyMMdd};
 * how the index treats the end bound is up to {@code shardedEventIndex.enumerateEventIds}.
 * No user filter is applied ({@code TrueFilter.INSTANCE}).
 *
 * @param groupByEventType event type to enumerate
 * @param startDate        first day of the first period
 * @param eventFilter      filter events must pass to be counted
 * @param numDaysPerCohort length of each period in days
 * @param numCohorts       number of periods
 * @return one user-id set per period, in period order
 */
private List<Set<Integer>> getUserIdsSets(String groupByEventType, DateTime startDate,
    Filter eventFilter, int numDaysPerCohort, int numCohorts) {
  final List<Set<Integer>> cohorts = Lists.newArrayListWithCapacity(numCohorts);
  for (int cohort = 0; cohort < numCohorts; cohort++) {
    final String periodStart =
        startDate.plusDays(cohort * numDaysPerCohort).toString(DATE_TIME_FORMATTER);
    final String periodEnd =
        startDate.plusDays((cohort + 1) * numDaysPerCohort).toString(DATE_TIME_FORMATTER);

    // The callback fills both collections; only the set is returned to the caller.
    final List<Integer> orderedUserIds = Lists.newArrayList();
    final Set<Integer> distinctUserIds = Sets.newHashSet();
    final EventIndex.Callback collector = new AggregateUserIds(eventStorage, userStorage,
        new DummyIdList(), eventFilter, TrueFilter.INSTANCE, orderedUserIds, distinctUserIds);

    shardedEventIndex.enumerateEventIds(groupByEventType, periodStart, periodEnd, collector);
    cohorts.add(distinctUserIds);
  }
  return cohorts;
}
/**
 * Lists the property keys the properties index holds for an event type.
 *
 * @param eventType event type to look up
 * @return the list returned by {@code propertiesIndex.getEventKeys(eventType)}
 */
public List<String> getEventKeys(String eventType) {
  final List<String> keys = propertiesIndex.getEventKeys(eventType);
  return keys;
}
/**
 * Asks the properties index for values of an event property.
 *
 * @param eventType event type to look up
 * @param eventKey  property key within that event type
 * @param prefix    prefix forwarded to the properties index
 * @return the list returned by {@code propertiesIndex.getEventValues(eventType, eventKey, prefix)}
 */
public List<String> getEventValues(String eventType, String eventKey, String prefix) {
  final List<String> values = propertiesIndex.getEventValues(eventType, eventKey, prefix);
  return values;
}
/**
 * Lists the user property keys held by the properties index.
 *
 * @return the list returned by {@code propertiesIndex.getUserKeys()}
 */
public List<String> getUserKeys() {
  final List<String> keys = propertiesIndex.getUserKeys();
  return keys;
}
/**
 * Asks the properties index for values of a user property.
 *
 * @param eventKey property key to look up (forwarded unchanged as the user key)
 * @param prefix   prefix forwarded to the properties index
 * @return the list returned by {@code propertiesIndex.getUserValues(eventKey, prefix)}
 */
public List<String> getUserValues(String eventKey, String prefix) {
  final List<String> values = propertiesIndex.getUserValues(eventKey, prefix);
  return values;
}
/**
 * Scans user ids from {@code 0} up to (but excluding) {@code userStorage.getNumRecords()}
 * and returns the users accepted by the filter.
 *
 * @param filter filter applied to each user's filter visitor
 * @return the accepted users, in ascending id order
 */
public List<User> findUsers(Filter filter) {
  final List<User> matchingUsers = Lists.newArrayList();
  int candidateId = 0;
  // The record count is re-read before every step, as in a for-loop condition.
  while (candidateId < userStorage.getNumRecords()) {
    if (filter.accept(userStorage.getFilterVisitor(candidateId))) {
      matchingUsers.add(getUser(candidateId));
    }
    candidateId++;
  }
  return matchingUsers;
}
/**
 * Event-index callback that records each user the first time one of their events passes
 * both filters. For every such user it appends the user id to the list and the set, and the
 * triggering event id to {@code earliestEventIdList}, so the list and the id list stay
 * aligned by position.
 */
private static class AggregateUserIds implements EventIndex.Callback {
  private final EventStorage eventStorage;
  private final UserStorage userStorage;
  private final IdList earliestEventIdList;
  private final Filter eventFilter;
  private final Filter userFilter;
  private final List<Integer> seenUserIdList;
  private final Set<Integer> seenUserIdSet;

  /**
   * @param eventStorage        source of each event's user id and filter visitor
   * @param userStorage         source of each user's filter visitor
   * @param earliestEventIdList receives the id of the first accepted event per user
   * @param eventFilter         filter an event must pass
   * @param userFilter          filter the event's user must pass
   * @param seenUserIdList      receives accepted user ids in encounter order
   * @param seenUserIdSet       receives accepted user ids; also used to skip known users
   */
  public AggregateUserIds(EventStorage eventStorage, UserStorage userStorage,
      IdList earliestEventIdList, Filter eventFilter, Filter userFilter,
      List<Integer> seenUserIdList, Set<Integer> seenUserIdSet) {
    this.eventStorage = eventStorage;
    this.userStorage = userStorage;
    this.earliestEventIdList = earliestEventIdList;
    this.eventFilter = eventFilter;
    this.userFilter = userFilter;
    this.seenUserIdList = seenUserIdList;
    this.seenUserIdSet = seenUserIdSet;
  }

  @Override
  public void onEventId(long eventId) {
    // Look the user up once; it is needed for the seen-check and for the user filter.
    int userId = eventStorage.getUserId(eventId);
    if (seenUserIdSet.contains(userId)) {
      // Already recorded; skip the filter evaluation entirely.
      return;
    }
    if (!eventFilter.accept(eventStorage.getFilterVisitor(eventId))) {
      return;
    }
    if (!userFilter.accept(userStorage.getFilterVisitor(userId))) {
      return;
    }
    // TODO: consider other higher performing Set implementation
    if (seenUserIdSet.add(userId)) {
      seenUserIdList.add(userId);
      earliestEventIdList.add(eventId);
    }
  }
}
/**
 * Per-user callback that walks a user's events and advances through the funnel steps in
 * order. The walk stops once an event id reaches {@code maxEventId} or the last step has
 * been matched.
 */
private static class CountMatchedFunnelSteps implements UserEventIndex.Callback {
  private final EventStorage eventStorage;
  private final UserStorage userStorage;
  private final int[] funnelStepsEventTypeIds;
  private final List<Filter> eventFilters;
  private final Filter userFilter;
  private final long maxEventId;
  // Index of the next step to match; equivalently, the number of steps matched so far.
  private int numMatchedSteps;

  /**
   * @param eventStorage            source of event type ids, user ids and filter visitors
   * @param userStorage             source of user filter visitors
   * @param funnelStepsEventTypeIds event type id of each funnel step, in order
   * @param numMatchedSteps         number of steps already matched before the walk starts
   * @param maxEventId              first event id that is out of range (exclusive bound)
   * @param eventFilters            one filter per funnel step, indexed like the steps
   * @param userFilter              filter the event's user must pass
   */
  public CountMatchedFunnelSteps(EventStorage eventStorage, UserStorage userStorage,
      int[] funnelStepsEventTypeIds, int numMatchedSteps, long maxEventId, List<Filter> eventFilters,
      Filter userFilter) {
    this.eventStorage = eventStorage;
    this.userStorage = userStorage;
    this.funnelStepsEventTypeIds = funnelStepsEventTypeIds;
    this.numMatchedSteps = numMatchedSteps;
    this.maxEventId = maxEventId;
    this.eventFilters = eventFilters;
    this.userFilter = userFilter;
  }

  @Override
  public boolean shouldContinueOnEventId(long eventId) {
    if (eventId >= maxEventId) {
      return false;
    }
    // Checks short-circuit in this order: event type, step's event filter, user filter.
    // TODO: user criteria filter should be at higher level
    final boolean matchesNextStep =
        eventStorage.getEventTypeId(eventId) == funnelStepsEventTypeIds[numMatchedSteps]
            && eventFilters.get(numMatchedSteps).accept(eventStorage.getFilterVisitor(eventId))
            && userFilter.accept(userStorage.getFilterVisitor(eventStorage.getUserId(eventId)));
    if (matchesNextStep) {
      numMatchedSteps++;
      // Keep going only while there are steps left to match.
      return numMatchedSteps != funnelStepsEventTypeIds.length;
    }
    return true;
  }

  /** @return the number of funnel steps matched so far */
  public int getNumMatchedSteps() {
    return numMatchedSteps;
  }
}
/**
 * Callback usable with both index types that loads every enumerated event from event
 * storage and appends it to a caller-supplied list. It never asks the enumeration to stop.
 */
private static class CollectEvents implements UserEventIndex.Callback, EventIndex.Callback {
  private final List<Event> events;
  private final EventStorage eventStorage;

  /**
   * @param events       list that receives the loaded events
   * @param eventStorage storage the events are loaded from
   */
  private CollectEvents(List<Event> events, EventStorage eventStorage) {
    this.events = events;
    this.eventStorage = eventStorage;
  }

  @Override
  public boolean shouldContinueOnEventId(long eventId) {
    collect(eventId);
    return true;
  }

  @Override
  public void onEventId(long eventId) {
    collect(eventId);
  }

  // Loads one event and appends it to the output list.
  private void collect(long eventId) {
    final Event loaded = eventStorage.getEvent(eventId);
    events.add(loaded);
  }
}
}