package edu.brown.workload;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.json.JSONException;
import org.json.JSONObject;
import org.voltdb.catalog.Database;
import org.voltdb.utils.Pair;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.FileUtil;
import edu.brown.workload.filters.Filter;
import edu.brown.workload.filters.Filter.FilterResult;
public abstract class WorkloadUtil {
// Class-wide logger; the debug/trace LoggerBooleans are kept in sync with the
// log4j configuration by LoggerUtil (see static initializer below).
private static final Logger LOG = Logger.getLogger(WorkloadUtil.class);
private static final LoggerBoolean debug = new LoggerBoolean();
private static final LoggerBoolean trace = new LoggerBoolean();
static {
    LoggerUtil.attachObserver(LOG, debug, trace);
}
// Indexes into the shared AtomicInteger[] counters array handed to each
// ProcessingThread (see ProcessingThread.run() for how each slot is used).
private static final int ELEMENT_CTR_IDX = 0;                            // total trace elements (txns + their queries)
private static final int TXN_CTR_IDX = ELEMENT_CTR_IDX + 1;              // transactions processed
private static final int QUERY_CTR_IDX = ELEMENT_CTR_IDX + 2;            // queries processed
private static final int WEIGHTED_TXN_CTR_IDX = ELEMENT_CTR_IDX + 3;     // sum of transaction weights
private static final int WEIGHTED_QUERY_CTR_IDX = ELEMENT_CTR_IDX + 4;   // sum of query weights
/**
* READ THREAD
*/
/**
 * Reads raw workload-trace lines from a file and feeds them to a pool of
 * {@link ProcessingThread}s through a bounded blocking queue. An optional
 * pre-filter {@link Pattern} lets obviously-irrelevant lines be skipped
 * without paying JSON deserialization cost.
 * <p>
 * Thread-safety: {@code stop()} may be invoked from any ProcessingThread;
 * the {@code stop} flag is volatile so the reader loop observes it promptly.
 */
public static class ReadThread implements Runnable {
    final File input_path;
    final List<ProcessingThread> processingThreads = new ArrayList<ProcessingThread>();
    final AtomicInteger processingThreadId = new AtomicInteger(0);
    // Bounded queue of (lineIndex, rawLine) pairs; capacity throttles the reader
    // so it cannot race arbitrarily far ahead of the consumers.
    final LinkedBlockingDeque<Pair<Integer, String>> lines;
    final Pattern pattern;
    // volatile: written by ProcessingThreads via stop(), read by the reader loop.
    volatile boolean stop = false;
    Thread self;

    /**
     * @param input_path  workload trace file to read
     * @param pattern     optional fast pre-filter; lines that do not match are dropped (may be null)
     * @param num_threads number of consumer threads; sizes the queue (num_threads * 1000)
     */
    public ReadThread(File input_path, Pattern pattern, int num_threads) {
        this.input_path = input_path;
        this.pattern = pattern;
        this.lines = new LinkedBlockingDeque<Pair<Integer, String>>(num_threads * 1000);
    }

    @Override
    public void run() {
        this.self = Thread.currentThread();
        this.self.setName(this.getClass().getSimpleName());
        if (debug.val) LOG.debug(String.format("Starting thread to read workload '%s' [processingThreads=%d]",
                                 this.input_path.getAbsolutePath(), this.processingThreads.size()));

        BufferedReader in = null;
        try {
            in = FileUtil.getReader(this.input_path);
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        int totalCtr = 0;   // every line read, including blanks and filtered lines
        int procCtr = 0;    // lines handed to ProcessingThreads
        int fastCtr = 0;    // lines dropped by the fast regex pre-filter
        try {
            // readLine() == null is the authoritative EOF test; the old ready()
            // check could both stop early and NPE on a null readLine() result.
            String line;
            while (this.stop == false && (line = in.readLine()) != null) {
                totalCtr++;
                if (debug.val && totalCtr % 10000 == 0)
                    LOG.debug(String.format("Read in %d lines from '%s' [queue=%d, procCtr=%d, fastCtr=%d]",
                              totalCtr, this.input_path.getName(), this.lines.size(), procCtr, fastCtr));
                line = line.trim();
                if (line.isEmpty()) continue;

                // Cheap regex pre-filter to avoid queueing lines we know nobody wants
                if (this.pattern != null && this.pattern.matcher(line).find() == false) {
                    fastCtr++;
                    continue;
                }

                // Blocks when the queue is full (back-pressure from the consumers)
                this.lines.put(Pair.of(procCtr, line));
                procCtr++;
            } // WHILE
            if (debug.val) LOG.debug("Finished reading file. Telling all ProcessingThreads to stop when their queue is empty");
        } catch (InterruptedException ex) {
            // Expected when stop() interrupts us mid-put(); anything else is fatal
            if (this.stop == false) throw new RuntimeException(ex);
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        } finally {
            // Always release the file handle, even on error
            try {
                in.close();
            } catch (IOException ex) {
                LOG.warn("Failed to close '" + this.input_path + "'", ex);
            }
            // Tell all the load threads to stop before we finish
            for (ProcessingThread lt : this.processingThreads) {
                lt.stop();
            } // FOR
        }
        if (debug.val) LOG.debug(String.format("Read %d lines [fast_filter=%d]", procCtr, fastCtr));
    }

    /**
     * Tells this reader to halt. Safe to call from any thread and more than once;
     * drains the pending queue and interrupts the reader if it is blocked in put().
     */
    public synchronized void stop() {
        if (this.stop == false) {
            if (debug.val) LOG.debug("ReadThread told to stop by LoadThread [queue_size=" + this.lines.size() + "]");
            this.stop = true;
            this.lines.clear();
            // self is only set once run() begins; guard against an early stop()
            if (this.self != null) this.self.interrupt();
        }
    }
} // END CLASS
/**
* PROCESSING THREAD
*/
/**
 * Consumes raw trace lines from a {@link ReadThread}'s queue, deserializes each
 * one into a {@link TransactionTrace}, optionally runs it through a {@link Filter},
 * and registers it with the target {@link Workload}. Several instances run in
 * parallel against the same ReadThread; the shared AtomicInteger counters array
 * accumulates totals across all of them.
 */
public static class ProcessingThread implements Runnable {
    final int id;
    final Workload workload;
    final File input_path;
    final ReadThread reader;
    final Database catalog_db;
    final Filter filter;
    final AtomicInteger counters[];
    // volatile: set from the ReadThread's thread (in its finally block); without
    // volatile this consumer may never observe the flag and spin forever.
    volatile boolean stop = false;

    public ProcessingThread(Workload workload, File input_path, ReadThread reader, Database catalog_db, Filter filter, AtomicInteger counters[]) {
        this.workload = workload;
        this.input_path = input_path;
        this.reader = reader;
        this.catalog_db = catalog_db;
        this.filter = filter;
        this.counters = counters;
        this.id = reader.processingThreadId.getAndIncrement();
    }

    @Override
    public void run() {
        Thread self = Thread.currentThread();
        self.setName(String.format("%s-%d", this.getClass().getSimpleName(), this.id));
        if (debug.val) LOG.debug(String.format("Starting %s [%d / %d]",
                                 this.getClass().getSimpleName(), this.id, this.reader.processingThreads.size()));

        // Local aliases for the counters shared across all ProcessingThreads
        AtomicInteger element_ctr = this.counters[ELEMENT_CTR_IDX];
        AtomicInteger xact_ctr = this.counters[TXN_CTR_IDX];
        AtomicInteger query_ctr = this.counters[QUERY_CTR_IDX];
        AtomicInteger weightedTxn_ctr = this.counters[WEIGHTED_TXN_CTR_IDX];
        AtomicInteger weightedQuery_ctr = this.counters[WEIGHTED_QUERY_CTR_IDX];

        while (true) {
            String line = null;
            Integer line_ctr = null;
            JSONObject jsonObject = null;
            Pair<Integer, String> p = null;
            // Poll with a timeout so we can periodically re-check the stop flag
            try {
                p = this.reader.lines.poll(100, TimeUnit.MILLISECONDS);
            } catch (InterruptedException ex) {
                // Treated as a shutdown signal; preserve the interrupt status for callers
                Thread.currentThread().interrupt();
                break;
            } catch (Exception ex) {
                throw new RuntimeException(ex);
            }
            if (p == null) {
                if (this.stop) {
                    if (debug.val) LOG.debug("Queue is empty and we were told to stop!");
                    break;
                }
                if (debug.val) LOG.debug("Queue is empty but we haven't been told to stop yet");
                continue;
            }
            line_ctr = p.getFirst();
            line = p.getSecond();
            if (trace.val)
                LOG.trace(String.format("Processing TransactionTrace on line %d [queueSize=%d, bytes=%d]",
                          line_ctr, reader.lines.size(), line.length()));
            try {
                // Malformed JSON is logged and skipped rather than aborting the load
                try {
                    jsonObject = new JSONObject(line);
                } catch (JSONException ex) {
                    String msg = String.format("Ignoring invalid TransactionTrace on line %d of '%s'", (line_ctr+1), input_path);
                    if (debug.val) {
                        LOG.warn(msg, ex);
                    } else {
                        LOG.warn(msg);
                    }
                    continue;
                }
                // TransactionTrace
                if (jsonObject.has(TransactionTrace.Members.TXN_ID.name())) {
                    // If we have already loaded in up to our limit, then we don't need to
                    // do anything else. But we still have to keep reading because we need
                    // be able to load in our index structures that are at the bottom of the file
                    //
                    // NOTE: If we ever load something else but the straight trace dumps, then the following
                    // line should be a continue and not a break.
                    // Load the xact from the jsonObject
                    TransactionTrace xact = null;
                    try {
                        xact = TransactionTrace.loadFromJSONObject(jsonObject, catalog_db);
                    } catch (Throwable ex) {
                        LOG.warn(ex.getMessage());
                        continue;
                    }
                    if (xact == null) {
                        throw new Exception("Failed to deserialize transaction trace on line " + xact_ctr);
                    } else if (filter != null) {
                        FilterResult result = null;
                        // It's ok to do this because the real CPU bottleneck is
                        // the JSON deserialization
                        synchronized (filter) {
                            result = filter.apply(xact);
                        } // SYNCH
                        if (trace.val) LOG.trace(xact + " Filter Result: " + result);
                        if (result == FilterResult.HALT) {
                            // We have to tell the ReadThread to stop too!
                            if (debug.val) LOG.debug("Got HALT response from filter! Telling ReadThread to stop!");
                            this.reader.stop();
                            break;
                        }
                        else if (result == FilterResult.SKIP) continue;
                        if (trace.val) LOG.trace(result + ": " + xact);
                    }
                    // Keep track of how many trace elements we've loaded so that we can make sure
                    // that our element trace list is complete
                    int x = xact_ctr.incrementAndGet();
                    if (debug.val && x % 10000 == 0) LOG.debug("Processed " + xact_ctr + " transactions...");
                    query_ctr.addAndGet(xact.getQueryCount());
                    element_ctr.addAndGet(1 + xact.getQueries().size());
                    weightedTxn_ctr.addAndGet(xact.weight);
                    for (QueryTrace q : xact.getQueries()) {
                        weightedQuery_ctr.addAndGet(q.weight);
                    } // FOR
                    // This call just updates the various other index structures
                    this.workload.addTransaction(xact.getCatalogItem(catalog_db), xact, true);
                // Unknown!
                } else {
                    throw new Exception("Unexpected serialization line in workload trace file '" + input_path.getAbsolutePath() + "'");
                }
            } catch (Exception ex) {
                throw new RuntimeException("Error on line " + (line_ctr+1) + " of workload trace file '" + input_path.getAbsolutePath() + "'", ex);
            }
        } // WHILE
    }

    /**
     * Asks this thread to exit once the shared queue is drained.
     * Called by the ReadThread when it finishes (or is halted).
     */
    public void stop() {
        if (debug.val) LOG.debug("Told to stop [queue_size=" + this.reader.lines.size() + "]");
        this.stop = true;
    }
} // END CLASS
/**
* WRITE THREAD
*/
/**
 * Drains {@link TransactionTrace}s from an unbounded queue and appends their
 * JSON serialization (one record per line) to an output stream. Producers
 * enqueue into {@code traces}; the thread exits when interrupted.
 */
public static final class WriteThread implements Runnable {
    final Database catalog_db;
    final OutputStream output;
    // Unbounded handoff queue filled by producer threads
    final LinkedBlockingDeque<TransactionTrace> traces = new LinkedBlockingDeque<TransactionTrace>();
    // NOTE(review): getBytes() uses the platform default charset here and in
    // write() below — presumably the traces are ASCII/UTF-8; confirm before
    // switching to an explicit charset.
    static final byte[] newline = "\n".getBytes();

    public WriteThread(Database catalog_db, OutputStream output) {
        this.catalog_db = catalog_db;
        this.output = output;
        assert(this.output != null);
    }

    @Override
    public void run() {
        TransactionTrace xact = null;
        while (true) {
            try {
                xact = this.traces.take();
                assert(xact != null);
                write(this.catalog_db, xact, this.output);
            } catch (InterruptedException ex) {
                // Interrupt is the shutdown signal; restore the flag for any caller
                Thread.currentThread().interrupt();
                break;
            }
        } // WHILE
    }

    /**
     * Serializes one transaction trace as a single JSON line and flushes it.
     * An I/O failure is fatal: it is logged and the JVM exits.
     */
    public static void write(Database catalog_db, TransactionTrace xact, OutputStream output) {
        try {
            output.write(xact.toJSONString(catalog_db).getBytes());
            output.write(newline);
            output.flush();
            if (debug.val) LOG.debug("Wrote out new trace record for " + xact + " with " + xact.getQueries().size() + " queries");
        } catch (IOException ex) {
            LOG.fatal("Failed to write " + xact + " out to file", ex);
            System.exit(1);
        }
    }
} // END CLASS
/**
* Read a Workload file and generate a Histogram for how often each procedure
* is executed in the trace. This is a faster method than having to deserialize the entire
* workload trace into memory.
* @param workload_path
* @return
* @throws Exception
*/
/**
 * Read a Workload file and generate a Histogram for how often each procedure
 * is executed in the trace. This is a faster method than having to deserialize the entire
 * workload trace into memory: it only regex-matches the head of each JSON record.
 * @param workload_path workload trace file (may be compressed; FileUtil handles that)
 * @return histogram of procedure name -> occurrence count
 * @throws Exception on I/O errors opening or reading the file
 */
public static ObjectHistogram<String> getProcedureHistogram(File workload_path) throws Exception {
    final ObjectHistogram<String> h = new ObjectHistogram<String>();
    // Matches the start of a serialized record: an optional legacy "ID" field
    // followed by the NAME field, whose value (capture group 2) is the procedure name.
    final String regex = "^\\{[\\s]*" +
                         // Old Format: Start with an ID#
                         "(\"ID\":[\\d]+,)?" +
                         "\"" +
                         AbstractTraceElement.Members.NAME.name() +
                         "\":[\\s]*\"([\\w\\d]+)\"[\\s]*,[\\s]*.*";
    final Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    if (debug.val) LOG.debug("Fast generation of Procedure Histogram from Workload '" + workload_path.getAbsolutePath() + "'");
    BufferedReader reader = FileUtil.getReader(workload_path.getAbsolutePath());
    int line_ctr = 0;
    try {
        // readLine() == null is the authoritative EOF test (ready() is not)
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher m = p.matcher(line);
            // Check the match explicitly. The old code only asserted m.matches(),
            // so with assertions disabled (the JVM default) the matcher was never
            // run, m.group(2) threw IllegalStateException, and the method called
            // System.exit(1) on every single line.
            if (m.matches()) {
                h.put(m.group(2));
            } else {
                LOG.error("Invalid Workload Line: " + line);
                assert(false) : "Invalid Line #" + line_ctr + " [" + workload_path + "]\n" + line;
            }
            line_ctr++;
        } // WHILE
    } finally {
        reader.close();
    }
    if (debug.val) LOG.debug("Processed " + line_ctr + " workload trace records for histogram\n" + h);
    return (h);
}
/**
 * Ad-hoc entry point: prints the procedure histogram for a sample TPC-E trace.
 */
public static void main(String[] vargs) throws Exception {
    ArgumentsParser args = ArgumentsParser.load(vargs);
    assert(args != null);
    // NOTE(review): hard-coded developer path — this should come from the parsed
    // arguments instead; verify the ArgumentsParser key for the workload file.
    File tracePath = new File("/home/pavlo/Documents/H-Store/SVN-Brown/trunk/files/workloads/tpce.trace.gz");
    System.out.println(getProcedureHistogram(tracePath));
}
}