package edu.brown.markov;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.junit.Test;
import org.voltdb.VoltProcedure;
import org.voltdb.benchmark.tpcc.procedures.neworder;
import org.voltdb.catalog.Procedure;
import weka.classifiers.Classifier;
import weka.clusterers.AbstractClusterer;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NumericToNominal;
import edu.brown.BaseTestCase;
import edu.brown.catalog.CatalogUtil;
import edu.brown.markov.FeatureClusterer.SplitType;
import edu.brown.markov.features.BasePartitionFeature;
import edu.brown.markov.features.FeatureUtil;
import edu.brown.markov.features.ParamArrayLengthFeature;
import edu.brown.markov.features.ParamHashPartitionFeature;
import edu.brown.statistics.Histogram;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.ProjectType;
import edu.brown.utils.StringUtil;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;
import edu.brown.workload.filters.ProcedureLimitFilter;
import edu.brown.workload.filters.ProcedureNameFilter;
import edu.brown.hstore.conf.HStoreConf;
/**
* NOTE: 2012-10-20
* I am getting random JVM crashes with some of these test cases.
* I think they're caused by Weka, but I don't have time to look into it,
* so I've commented out those tests for now.
* @author pavlo
*/
public class TestFeatureClusterer extends BaseTestCase {
private static final Class<? extends VoltProcedure> TARGET_PROCEDURE = neworder.class;
private static final int WORKLOAD_XACT_LIMIT = 1000;
// private static final int BASE_PARTITION = 1;
private static final int NUM_PARTITIONS = 50;
private static Procedure catalog_proc;
private static Workload workload;
private static Instances data;
private FeatureClusterer fclusterer;
@Override
protected void setUp() throws Exception {
super.setUp(ProjectType.TPCC);
this.addPartitions(NUM_PARTITIONS);
HStoreConf.singleton().site.markov_path_caching = false;
if (workload == null) {
catalog_proc = this.getProcedure(TARGET_PROCEDURE);
File file = this.getWorkloadFile(ProjectType.TPCC);
workload = new Workload(catalog);
// Build the workload filter chain:
// (1) Filter by procedure name
// (2) Filter on txns that start on our BASE_PARTITION (currently disabled)
// (3) Filter to only include multi-partition txns (currently disabled)
// (4) Stop after WORKLOAD_XACT_LIMIT txns
edu.brown.workload.filters.Filter filter = new ProcedureNameFilter(false)
.include(TARGET_PROCEDURE.getSimpleName())
// .attach(new ProcParameterValueFilter().include(1, new Long(5))) // D_ID
// .attach(new ProcParameterArraySizeFilter(CatalogUtil.getArrayProcParameters(catalog_proc).get(0), 10, ExpressionType.COMPARE_EQUAL))
// .attach(new BasePartitionTxnFilter(p_estimator, BASE_PARTITION))
// .attach(new MultiPartitionTxnFilter(p_estimator))
.attach(new ProcedureLimitFilter(WORKLOAD_XACT_LIMIT));
workload.load(file, catalog_db, filter);
assert(workload.getTransactionCount() > 0);
// Now extract the FeatureSet that we will use in our tests
Map<Procedure, FeatureSet> fsets = new FeatureExtractor(catalogContext, p_estimator).calculate(workload);
FeatureSet fset = fsets.get(catalog_proc);
assertNotNull(fset);
data = fset.export(catalog_proc.getName(), false);
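// The exported feature values are numeric, but they really represent categorical data
// (base partitions, param array lengths, hash partitions), so convert them to nominal
// attributes before handing them to Weka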
NumericToNominal weka_filter = new NumericToNominal();
weka_filter.setInputFormat(data);
data = Filter.useFilter(data, weka_filter);
}
assertNotNull(data);
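// Build a fresh FeatureClusterer for each test, targeting all partitions in the catalog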
fclusterer = new FeatureClusterer(catalogContext, catalog_proc, workload, catalogContext.getAllPartitionIds());
}
/**
* testSplitPercentages
*/
@Test
public void testSplitPercentages() {
double total = 0.0d;
for (SplitType stype : FeatureClusterer.SplitType.values()) {
total += stype.getPercentage();
}
assertEquals(1.0d, total, 0.00001);
}
// /**
// * testCalculateGlobalCost
// */
// @Test
// public void testCalculateGlobalCost() throws Exception {
// this.fclusterer.splitWorkload(data);
// this.fclusterer.calculateGlobalCost();
// int counters[] = this.fclusterer.getGlobalCounters();
// assertNotNull(counters);
// for (int i = 0; i < counters.length; i++) {
// int val = counters[i];
// assert(val >= 0) : String.format("Invalid Counter[%d] => %d", i, val);
// } // FOR
// }
// /**
// * testCalculate
// */
// @Test
// public void testCalculate() throws Exception {
// this.fclusterer.setNumRounds(1);
// this.fclusterer.setAttributeTopK(0.50);
// MarkovAttributeSet aset = this.fclusterer.calculate(data);
// assertNotNull(aset);
//
// System.err.println(aset);
// System.err.println("COST: " + aset.getCost());
//
//
// }
/**
* testCreateMarkovAttributeSetFilter
*/
@Test
public void testCreateMarkovAttributeSetFilter() throws Exception {
// Test that we can create a filter from a MarkovAttributeSet
MarkovAttributeSet aset = new MarkovAttributeSet(data, FeatureUtil.getFeatureKeyPrefix(ParamArrayLengthFeature.class));
assertEquals(CatalogUtil.getArrayProcParameters(catalog_proc).size(), aset.size());
Filter filter = aset.createFilter(data);
Instances newData = Filter.useFilter(data, filter);
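// Every Instance that comes out of the filter should contain only the attributes
// from the MarkovAttributeSet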
for (int i = 0, cnt = newData.numInstances(); i < cnt; i++) {
Instance processed = newData.instance(i);
// System.err.println(processed);
assertEquals(aset.size(), processed.numAttributes());
} // FOR
assertEquals(data.numInstances(), newData.numInstances());
// System.err.println("MarkovAttributeSet: " + aset);
}
/**
* testCreateClusterer
*/
@Test
public void testCreateClusterer() throws Exception {
// Construct a simple MarkovAttributeSet that only contains the BasePartitionFeature
MarkovAttributeSet base_aset = new MarkovAttributeSet(data, FeatureUtil.getFeatureKeyPrefix(BasePartitionFeature.class));
assertFalse(base_aset.isEmpty());
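// Remember the attribute index of the base partition feature so that we can
// cross-check each txn's actual base partition against its feature value below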
int base_partition_idx = CollectionUtil.first(base_aset).index();
AbstractClusterer clusterer = this.fclusterer.createClusterer(base_aset, data);
assertNotNull(clusterer);
// Make sure that every txn with the same base partition gets mapped to the same cluster
Map<Integer, Histogram<Integer>> p_c_xref = new HashMap<Integer, Histogram<Integer>>();
for (int i = 0, cnt = data.numInstances(); i < cnt; i++) {
Instance inst = data.instance(i);
assertNotNull(inst);
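// Each exported Instance embeds its transaction id, which lets us map it back
// to the original TransactionTrace in the workload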
long txn_id = FeatureUtil.getTransactionId(inst);
TransactionTrace txn_trace = workload.getTransaction(txn_id);
assertNotNull(txn_trace);
Integer base_partition = p_estimator.getBasePartition(txn_trace);
assertNotNull(base_partition);
assertEquals(base_partition.intValue(), (int)inst.value(base_partition_idx));
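// Record which cluster this txn gets assigned to, keyed by its base partition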
int c = clusterer.clusterInstance(inst);
Histogram<Integer> h = p_c_xref.get(base_partition);
if (h == null) {
h = new ObjectHistogram<Integer>();
p_c_xref.put(base_partition, h);
}
h.put(c);
} // FOR
// System.err.println(StringUtil.formatMaps(p_c_xref));
// Set<Integer> c_p_xref = new HashSet<Integer>();
// for (Map.Entry<Integer, Histogram<Integer>> e : p_c_xref.entrySet()) {
// Set<Integer> clusters = e.getValue().values();
//
// // Make sure that each base partition is only mapped to one cluster
// assertEquals(e.getKey().toString(), 1, clusters.size());
//
// // Make sure that two different base partitions are not mapped to the same cluster
// assertFalse(c_p_xref.contains(CollectionUtil.first(clusters)));
// c_p_xref.addAll(clusters);
// } // FOR
}
// /**
// * testCalculateAttributeSetCost
// */
// @Test
// public void testCalculateAttributeSetCost() throws Exception {
// Set<Attribute> attributes = FeatureClusterer.prefix2attributes(data,
// FeatureUtil.getFeatureKeyPrefix(ParamArrayLengthFeature.class, this.getProcParameter(catalog_proc, 4)),
// FeatureUtil.getFeatureKeyPrefix(ParamHashPartitionFeature.class, this.getProcParameter(catalog_proc, 1))
// );
//
// Instances instances[] = fclusterer.splitWorkload(data);
// assertNotNull(instances);
// MarkovAttributeSet aset = new MarkovAttributeSet(attributes);
// assertNotNull(aset);
// fclusterer.calculateAttributeSetCost(aset);
// assert(aset.getCost() > 0);
// }
// /**
// * testGenerateDecisionTree
// */
// @Test
// public void testGenerateDecisionTree() throws Exception {
// Set<Attribute> attributes = FeatureClusterer.prefix2attributes(data,
// FeatureUtil.getFeatureKeyPrefix(ParamArrayLengthFeature.class, this.getProcParameter(catalog_proc, 4)),
// FeatureUtil.getFeatureKeyPrefix(ParamHashPartitionFeature.class, this.getProcParameter(catalog_proc, 1))
// );
// MarkovAttributeSet aset = new MarkovAttributeSet(attributes);
// assertNotNull(aset);
//
// Histogram<String> key_h = new ObjectHistogram<String>();
// int key_len = aset.size();
// for (int i = 0, cnt = data.numInstances(); i < cnt; i++) {
// Instance inst = data.instance(i);
// Object key[] = new Object[key_len];
// for (int ii = 0; ii < key_len; ii++) {
// key[ii] = inst.value(aset.get(ii));
// }
// key_h.put(Arrays.toString(key));
// } // FOR
// System.err.println("Number of Elements: " + key_h.getValueCount());
// System.err.println(key_h);
// System.err.println(StringUtil.repeat("+", 100));
//
//// Instances instances[] = fclusterer.splitWorkload(data);
//// assertNotNull(instances);
//
// AbstractClusterer clusterer = fclusterer.createClusterer(aset, data);
// assertNotNull(clusterer);
//
// Classifier classifier = fclusterer.generateDecisionTree(clusterer, aset, data);
// assertNotNull(classifier);
// }
}