learnerRunner.setPickUniqueFromInitial(useUnique);learnerRunner.setOnlyUsePositives(onlyPositives);learnerRunner.setIfdepth(ifDepth);learnerRunner.setLengthMultiplier(lengthMultiplier);
learnerRunner.setSelectionID(selection+"_states"+states+"_sample"+sample);
runner.submit(learnerRunner);
++numberOfTasks;
}
ProgressIndicator progress = new ProgressIndicator("running "+numberOfTasks+" tasks for "+selection, numberOfTasks);
for(int count=0;count < numberOfTasks;++count)
{
ThreadResult result = runner.take().get();// this will throw an exception if any of the tasks failed.
samples.addAll(result.samples);
progress.next();
}
}
catch(Exception ex)
{
IllegalArgumentException e = new IllegalArgumentException("failed to compute, the problem is: "+ex);e.initCause(ex);
if (executorService != null) { executorService.shutdown();executorService = null; }
throw e;
}
int nonZeroes = 0;
long numberOfValues = 0;
System.out.println("number of instances: "+dataCollector.trainingData.numInstances());
int freqData[] = new int[dataCollector.attributesOfAnInstance.length];
for(int i=0;i<dataCollector.trainingData.numInstances();++i)
for(int attrNum=0;attrNum<dataCollector.attributesOfAnInstance.length;++attrNum)
{
assert dataCollector.attributesOfAnInstance[attrNum].index() == attrNum;
if (dataCollector.trainingData.instance(i).stringValue(attrNum) != WekaDataCollector.ZERO)
{
++freqData[attrNum];++numberOfValues;
}
}
for(int attrNum=0;attrNum<dataCollector.attributesOfAnInstance.length;++attrNum)
if (freqData[attrNum]>0)
++nonZeroes;
System.out.println("Total instances: "+dataCollector.trainingData.numInstances()+" with "+dataCollector.attributesOfAnInstance.length+" attributes, non-zeroes are "+nonZeroes+" with average of "+((double)numberOfValues)/nonZeroes);
Arrays.sort(freqData);
int numOfcolumns=20;
int stepWidth = dataCollector.attributesOfAnInstance.length/numOfcolumns;
final RBoxPlot<Long> gr_HistogramOfAttributeValues = new RBoxPlot<Long>("Attributes","Number of values",new File("attributes_use"+selection+".pdf"));
for(int i=0;i<numOfcolumns;++i)
{
int columnData=0;
for(int j=i*stepWidth;j<(i+1)*stepWidth;++j)
if (j < dataCollector.attributesOfAnInstance.length)
columnData+=freqData[j];
gr_HistogramOfAttributeValues.add(new Long(numOfcolumns-i),new Double(columnData>0?Math.log10(columnData):0));
}
//gr_HistogramOfAttributeValues.drawInteractive(gr);
gr_HistogramOfAttributeValues.drawPdf(gr);
/*
// write arff
FileWriter wekaInstances = null;
String whereToWrite = "qualityLearner_"+selection+".arff";
try
{
wekaInstances = new FileWriter(whereToWrite);
// This chunk is almost verbatim from Weka's Instances.toString()
wekaInstances.append(Instances.ARFF_RELATION).append(" ").append(Utils.quote(dataCollector.trainingData.relationName())).append("\n\n");
for (int i = 0; i < dataCollector.trainingData.numAttributes(); i++) {
wekaInstances.append(dataCollector.trainingData.attribute(i).toString()).append("\n");
}
wekaInstances.append("\n").append(Instances.ARFF_DATA).append("\n");
for (int i = 0; i < dataCollector.trainingData.numInstances(); i++) {
wekaInstances.append(dataCollector.trainingData.instance(i).toString());
if (i < dataCollector.trainingData.numInstances() - 1) {
wekaInstances.append('\n');
}
}
}
catch(Exception ex)
{
Helper.throwUnchecked("failed to create a file with training data for "+whereToWrite, ex);
}
finally
{
if (wekaInstances != null)
try {
wekaInstances.close();
} catch (IOException e) {
// ignore this, we are not proceeding anyway due to an earlier exception so whether the file was actually written does not matter
}
}
*/
// Run the evaluation
final weka.classifiers.trees.REPTree repTree = new weka.classifiers.trees.REPTree();repTree.setMaxDepth(4);
//repTree.setNoPruning(true);// since we only use the tree as a classifier (as a conservative extension of what is currently done) and do not actually look at it, elimination of pruning is not a problem.
// As part of learning, we also prune some of the nodes where the ratio of correctly-classified pairs to those incorrectly classified is comparable.
// The significant advantage of not pruning is that the result is no longer sensitive to the order of elements in the tree and hence does not depend on the order in which elements have been obtained by concurrent threads.
//final weka.classifiers.lazy.IB1 ib1 = new weka.classifiers.lazy.IB1();
//final weka.classifiers.trees.J48 classifier = new weka.classifiers.trees.J48();
final Classifier classifier = repTree;
classifier.buildClassifier(dataCollector.trainingData);
System.out.println("Entries in the classifier: "+dataCollector.trainingData.numInstances());
System.out.println(classifier);
dataCollector=null;// throw all the training data away.
{// serialise the classifier, this is the only way to store it.
OutputStream os = new FileOutputStream(selection+".ser");
ObjectOutputStream oo = new ObjectOutputStream(os);
oo.writeObject(classifier);
os.close();
}
for(final boolean selectingRed:new boolean[]{false})
for(final boolean classifierToBlockAllMergers:new boolean[]{true})
//for(final boolean zeroScoringAsRed:(classifierToBlockAllMergers?new boolean[]{true,false}:new boolean[]{false}))// where we are not using classifier to rule out all mergers proposed by pair selection, it does not make sense to use two values configuring this classifier.
for(final double threshold:new double[]{1})
{
final boolean zeroScoringAsRed = false;
selection = "TRUNK;EVALUATION;"+"ifDepth="+ifDepth+";threshold="+threshold+// ";useUnique="+useUnique+";onlyPositives="+onlyPositives+
";selectingRed="+selectingRed+";classifierToBlockAllMergers="+classifierToBlockAllMergers+";zeroScoringAsRed="+zeroScoringAsRed+";traceQuantity="+traceQuantity+";lengthMultiplier="+lengthMultiplier+";trainingDataMultiplier="+trainingDataMultiplier+";";
final int totalTaskNumber = traceQuantity;
final RBoxPlot<Long> gr_PairQuality = new RBoxPlot<Long>("Correct v.s. wrong","%%",new File("percentage_score"+selection+".pdf"));
final RBoxPlot<String> gr_QualityForNumberOfTraces = new RBoxPlot<String>("traces","%%",new File("quality_traces"+selection+".pdf"));
SquareBagPlot gr_NewToOrig = new SquareBagPlot("orig score","score with learnt selection",new File("new_to_orig"+selection+".pdf"),0,1,true);
final Map<Long,TrueFalseCounter> pairQualityCounter = new TreeMap<Long,TrueFalseCounter>();
try
{
int numberOfTasks = 0;
for(int states=minStateNumber;states < minStateNumber+rangeOfStateNumbers;states+=stateNumberIncrement)
for(int sample=0;sample<samplesPerFSM;++sample)
{
LearnerRunner learnerRunner = new LearnerRunner(dataCollector,states,sample,totalTaskNumber+numberOfTasks,traceQuantity, config, converter)
{
/**
 * Builds the learner for one evaluation task: a {@link LearnerThatUsesWekaResults} driven by the
 * classifier built earlier in the enclosing method and configured from the current values of the
 * enclosing loops (selectingRed, classifierToBlockAllMergers, zeroScoringAsRed, threshold).
 *
 * @param evalCnf evaluation configuration handed to the learner's constructor.
 * @param argReferenceGraph reference graph handed to the learner's constructor.
 * @param argDataCollector not used by this implementation.
 * @param argInitialPTA initial PTA handed to the learner's constructor.
 * @return the configured learner.
 */
@Override
public LearnerThatCanClassifyPairs createLearner(LearnerEvaluationConfiguration evalCnf,LearnerGraph argReferenceGraph,@SuppressWarnings("unused") WekaDataCollector argDataCollector, LearnerGraph argInitialPTA)
{
	final LearnerThatUsesWekaResults wekaLearner = new LearnerThatUsesWekaResults(ifDepth,evalCnf,argReferenceGraph,classifier,argInitialPTA);

	// The pair quality counter is only attached when the pair quality plot exists.
	if (gr_PairQuality != null)
	{
		wekaLearner.setPairQualityCounter(pairQualityCounter);
	}

	// Setter order is kept exactly as in the original in case the setters interact.
	wekaLearner.setUseClassifierForRed(selectingRed);
	wekaLearner.setUseClassifierToChooseNextRed(classifierToBlockAllMergers);
	wekaLearner.setBlacklistZeroScoringPairs(zeroScoringAsRed);
	wekaLearner.setThreshold(threshold);
	return wekaLearner;
}
};
learnerRunner.setPickUniqueFromInitial(useUnique);learnerRunner.setEvaluateAlsoUsingReferenceLearner(true);
learnerRunner.setOnlyUsePositives(onlyPositives);learnerRunner.setIfdepth(ifDepth);learnerRunner.setLengthMultiplier(lengthMultiplier);
learnerRunner.setSelectionID(selection+"_states"+states+"_sample"+sample);
runner.submit(learnerRunner);
++numberOfTasks;
}
ProgressIndicator progress = new ProgressIndicator(new Date()+" evaluating "+numberOfTasks+" tasks for "+selection, numberOfTasks);
for(int count=0;count < numberOfTasks;++count)
{
ThreadResult result = runner.take().get();// this will throw an exception if any of the tasks failed.
if (gr_NewToOrig != null)
{
for(SampleData sample:result.samples)
gr_NewToOrig.add(sample.differenceForReferenceLearner.getValue(),sample.difference.getValue());
}
for(SampleData sample:result.samples)
if (sample.differenceForReferenceLearner.getValue() > 0)
gr_QualityForNumberOfTraces.add(traceQuantity+"",sample.difference.getValue()/sample.differenceForReferenceLearner.getValue());
progress.next();
}
if (gr_PairQuality != null)
{
synchronized(pairQualityCounter)
{