@Override
public ThreadResult call() throws Exception
{
final int alphabet = 2*states;
LearnerGraph referenceGraph = null;
ThreadResult outcome = new ThreadResult();
Label uniqueFromInitial = null;
MachineGenerator mg = new MachineGenerator(states, 400, (int)Math.round((double)states/5));mg.setGenerateConnected(true);
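// Where pickUniqueFromInitial is set, keep generating reference machines until uniqueFromState identifies a label that is unique to a single state; that state is made the initial state and the label is recorded as uniqueFromInitial.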
do
{
referenceGraph = mg.nextMachine(alphabet,seed, config, converter).pathroutines.buildDeterministicGraph();// reference graph has no reject-states, because we assume that undefined transitions lead to reject states.
if (pickUniqueFromInitial)
{
Map<Label,CmpVertex> uniques = uniqueFromState(referenceGraph);
if(!uniques.isEmpty())
{
Entry<Label,CmpVertex> entry = uniques.entrySet().iterator().next();
referenceGraph.setInit(entry.getValue());uniqueFromInitial = entry.getKey();
}
}
}
while(pickUniqueFromInitial && uniqueFromInitial == null);
LearnerEvaluationConfiguration learnerEval = new LearnerEvaluationConfiguration(config);learnerEval.setLabelConverter(converter);
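// Evaluation sequences built from the reference graph; estimateDifference uses them below to compare each learnt automaton against the reference.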
final Collection<List<Label>> testSet = PaperUAS.computeEvaluationSet(referenceGraph,states*3,states*alphabet);
for(int attempt=0;attempt<2;++attempt)
{// try learning the same machine a few times
LearnerGraph pta = new LearnerGraph(config);
RandomPathGenerator generator = new RandomPathGenerator(referenceGraph,new Random(attempt),5,null);
final int pathLength = generator.getPathLength();
// The notes below describe the commented-out length formula (random(pathLength)+1)*lengthMultiplier; as written, getLength() returns a fixed trace length of 2*states*alphabet.
// The total number of elements in test sequences (alphabet*states*traceQuantity) will be distributed around (random(pathLength)+1); the total size of the PTA is a product of these two.
// For the purpose of generating long traces, we construct as many traces as there are states, but these traces have to be rather long,
// that is, the length of traces will be (random(pathLength)+1)*sequencesPerChunk/states and the number of traces generated will be the same as the number of states.
final int tracesToGenerate = makeEven(traceQuantity);
final Random rnd = new Random(seed*31+attempt);
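// Generate tracesToGenerate random traces (the number is made even so it can be split between positive and negative sequences); getLength() below fixes each trace length at 2*states*alphabet and getPrefixLength() returns the whole trace as the prefix.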
generator.generateRandomPosNeg(tracesToGenerate, 1, false, new RandomLengthGenerator() {
@Override
public int getLength() {
return 2*states*alphabet;//(rnd.nextInt(pathLength)+1)*lengthMultiplier;
}
@Override
public int getPrefixLength(int len) {
return len;
}
});
if (onlyUsePositives)
pta.paths.augmentPTA(generator.getAllSequences(0).filter(new FilterPredicate() {
@Override
public boolean shouldBeReturned(Object name) {
return ((statechum.analysis.learning.rpnicore.RandomPathGenerator.StateName)name).accept;
}
}));
else
pta.paths.augmentPTA(generator.getAllSequences(0));// the PTA will have very few reject-states because we are generating few sequences and hence there will be few negative sequences.
// In order to approximate the behaviour of our case study, we need to compute which pairs are not allowed from a reference graph and use those as if-then automata to start the inference.
//pta.paths.augmentPTA(referenceGraph.wmethod.computeNewTestSet(referenceGraph.getInit(),1));
List<List<Label>> sPlus = generator.getAllSequences(0).getData(new FilterPredicate() {
@Override
public boolean shouldBeReturned(Object name) {
return ((statechum.analysis.learning.rpnicore.RandomPathGenerator.StateName)name).accept;
}
});
List<List<Label>> sMinus= generator.getAllSequences(0).getData(new FilterPredicate() {
@Override
public boolean shouldBeReturned(Object name) {
return !((statechum.analysis.learning.rpnicore.RandomPathGenerator.StateName)name).accept;
}
});
assert sPlus.size() > 0;
assert sMinus.size() > 0;
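// Build a Markov model with chunk length chunkLen from the positive and negative traces; it is used below both to identify paths/states to merge and to measure the inconsistency of candidate merges.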
final MarkovModel m= new MarkovModel(chunkLen,true,true);
m.createMarkovLearner(sPlus, sMinus,false);
pta.clearColours();
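// The synchronized block below is currently a no-op: the PTA-size diagnostic inside it is commented out; synchronisation would serialise its console output across worker threads.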
synchronized (AbstractLearnerGraph.syncObj) {
//PaperUAS.computePTASize(selectionID+" attempt: "+attempt+" with unique: ", pta, referenceGraph);
}
if (!onlyUsePositives)
assert pta.getStateNumber() > pta.getAcceptStateNumber() : "graph with only accept states but onlyUsePositives is not set";
else
assert pta.getStateNumber() == pta.getAcceptStateNumber() : "graph with negatives but onlyUsePositives is set";
LearnerMarkovPassive learnerOfPairs = null;
LearnerGraph actualAutomaton = null;
final Configuration deepCopy = pta.config.copy();deepCopy.setLearnerCloneGraph(true);
LearnerGraph ptaCopy = new LearnerGraph(deepCopy);LearnerGraph.copyGraphs(pta, ptaCopy);
// now use pathsToMerge to compute which states can/cannot be merged together.
LearnerGraph trimmedReference = trimUncoveredTransitions(pta,referenceGraph);
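// trimmedReference (by its name, the reference graph restricted to transitions covered by the PTA) is only referenced by the commented-out inconsistency diagnostics below.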
final ConsistencyChecker checker = new MarkovClassifier.DifferentPredictionsInconsistencyNoBlacklisting();
//long inconsistencyForTheReferenceGraph = MarkovClassifier.computeInconsistency(trimmedReference, m, checker,false);
//System.out.println("Inconsistency of trimmed reference : "+inconsistencyForTheReferenceGraph);
//if (inconsistencyForTheReferenceGraph != 53)
// break;// ignore automata where we get good results.
MarkovClassifier ptaClassifier = new MarkovClassifier(m,pta);
final List<List<Label>> pathsToMerge=ptaClassifier.identifyPathsToMerge(checker);
final Collection<Set<CmpVertex>> verticesToMergeBasedOnInitialPTA=ptaClassifier.buildVerticesToMergeForPaths(pathsToMerge);
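// verticesToMergeBasedOnInitialPTA groups PTA states that the Markov classifier predicts should be merged; these groups constrain pair scoring (via vertexToPartition) and drive the final merge after learning.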
/*
List<StatePair> pairsListInitialMerge = ptaClassifier.buildVerticesToMergeForPath(pathsToMerge);
LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>> verticesToMergeInitialMerge = new LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>>();
int scoreInitialMerge = pta.pairscores.computePairCompatibilityScore_general(null, pairsListInitialMerge, verticesToMergeInitialMerge);
assert scoreInitialMerge >= 0;
final LearnerGraph ptaAfterInitialMerge = MergeStates.mergeCollectionOfVertices(pta, null, verticesToMergeInitialMerge);
final CmpVertex vertexWithMostTransitions = findVertexWithMostTransitions(ptaAfterInitialMerge,MarkovClassifier.computeInverseGraph(pta));
ptaAfterInitialMerge.clearColours();ptaAfterInitialMerge.getInit().setColour(null);vertexWithMostTransitions.setColour(JUConstants.RED);
ptaClassifier = new MarkovClassifier(m,ptaAfterInitialMerge);// rebuild the classifier
LearnerGraphND inverseOfPtaAfterInitialMerge = MarkovClassifier.computeInverseGraph(ptaAfterInitialMerge);
System.out.println("Centre vertex: "+vertexWithMostTransitions+" "+countTransitions(ptaAfterInitialMerge, inverseOfPtaAfterInitialMerge, vertexWithMostTransitions));
*/
//checkIfSingleStateLoopsCanBeFormed(pta,m,referenceGraph,pathsToMerge,directionForwardOrInverse);
/*
System.out.println("initially: "+whatToMerge.size()+" clusters "+whatToMerge+"\nafter sideways "+clustersOfStates.size()+" clusters "+clustersOfStates);
showInconsistenciesForDifferentMergers(referenceGraph,m,pta,clustersOfStates);
*/
if (pickUniqueFromInitial)
{
pta = mergeStatesForUnique(pta,uniqueFromInitial);
learnerOfPairs = new LearnerMarkovPassive(learnerEval,referenceGraph,pta);learnerOfPairs.setMarkovModel(m);
learnerOfPairs.setLabelsLeadingFromStatesToBeMerged(Arrays.asList(new Label[]{uniqueFromInitial}));
actualAutomaton = learnerOfPairs.learnMachine(new LinkedList<List<Label>>(),new LinkedList<List<Label>>());
LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>> verticesToMerge = new LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>>();
List<StatePair> pairsList = LearnerThatCanClassifyPairs.buildVerticesToMerge(actualAutomaton,learnerOfPairs.getLabelsLeadingToStatesToBeMerged(),learnerOfPairs.getLabelsLeadingFromStatesToBeMerged());
if (!pairsList.isEmpty())
{
int score = actualAutomaton.pairscores.computePairCompatibilityScore_general(null, pairsList, verticesToMerge);
if (score < 0)
{
learnerOfPairs = new LearnerMarkovPassive(learnerEval,referenceGraph,pta);learnerOfPairs.setMarkovModel(m);
learnerOfPairs.setLabelsLeadingFromStatesToBeMerged(Arrays.asList(new Label[]{uniqueFromInitial}));
actualAutomaton = learnerOfPairs.learnMachine(new LinkedList<List<Label>>(),new LinkedList<List<Label>>());
score = actualAutomaton.pairscores.computePairCompatibilityScore_general(null, pairsList, verticesToMerge);
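// the merge mandated by the unique label is impossible (negative score); the graph is re-learnt and the score recomputed above, presumably as a debugging aid, before the failure is reported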
throw new RuntimeException("last merge in the learning process was not possible");
}
actualAutomaton = MergeStates.mergeCollectionOfVertices(actualAutomaton, null, verticesToMerge);
}
}
else
{// not merging based on a unique transition from an initial state
//learnerEval.config.setGeneralisationThreshold(1);
learnerOfPairs = new LearnerMarkovPassive(learnerEval,referenceGraph,pta);learnerOfPairs.setMarkovModel(m);
//learnerOfPairs.setPairsToMerge(checkVertices(pta, referenceGraph, m));
final LearnerGraph finalReferenceGraph = referenceGraph;
learnerOfPairs.setScoreComputationOverride(new statechum.analysis.learning.rpnicore.PairScoreComputation.RedNodeSelectionProcedure() {
@SuppressWarnings("unused")
@Override
public CmpVertex selectRedNode(LearnerGraph gr,Collection<CmpVertex> reds, Collection<CmpVertex> tentativeRedNodes)
{
return tentativeRedNodes.iterator().next();
}
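// Rather than resolving the dead end, the override below records the inconsistency of hypothetically merging the QSM-preferred pair; overrideScoreComputation later subtracts this baseline from the inconsistency of each candidate merge. Returning null means no vertex is singled out here.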
@SuppressWarnings("unused")
@Override
public CmpVertex resolvePotentialDeadEnd(LearnerGraph gr, Collection<CmpVertex> reds, List<PairScore> pairs)
{
PairScore p = LearnerThatCanClassifyPairs.pickPairQSMLike(pairs);
LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>> verticesToMerge = new LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>>();
// constructPairsToMergeBasedOnSetsToMerge(coregraph.transitionMatrix.keySet(),verticesToMergeBasedOnInitialPTA)
int genScore = coregraph.pairscores.computePairCompatibilityScore_general(p, null, verticesToMerge);
assert genScore >= 0;
LearnerGraph merged = MergeStates.mergeCollectionOfVertices(coregraph, null, verticesToMerge);
long value = MarkovClassifier.computeInconsistency(merged, m, checker,false);
inconsistencyFromAnEarlierIteration = value;
return null;
}
long inconsistencyFromAnEarlierIteration = 0;
LearnerGraph coregraph = null;
LearnerGraphND inverseGraph = null;
/** Where I have a set of paths to merge because I have identified specific states, this map is constructed that maps vertices to be merged together to the partition number that corresponds to them. */
Map<CmpVertex,Integer> vertexToPartition = new TreeMap<CmpVertex,Integer>();
@Override
public void initComputation(LearnerGraph graph)
{
coregraph = graph;
//labelStatesAwayFromRoot(coregraph,m.getChunkLen()-1);
inverseGraph = (LearnerGraphND)MarkovClassifier.computeInverseGraph(coregraph,true);
vertexToPartition.clear();
int partitionNumber=0;
for(Set<CmpVertex> set:verticesToMergeBasedOnInitialPTA)
{
for(CmpVertex v:set) vertexToPartition.put(v, partitionNumber);
++partitionNumber;
}
}
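// Score override: start from the pair's basic score; reject if it is already negative, otherwise hypothetically merge the pair and subtract the resulting increase in Markov inconsistency (relative to the baseline recorded above), unless both vertices already belong to the same partition identified from the initial PTA.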
@Override
public long overrideScoreComputation(PairScore p)
{
/*
MarkovClassifier cl = new MarkovClassifier(m, coregraph);
long score = 0;
Map<Label, MarkovOutcome> predictedFromRed=cl.predictTransitionsFromState(p.getR(), null, m.getChunkLen(), null);
for(Entry<Label,MarkovOutcome> entry:cl.predictTransitionsFromState(p.getQ(), null, m.getChunkLen(), null).entrySet())
{
MarkovOutcome red = predictedFromRed.get(entry.getKey());
if (red == null || red != entry.getValue())
{
score = -1;break;
}
}
if (score >= 0)
{
LearnerGraph extendedGraph = cl.constructMarkovTentative();
score = extendedGraph.pairscores.computePairCompatibilityScore(p);
}
*/
long score = p.getScore();//computeScoreUsingMarkovFanouts(coregraph,origInverse,m,callbackAlphabet,p);
if (score < 0)
return score;
long currentInconsistency = 0;
Integer a=vertexToPartition.get(p.getR()), b = vertexToPartition.get(p.getQ());
LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>> verticesToMerge = new LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>>();
int genScore = coregraph.pairscores.computePairCompatibilityScore_general(p, null, verticesToMerge);
if (genScore >= 0)
{
LearnerGraph merged = MergeStates.mergeCollectionOfVertices(coregraph, null, verticesToMerge);
currentInconsistency = MarkovClassifier.computeInconsistency(merged, m, checker,
false
//p.getQ().getStringId().equals("P2672") && p.getR().getStringId().equals("P2209")
)-inconsistencyFromAnEarlierIteration;
if (a == null || b == null || !a.equals(b))// compare partition numbers by value: '!=' on Integer objects compares references
score -= currentInconsistency;
}
//System.out.println(p.toString()+", score "+score);
/*
ArrayList<PairScore> pairOfInterest = new ArrayList<PairScore>(1);pairOfInterest.add(p);
List<PairScore> correctPairs = new ArrayList<PairScore>(1), wrongPairs = new ArrayList<PairScore>(1);
SplitSetOfPairsIntoRightAndWrong(coregraph, finalReferenceGraph, pairOfInterest, correctPairs, wrongPairs);
long score = p.getScore();//computeScoreUsingMarkovFanouts(coregraph,origInverse,m,callbackAlphabet,p);
if (score < 0)
return score;
long currentInconsistency = 0;
double relativeInconsistency = 0.;
Integer a=vertexToPartition.get(p.getR()), b = vertexToPartition.get(p.getQ());
LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>> verticesToMerge = new LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>>();
int genScore = coregraph.pairscores.computePairCompatibilityScore_general(p, null, verticesToMerge);
if (genScore >= 0)
LearnerGraph merged = MergeStates.mergeCollectionOfVertices(coregraph, null, verticesToMerge);
currentInconsistency = MarkovClassifier.computeInconsistency(merged, m, checker,
false
//p.getQ().getStringId().equals("P2672") && p.getR().getStringId().equals("P2209")
)-inconsistencyFromAnEarlierIteration;
relativeInconsistency = new MarkovClassifier(m, merged).computeRelativeInconsistency(checker);
}
// A green state next to a red may have many incoming paths, more than in a PTA, some of which may predict its outgoing transition as non-existent.
// When a merge happens this state may be merged into the one with a similar surroundings. In this way, two states with the same in-out inconsistency
// are merged into the one with that inconsistency, turning two inconsistencies into one and hence reducing the total number of inconsistencies.
score=genScore;
if (relativeInconsistency > 5 || relativeInconsistency > genScore)
score=-1;
*/
//System.out.println("pair: "+p+" score: "+score);
/*
if (score < 0 && wrongPairs.isEmpty())
System.out.println("incorrectly blocked merge of "+p+" a="+a+" b="+b+" inconsistency = "+currentInconsistency+" relative: "+relativeInconsistency+" genscore is "+genScore);
if (score >= 0 && correctPairs.isEmpty())
System.out.println("invalid merge of "+p+" a="+a+" b="+b+" inconsistency = "+currentInconsistency+" relative: "+relativeInconsistency+" genscore is "+genScore);
*/
return score;
}
/** Intended to return the set of transitions surrounding the supplied red state, in all directions; the implementation is currently disabled and returns null. */
@Override
public Collection<Entry<Label, CmpVertex>> getSurroundingTransitions(CmpVertex currentRed)
{
return null;//obtainSurroundingTransitions(coregraph,inverseGraph,currentRed);
}
});
actualAutomaton = learnerOfPairs.learnMachine(new LinkedList<List<Label>>(),new LinkedList<List<Label>>());
}
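// Finally, merge into the learnt automaton the state groups identified from the initial PTA and gather a few statistics on its structure (chains, tails, double chains); these are only reported by the commented-out diagnostics.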
{
LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>> verticesToMerge = new LinkedList<AMEquivalenceClass<CmpVertex,LearnerGraphCachedData>>();
int genScore = actualAutomaton.pairscores.computePairCompatibilityScore_general(null, constructPairsToMergeBasedOnSetsToMerge(actualAutomaton.transitionMatrix.keySet(),verticesToMergeBasedOnInitialPTA), verticesToMerge);
assert genScore >= 0;
actualAutomaton = MergeStates.mergeCollectionOfVertices(actualAutomaton, null, verticesToMerge);
long chains = 0,tails=0,doubleChains=0;
for(Entry<CmpVertex,Map<Label,CmpVertex>> entry:actualAutomaton.transitionMatrix.entrySet())
{
if (entry.getValue().isEmpty())
++tails;
if (entry.getValue().size() == 1)
{
++chains;
CmpVertex target=entry.getValue().values().iterator().next();
if (actualAutomaton.transitionMatrix.get(target).size() == 1)
++doubleChains;
}
}
//System.out.println("Chains: "+chains+" Tails: "+tails+" Double chains: "+doubleChains);
//System.out.println("Inconsistency for the original: "+new MarkovClassifier(ptaClassifier.model, trimmedReference).countPossibleInconsistencies(checker)+" and for the learnt: "+new MarkovClassifier(ptaClassifier.model, actualAutomaton).countPossibleInconsistencies(checker));
//actualAutomaton = formLoops(actualAutomaton, m, directionForwardOrInverse);
}
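// Evaluation: complete the learnt automaton with a reject state so that it can be compared with the (completed) reference, measure its difference from the reference over the test set, and do the same for the baseline ReferenceLearner run on a copy of the original PTA.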
SampleData dataSample = new SampleData(null,null);
//dataSample.difference = new DifferenceToReferenceDiff(0, 0);
//dataSample.differenceForReferenceLearner = new DifferenceToReferenceDiff(0, 0);
VertID rejectVertexID = null;
for(CmpVertex v:actualAutomaton.transitionMatrix.keySet())
if (!v.isAccept())
{
assert rejectVertexID == null : "multiple reject vertices in learnt automaton, such as "+rejectVertexID+" and "+v;
rejectVertexID = v;break;
}
if (rejectVertexID == null)
rejectVertexID = actualAutomaton.nextID(false);
actualAutomaton.pathroutines.completeGraphPossiblyUsingExistingVertex(rejectVertexID);// we need to complete the graph, otherwise we are not matching it with the original one that has been completed.
dataSample.actualLearner = estimateDifference(referenceGraph,actualAutomaton,testSet);
LearnerGraph outcomeOfReferenceLearner = new ReferenceLearner(learnerEval,referenceGraph,ptaCopy,false).learnMachine(new LinkedList<List<Label>>(),new LinkedList<List<Label>>());
dataSample.referenceLearner = estimateDifference(referenceGraph, outcomeOfReferenceLearner,testSet);
System.out.println("actual: "+actualAutomaton.getStateNumber()+" from reference learner: "+outcomeOfReferenceLearner.getStateNumber()+ " difference actual is "+dataSample.actualLearner+ " difference ref is "+dataSample.referenceLearner);
outcome.samples.add(dataSample);
}
return outcome;
}