// Evaluation functions that are applied further down to pred2, i.e. to the predictions of the
// model configured through linParms2.
evalFuncs2.add(new Task1Score());
evalFuncs2.add(new MeanSquaredError());
evalFuncs2.add(new MeanAbsoluteError());
// Parameters for the first LibLINEAR model (solver constant SVC_DUAL); it is trained below on the
// binned targets (targetABins) and scored with the evalFuncs1 functions.
LibLINEARParameters linParms = new LibLINEARParameters(LibLINEARParameters.SVC_DUAL, cs);
linParms.setEvalFunction(new Task1ScoreForBothBins(bins));
// No cross-validation; the 0.8 split fraction is later handed to LibLINEAR.trainTestSplit.
linParms.setDoCrossValidation(false);
linParms.setSplitFraction((float) 0.8);
// NOTE(review): eps and ps are presumably forwarded to the LibLINEAR settings of the same name -
// confirm in LibLINEARParameters.
linParms.setEps(0.1);
linParms.setPs(ps1);

// Give every class a weight equal to the reciprocal of the value that computeClassCounts reports
// for it. Label and weight are written to the same, consecutively assigned slot, so the pairing
// holds for any label set; the labels no longer have to be exactly 1..n to stay inside the arrays.
Map<Double, Double> counts = EvaluationUtils.computeClassCounts(targetBins);
int[] wLabels = new int[counts.size()];
double[] weights = new double[counts.size()];
int slot = 0;
for (Map.Entry<Double, Double> classCount : counts.entrySet()) {
    // intValue() is the same narrowing conversion as the (int) cast on the unboxed key.
    wLabels[slot] = classCount.getKey().intValue();
    // The Double value is unboxed, so this is floating-point division.
    weights[slot] = 1 / classCount.getValue();
    slot++;
}
linParms.setWeightLabels(wLabels);
linParms.setWeights(weights);
// Parameters for the second LibLINEAR model (solver constant SVR_DUAL); it is trained below on
// targetA (copied from target, as opposed to targetBins) and scored with the evalFuncs2 functions.
LibLINEARParameters linParms2 = new LibLINEARParameters(LibLINEARParameters.SVR_DUAL, cs);
linParms2.setEvalFunction(new Task1Score());
// No cross-validation; the 0.8 split fraction is later handed to LibLINEAR.trainTestSplit.
linParms2.setDoCrossValidation(false);
linParms2.setSplitFraction((float) 0.8);
// NOTE(review): eps, ps and bias are presumably forwarded to the LibLINEAR settings of the same
// name - confirm in LibLINEARParameters.
linParms2.setEps(0.1);
linParms2.setPs(ps2);
// Unlike linParms, this configuration also sets a bias of 1.
linParms2.setBias(1);
// One result-table row per depth for RDFIntersectionTreeEdgeVertexPathWithTextKernel: compute the
// feature vectors once, then train and evaluate both LibLINEAR models once per seed.
for (int pathDepth : depths) {
    resTable.newRow("ITP BoW, depth="+pathDepth);
    RDFFeatureVectorKernel itpKernel =
            new RDFIntersectionTreeEdgeVertexPathWithTextKernel(pathDepth, false, inference, false);
    System.out.println("Running PathWithText kernel: " + pathDepth );

    // Every evaluation function gets one score array with a slot per seed. The same array
    // instance is handed to its Result and stored in a lookup map, so the per-seed loop below
    // can write scores through the map.
    List<Result> rowResults = new ArrayList<Result>();

    Map<EvaluationFunction, double[]> binScores = new HashMap<EvaluationFunction, double[]>();
    for (EvaluationFunction binFunc : evalFuncs1) {
        double[] perSeed = new double[seeds.length];
        Result entry = new Result();
        entry.setLabel(binFunc.getLabel());
        entry.setScores(perSeed);
        entry.setHigherIsBetter(binFunc.isHigherIsBetter());
        rowResults.add(entry);
        binScores.put(binFunc, perSeed);
    }

    Map<EvaluationFunction, double[]> rawScores = new HashMap<EvaluationFunction, double[]>();
    for (EvaluationFunction rawFunc : evalFuncs2) {
        double[] perSeed = new double[seeds.length];
        Result entry = new Result();
        entry.setLabel(rawFunc.getLabel());
        entry.setScores(perSeed);
        entry.setHigherIsBetter(rawFunc.isHigherIsBetter());
        rowResults.add(entry);
        rawScores.put(rawFunc, perSeed);
    }

    // The timing entry comes last in the row, after all evaluation-function entries.
    Result timing = new Result();
    timing.setLabel("kernel comp time");
    rowResults.add(timing);

    // Private copies, so the shuffles below leave target and targetBins untouched.
    List<Double> shuffledTargets = new ArrayList<Double>(target);
    List<Double> shuffledBins = new ArrayList<Double>(targetBins);

    // Only the feature-vector computation is timed; TF-IDF and normalisation are not.
    long startMs = System.currentTimeMillis();
    SparseVector[] itpFeatures = itpKernel.computeFeatureVectors(dataset, instances, blackList);
    long endMs = System.currentTimeMillis();

    SparseVector[] normalized = KernelUtils.normalize(
            TextUtils.computeTFIDF(Arrays.asList(itpFeatures)).toArray(new SparseVector[1]));
    // Fixed-size list view on the normalised array: shuffling the view permutes that array.
    List<SparseVector> featureView = Arrays.asList(normalized);

    for (int run = 0; run < seeds.length; run++) {
        // Each of the three lists is shuffled with a fresh Random built from the same seed, so
        // (assuming the lists have equal size) all three receive the same permutation. The
        // shuffles accumulate from one seed to the next because the lists are never reset.
        Collections.shuffle(featureView, new Random(seeds[run]));
        Collections.shuffle(shuffledTargets, new Random(seeds[run]));
        Collections.shuffle(shuffledBins, new Random(seeds[run]));
        SparseVector[] runFeatures = featureView.toArray(new SparseVector[1]);

        // Unbox both label lists into primitive arrays for LibLINEAR.
        int sampleCount = shuffledTargets.size();
        double[] runTargets = new double[sampleCount];
        double[] runBins = new double[shuffledBins.size()];
        for (int k = 0; k < sampleCount; k++) {
            runTargets[k] = shuffledTargets.get(k);
            runBins[k] = shuffledBins.get(k);
        }

        // First model on the binned labels, second model on the unbinned targets.
        Prediction[] binPred =
                LibLINEAR.trainTestSplit(runFeatures, runBins, linParms, linParms.getSplitFraction());
        Prediction[] rawPred =
                LibLINEAR.trainTestSplit(runFeatures, runTargets, linParms2, linParms2.getSplitFraction());

        // Both prediction sets are scored against the same split of the unbinned targets.
        double[] heldOut = LibLINEAR.splitTestTarget(runTargets, linParms.getSplitFraction());
        for (EvaluationFunction binFunc : evalFuncs1) {
            binScores.get(binFunc)[run] = binFunc.computeScore(heldOut, binPred);
        }
        for (EvaluationFunction rawFunc : evalFuncs2) {
            rawScores.get(rawFunc)[run] = rawFunc.computeScore(heldOut, rawPred);
        }
    }

    // Elapsed kernel time in milliseconds, stored as a single-element score array.
    timing.setScores(new double[] {endMs - startMs});
    for (Result entry : rowResults) {
        resTable.addResult(entry);
    }
}
for (int d : depths) {
for (int it : iterations) {
resTable.newRow("RDF WL BoW, depth="+d);
/*
List<RDFFeatureVectorKernel> kernels = new ArrayList<RDFFeatureVectorKernel>();
kernels.add(new RDFWLSubTreeKernel(it,d, inference, false));
kernels.add(new RDFSimpleTextKernel(d, inference, false));
RDFFeatureVectorKernel kernel = new RDFCombinedKernel(kernels, true);
*/
RDFFeatureVectorKernel kernel = new RDFWLSubTreeWithTextKernel(it, d, inference, false);
System.out.println("Running RDFWL + text kernel: " + d + " " + it);
Map<EvaluationFunction, double[]> resultMap = new HashMap<EvaluationFunction,double[]>();
Map<EvaluationFunction, double[]> resultMap2 = new HashMap<EvaluationFunction,double[]>();
List<Result> results = new ArrayList<Result>();
for (EvaluationFunction evalFunc : evalFuncs1) {
Result res = new Result();
double[] resA = new double[seeds.length];
res.setLabel(evalFunc.getLabel());
res.setScores(resA);
res.setHigherIsBetter(evalFunc.isHigherIsBetter());
results.add(res);
resultMap.put(evalFunc, resA);
}
for (EvaluationFunction evalFunc : evalFuncs2) {
Result res = new Result();
double[] resA = new double[seeds.length];
res.setLabel(evalFunc.getLabel());
res.setScores(resA);
res.setHigherIsBetter(evalFunc.isHigherIsBetter());
results.add(res);
resultMap2.put(evalFunc, resA);
}
Result compR = new Result();
results.add(compR);
long tic, toc;
List<Double> tempLabels = new ArrayList<Double>();
List<Double> tempLabelsBins = new ArrayList<Double>();
tempLabels.addAll(target);
tempLabelsBins.addAll(targetBins);
tic = System.currentTimeMillis();
SparseVector[] fv = kernel.computeFeatureVectors(dataset, instances, blackList);
toc = System.currentTimeMillis();
fv = TextUtils.computeTFIDF(Arrays.asList(fv)).toArray(new SparseVector[1]);
fv = KernelUtils.normalize(fv);
List<SparseVector> fvList = Arrays.asList(fv);
compR.setLabel("kernel comp time");
for (int j = 0; j < seeds.length; j++) {
Collections.shuffle(fvList, new Random(seeds[j]));
Collections.shuffle(tempLabels, new Random(seeds[j]));
Collections.shuffle(tempLabelsBins, new Random(seeds[j]));
fv = fvList.toArray(new SparseVector[1]);
double[] targetA = new double[tempLabels.size()];
double[] targetABins = new double[tempLabelsBins.size()];
for (int i = 0; i < targetA.length; i++) {
targetA[i] = tempLabels.get(i);
targetABins[i] = tempLabelsBins.get(i);
}
Prediction[] pred = LibLINEAR.trainTestSplit(fv, targetABins, linParms, linParms.getSplitFraction());
Prediction[] pred2 = LibLINEAR.trainTestSplit(fv, targetA, linParms2, linParms2.getSplitFraction());
double[] targetSplit = LibLINEAR.splitTestTarget(targetA, linParms.getSplitFraction());
for (EvaluationFunction ef : evalFuncs1) {
resultMap.get(ef)[j] = ef.computeScore(targetSplit, pred);