//
// Now evaluate the dictionary using the "test" set.
// Be sure to keep a lot of statistics about match failures
//
System.err.println("Testing schema dictionary...");
// Build the suggester over the dictionary directory, with attribute labels switched off.
SchemaSuggest ss = new SchemaSuggest(dictDir);
ss.setUseAttributeLabels(false);
// Histogram of schema field count -> number of test files seen with that field count;
// incremented once per ".avro" test file in the loop below.
TreeMap<Integer, Integer> overallSizes = new TreeMap<Integer, Integer>();
// NOTE(review): presumably the same histogram restricted to tests that failed to match;
// it is not written to in the visible part of the loop -- confirm where it is filled.
TreeMap<Integer, Integer> failureSizes = new TreeMap<Integer, Integer>();
// NOTE(review): presumably the schemas and summaries of failed tests, kept for later
// reporting -- confirm against the code that appends to them.
List<Schema> failedSchemas = new ArrayList<Schema>();
List<SchemaStatisticalSummary> failedSummaries = new ArrayList<SchemaStatisticalSummary>();
// Running accumulator, starting at zero.
// NOTE(review): presumably the sum of 1/rank over test files -- confirm.
double totalReciprocalRank = 0;
// NOTE(review): i looks like a count of test files processed and failures a count of
// tests whose expected schema was not found -- confirm against the loop body.
int i = 0;
int failures = 0;
// Iterate through all files in the test dir
System.err.println("Examining: " + testDbDir);
// Visit every entry of the test directory; only files whose names end in ".avro" are evaluated.
// NOTE(review): File.listFiles() returns null when testDbDir is not a directory or cannot be
// read, which would raise a NullPointerException on this line -- confirm the caller validates it.
for (File f: testDbDir.listFiles()) {
try {
if (f.getName().endsWith(".avro")) {
String testName = f.getName();
// Summarize the test file; createSummaryFromData hands back the Schema used below.
SchemaStatisticalSummary testSummary = new SchemaStatisticalSummary("input");
Schema testSchema = testSummary.createSummaryFromData(f);
// Count this schema's number of fields in the overall size histogram.
int schemaSize = testSchema.getFields().size();
Integer sizeCount = overallSizes.get(schemaSize);
if (sizeCount == null) {
// First schema of this size. Integer.valueOf replaces the deprecated Integer(int) constructor.
sizeCount = Integer.valueOf(0);
}
overallSizes.put(schemaSize, Integer.valueOf(sizeCount.intValue() + 1));
System.err.println("Testing against " + testName);
System.err.println("Schema size is " + schemaSize);
// Go through the top-MAX_MAPPINGS related schemas, as returned by SchemaDictionary
int rank = 1;
// Time the mapping inference; the elapsed time is reported in seconds together with
// the number of mappings returned.
long startTime = System.currentTimeMillis();
List<DictionaryMapping> mappings = ss.inferSchemaMapping(f, MAX_MAPPINGS);
long endTime = System.currentTimeMillis();
System.err.println(" it took " + ((endTime - startTime) / 1000.0) + ", returned " + mappings.size() + " elts");
// One score slot per returned mapping.
// NOTE(review): presumably filled while walking the mappings -- confirm.
double scores[] = new double[mappings.size()];
// NOTE(review): presumably set once the expected schema shows up among the mappings -- confirm.
boolean foundGoal = false;