// Output file for the test portion of the split. The file name encodes the split
// percentage as a whole number (e.g. 0.1 -> "10") followed by the target type.
// A primitive cast is used instead of the deprecated new Double(...).intValue();
// both truncate the product toward zero.
File testFile = new File("E:/dbpa/data/Person_newSurrogates/wikipediaTest." + (int) (percentageSplit * 100) + "." + targetType + ".amb.tsv");

// The following lines collect the "confusable-with" surface forms for targetType,
// which are then used to split the data into training and testing sets.
File instancesFile = new File("data/dbpedia/instance_types_en.nt");
File surrogateIndexDir = new File("data/SurrogateIndex.TitRedDisOcc.lowerCase");

// Open the surrogate index with the case-insensitive surface-form manager.
// NOTE(review): presumably chosen to match the ".lowerCase" index variant - confirm.
LuceneManager manager = new LuceneManager.CaseInsensitiveSurfaceForms(FSDirectory.open(surrogateIndexDir));
// NOTE(review): the meaning of the 'false' flag is not visible here - check LuceneCandidateSearcher.
LuceneCandidateSearcher surrogateSearcher = new LuceneCandidateSearcher(manager, false);

// Surface forms obtained from the instance-types file and the surrogate searcher for targetType.
Set<String> surfaceForms = getConfusableSurfaceForms(targetType, instancesFile, surrogateSearcher);

// Split by surface form, using the set collected above.
DatasetSplitter splitter = new BySurfaceForm(trainingFile, testFile, minSize, percentageSplit, surfaceForms);
// Alternative strategy (disabled): split by size only, without the surface-form set.
//DatasetSplitter splitter = new BySize(trainingFile, testFile, minSize, percentageSplit);