public static void main(String[] args) throws IOException, JSONException, ConfigurationException, InitializationException, org.json.JSONException, SpottingException {
File dictionary = new File("/Users/jodaiber/Desktop/lrec_2012_spotting/surface_forms-Wikipedia-TitRedDis.thresh3.spotterDictionary");
Spotter spotter = null;
//
if (args.length==0)
{
LOG.error("server.properties is requested to continue...");
return;
}
SpotlightConfiguration configuration = new SpotlightConfiguration(args[0]);
int spotterNr = 0;
switch(spotterNr) {
case 0: {
String openNLPDir = "/Users/jodaiber/Desktop/DBpedia/";
SurfaceFormDictionary sfDictProbThresh3 = ExactSurfaceFormDictionary.fromLingPipeDictionary(dictionary, false);
System.out.println("Dictionary size: " + sfDictProbThresh3.size());
File stopwordsFile = new File(openNLPDir+"stopwords.txt");
spotter = OpenNLPChunkerSpotter.fromDir(openNLPDir,configuration.getI18nLanguageCode(),sfDictProbThresh3,stopwordsFile);
break;
}
case 1: {
spotter = new LingPipeSpotter(dictionary, configuration.getAnalyzer());
break;
}
}
System.out.println("Using Spotter " + spotter.getName());
System.out.println("Running GC.");
System.gc(); System.gc(); System.gc(); System.gc();
int i = 0;
LinkedList<Long> consumption = new LinkedList<Long>();
for (File textFile : new File("/data/spotlight/csaw/original/crawledDocs").listFiles()) {
if (!textFile.getName().endsWith(".txt"))
continue;
i++;
if (i == 100)
break;
spotter.extract(
new Text(
new Scanner(textFile).useDelimiter("\\A").next()
)
);