// Read the configuration properties from the file named by fileName.
// Properties.load() leaves the stream it is given open, so the stream is
// closed explicitly here; otherwise the file handle would leak.
Properties config = new Properties();
try {
    FileInputStream configStream = new FileInputStream(new File(fileName));
    try {
        config.load(configStream);
    } finally {
        configStream.close();
    }
} catch (IOException e) {
    // Reached both when the file cannot be opened and when reading (or closing) it fails.
    throw new ConfigurationException("Cannot find configuration file " + fileName, e);
}
DEFAULT_NAMESPACE = config.getProperty("org.dbpedia.spotlight.default_namespace", DEFAULT_NAMESPACE);
dbpediaResource = config.getProperty("org.dbpedia.spotlight.default_namespace", dbpediaResource);
DEFAULT_ONTOLOGY_PREFIX = config.getProperty("org.dbpedia.spotlight.default_ontology", DEFAULT_ONTOLOGY_PREFIX);
dbpediaOntology =config.getProperty("org.dbpedia.spotlight.default_ontology", dbpediaOntology);
DEFAULT_LANGUAGE_I18N_CODE = config.getProperty("org.dbpedia.spotlight.language_i18n_code", DEFAULT_LANGUAGE_I18N_CODE);
i18nLanguageCode = config.getProperty("org.dbpedia.spotlight.language_i18n_code", "en");
//Read the spotter configuration from the properties file
spotterConfiguration = new SpotterConfiguration(fileName);
disambiguatorConfiguration = new DisambiguatorConfiguration(fileName);
//set spotterFile, indexDir...
contextIndexDirectory = disambiguatorConfiguration.contextIndexDirectory;
//optionally use separate candidate map
candidateMapDirectory = config.getProperty("org.dbpedia.spotlight.candidateMap.dir", "").trim();
if (candidateMapDirectory == null || !new File(candidateMapDirectory).isDirectory()) {
LOG.warn("Could not use the candidateMap.dir provided. Will use index.dir both for context and candidate searching.");
candidateMapDirectory = contextIndexDirectory;
}
candidateMapInMemory = config.getProperty("org.dbpedia.spotlight.candidateMap.loadToMemory", "false").trim().equals("true");
// Load the similarity thresholds: one double per line, read from the file
// similarityThresholdsFile inside contextIndexDirectory.
// Any failure (missing file, unparsable line, read error) aborts configuration
// with a ConfigurationException that wraps the original cause.
try {
    BufferedReader r = new BufferedReader(new FileReader(new File(contextIndexDirectory, similarityThresholdsFile)));
    try {
        String line;
        similarityThresholds = new ArrayList<Double>();
        while ((line = r.readLine()) != null) {
            similarityThresholds.add(Double.parseDouble(line));
        }
    } finally {
        // Release the file handle whether or not reading/parsing succeeded.
        r.close();
    }
} catch (FileNotFoundException e) {
    throw new ConfigurationException("Similarity threshold file '" + similarityThresholdsFile + "' not found in index directory " + contextIndexDirectory, e);
} catch (NumberFormatException e) {
    throw new ConfigurationException("Error parsing similarity value in '" + contextIndexDirectory + "/" + similarityThresholdsFile + "'", e);
} catch (IOException e) {
    throw new ConfigurationException("Error reading '" + contextIndexDirectory + "/" + similarityThresholdsFile + "'", e);
}
taggerFile = config.getProperty("org.dbpedia.spotlight.tagging.hmm", "").trim();
if (taggerFile == null || !new File(taggerFile).isFile()) {
throw new ConfigurationException("Cannot find POS tagger model file " + taggerFile);
}
// Language name; it selects which stop-words property is looked up below.
language = config.getProperty("org.dbpedia.spotlight.language", "English");
// Lower-case with a fixed locale so the property key does not vary with the
// JVM's default locale (e.g. 'I' lower-cases to a dotless i under a Turkish locale).
String stopWordsKey = "org.dbpedia.spotlight.data.stopWords." + language.toLowerCase(java.util.Locale.ENGLISH);
stopWordsFile = config.getProperty(stopWordsKey, "").trim();
// Stop words are optional (best effort): a missing or unreadable file is
// logged and stopWords is left at whatever value it had before.
if (!new File(stopWordsFile).isFile()) {
    LOG.warn("Cannot find stopwords file '" + stopWordsFile + "'. Using default Lucene Analyzer StopWords.");
} else {
    try {
        // Collect into a local set first so that a failure part-way through
        // the file never leaves stopWords half populated.
        HashSet<String> loadedStopWords = new HashSet<String>();
        BufferedReader stopWordsReader = new BufferedReader(new FileReader(stopWordsFile));
        try {
            String line;
            while ((line = stopWordsReader.readLine()) != null) {
                loadedStopWords.add(line.trim());
            }
        } finally {
            // Close the reader on both the success and the failure path.
            stopWordsReader.close();
        }
        stopWords = loadedStopWords;
    } catch (Exception e1) {
        // Keep the best-effort behaviour, but record the cause in the log.
        LOG.error("Could not read stopwords file. Using default Lucene Analyzer StopWords", e1);
    }
}
analyzer = Factory.analyzer().from(
config.getProperty("org.dbpedia.spotlight.lucene.analyzer", "org.apache.lucene.analysis.standard.StandardAnalyzer"),
config.getProperty("org.dbpedia.spotlight.lucene.version", "LUCENE_36"), stopWords);
serverURI = config.getProperty("org.dbpedia.spotlight.web.rest.uri", "").trim();
if (serverURI != null && !serverURI.endsWith("/")) {
serverURI = serverURI.concat("/");
}
try {
new URI(serverURI);
} catch (URISyntaxException e) {
throw new ConfigurationException("Server URI not valid.", e);
}
// Configure Lucene to accept a larger number of OR clauses per query by
// setting the BooleanQuery clause limit to 3072.
// NOTE(review): this is invoked on the BooleanQuery class rather than on an
// instance, so it presumably applies to every query in this JVM - confirm.
BooleanQuery.setMaxClauseCount(3072);