private static void addProfile(Language language) {
final String PROFILE_SUFFIX = ".ngp";
final String PROFILE_ENCODING = "UTF-8";
try {
final LanguageProfile profile = new LanguageProfile();
final String languageCode = language.getShortName();
final String detectionFile = "/" + languageCode + "/" + languageCode + PROFILE_SUFFIX;
if (!JLanguageTool.getDataBroker().resourceExists(detectionFile)) {
// that's okay, not every language comes with its own detection file,
// as Tika supports most languages out of the box.
return;
}
final InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(detectionFile);
try {
final InputStreamReader in = new InputStreamReader(stream, PROFILE_ENCODING);
final BufferedReader reader =
new BufferedReader(in);
String line = reader.readLine();
while (line != null) {
if (line.length() > 0 && !line.startsWith("#")) {
final int space = line.indexOf(' ');
profile.add(
line.substring(0, space),
Long.parseLong(line.substring(space + 1)));
}
line = reader.readLine();
}