String mimeType = (String) property.getMetadata().get(LumifyProperties.MIME_TYPE.getPropertyName());
return !(mimeType == null || !mimeType.startsWith("text"));
}
/**
 * Builds an Aho-Corasick tree from the dictionary files found in the
 * {@code dictionaries} directory beneath {@code pathPrefix}.
 *
 * <p>The directory is created when it does not exist yet. Only entries whose
 * file name ends with {@code .dict} and does not start with a dot are read.
 * For each such file the concept name is the URL-decoded (UTF-8) base name of
 * the file, and the file's contents are handed to
 * {@code addDictionaryEntriesToTree} together with that concept name.
 * {@code prepare()} is called on the tree before it is returned.
 *
 * @param fs         file system used to check, create, list and open paths
 * @param pathPrefix parent path under which the {@code dictionaries} directory lives
 * @return the prepared tree
 * @throws IOException if a file system operation or reading a dictionary fails
 */
private static AhoCorasick loadDictionaries(FileSystem fs, String pathPrefix) throws IOException {
    AhoCorasick tree = new AhoCorasick();
    Path dictionaryDir = new Path(pathPrefix, "dictionaries");

    // Make sure the directory is there before listing it.
    boolean dictionaryDirPresent = fs.exists(dictionaryDir);
    if (!dictionaryDirPresent) {
        fs.mkdirs(dictionaryDir);
    }

    FileStatus[] candidates = fs.listStatus(dictionaryDir);
    for (FileStatus candidate : candidates) {
        Path dictionaryPath = candidate.getPath();
        String fileName = dictionaryPath.getName();

        // Hidden files and anything without the .dict suffix are ignored.
        boolean isDictionaryFile = !fileName.startsWith(".") && fileName.endsWith(".dict");
        if (isDictionaryFile) {
            LOGGER.info("Loading known entity dictionary %s", dictionaryPath.toString());

            // The concept name is encoded in the file name (minus extension).
            String conceptName = URLDecoder.decode(FilenameUtils.getBaseName(fileName), "UTF-8");

            InputStream in = fs.open(dictionaryPath);
            try {
                addDictionaryEntriesToTree(tree, conceptName, in);
            } finally {
                in.close();
            }
        }
    }

    tree.prepare();
    return tree;
}