parsers.put("application/vnd.openxmlformats-officedocument.wordprocessingml.document", parser);
parsers.put("application/vnd.openxmlformats-officedocument.presentationml.presentation", parser);
parsers.put("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", parser);
} else if (name.equals(
"org.apache.jackrabbit.extractor.OpenOfficeTextExtractor")) {
Parser parser = new OpenOfficeParser();
parsers.put("application/vnd.oasis.opendocument.database", parser);
parsers.put("application/vnd.oasis.opendocument.formula", parser);
parsers.put("application/vnd.oasis.opendocument.graphics", parser);
parsers.put("application/vnd.oasis.opendocument.presentation", parser);
parsers.put("application/vnd.oasis.opendocument.spreadsheet", parser);
parsers.put("application/vnd.oasis.opendocument.text", parser);
parsers.put("application/vnd.sun.xml.calc", parser);
parsers.put("application/vnd.sun.xml.draw", parser);
parsers.put("application/vnd.sun.xml.impress", parser);
parsers.put("application/vnd.sun.xml.writer", parser);
} else if (name.equals(
"org.apache.jackrabbit.extractor.PdfTextExtractor")) {
parsers.put("application/pdf", new PDFParser());
} else if (name.equals(
"org.apache.jackrabbit.extractor.PlainTextExtractor")) {
parsers.put("text/plain", new TXTParser());
} else if (name.equals(
"org.apache.jackrabbit.extractor.PngTextExtractor")) {
Parser parser = new ImageParser();
parsers.put("image/png", parser);
parsers.put("image/apng", parser);
parsers.put("image/mng", parser);
} else if (name.equals(
"org.apache.jackrabbit.extractor.RTFTextExtractor")) {
Parser parser = new RTFParser();
parsers.put("application/rtf", parser);
parsers.put("text/rtf", parser);
} else if (name.equals(
"org.apache.jackrabbit.extractor.XMLTextExtractor")) {
Parser parser = new XMLParser();
parsers.put("application/xml", parser);
parsers.put("text/xml", parser);
} else {
logger.warn("Ignoring unknown text extractor class: {}", name);
}
}
parser.setParsers(parsers);
}