package fr.inra.lipm.jezlucene;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.xml.sax.SAXException;
import com.google.common.collect.Sets;
import fr.inra.lipm.jezlucene.action.Index;
import fr.inra.lipm.jezlucene.action.Remove;
import fr.inra.lipm.jezlucene.action.Search;
/**
 * Command-line entry point of jezlucene.
 *
 * <p>The first argument selects an action ({@code Index}, {@code Search} or
 * {@code Remove}, matched case-insensitively); the remaining arguments are
 * handed to args4j, which binds them onto the selected action object.
 * When the action is missing or unknown, the usage of every action is printed
 * and the process exits with status 2.
 *
 * <p>A hidden diagnostic action, {@code test [directory]}, prints the metadata
 * Tika extracts from every entry of a directory. It is not listed in the usage.
 *
 * @author llegrand
 */
public class Main {

    private static final String INDEX = "Index";
    private static final String SEARCH = "Search";
    private static final String REMOVE = "Remove";

    /** Name of the hidden diagnostic action. */
    private static final String TEST = "test";

    /** Directory scanned by the diagnostic action when none is given on the command line. */
    private static final String DEFAULT_TEST_DIRECTORY = "/home/llegrand/Documents/personnel/Dropbox/Camera Uploads";

    /** Package holding one class per action; the class name equals the canonical action name. */
    private static final String ACTION_PACKAGE = "fr.inra.lipm.jezlucene.action.";

    /** Canonical action names; a linked set keeps the usage output in a stable order. */
    private static final Set<String> ACTIONS = Sets.newLinkedHashSet(Arrays.asList(INDEX, SEARCH, REMOVE));

    /**
     * Dispatches to the action named by {@code args[0]}.
     *
     * @param args action name followed by the options of that action
     * @throws Exception whatever the selected action, or the reflective usage printing, throws
     */
    public static void main(final String[] args) throws Exception {
        final String requested = args.length == 0 ? null : args[0];
        final String[] parameters = args.length == 0 ? new String[0] : Arrays.copyOfRange(args, 1, args.length);

        // Handled before the ACTIONS lookup: "test" is deliberately not an advertised action.
        if (TEST.equalsIgnoreCase(requested)) {
            final String directory = parameters.length > 0 ? parameters[0] : DEFAULT_TEST_DIRECTORY;
            dumpMetadata(new File(directory));
            return;
        }

        final String action = resolveAction(requested);
        if (action == null) {
            printUsage();
            System.exit(2);
            return;
        }

        if (INDEX.equals(action)) {
            final Index indexer = new Index();
            parseParameters(indexer, parameters);
            indexer.index();
        }
        else if (SEARCH.equals(action)) {
            final Search searcher = new Search();
            parseParameters(searcher, parameters);
            System.out.println(searcher.search());
        }
        else if (REMOVE.equals(action)) {
            final Remove remover = new Remove();
            parseParameters(remover, parameters);
            remover.remove();
        }
        else {
            // Only reachable if a name is added to ACTIONS without a matching branch above.
            throw new IllegalStateException("No handler for action: " + action);
        }
    }

    /**
     * Maps a user-supplied action name onto its canonical spelling, ignoring case.
     *
     * @param requested the name typed by the user, may be {@code null}
     * @return the matching entry of {@link #ACTIONS}, or {@code null} when there is none
     */
    private static String resolveAction(final String requested) {
        for (final String action : ACTIONS) {
            if (action.equalsIgnoreCase(requested)) {
                return action;
            }
        }
        return null;
    }

    /**
     * Prints, for every known action, its name followed by its args4j usage on standard output.
     *
     * @throws ReflectiveOperationException if an action class cannot be loaded or instantiated
     */
    private static void printUsage() throws ReflectiveOperationException {
        for (final String action : ACTIONS) {
            // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance(),
            // which rethrows checked constructor exceptions without declaring them.
            final Object bean = Class.forName(ACTION_PACKAGE + action).getDeclaredConstructor().newInstance();
            final CmdLineParser parser = new CmdLineParser(bean);
            System.out.println(action);
            parser.setUsageWidth(120);
            parser.printUsage(System.out);
        }
    }

    /**
     * Runs Tika on every entry of {@code dir} and prints the extracted metadata
     * as {@code key -> value} lines, followed by a separator line per entry.
     * Entries that cannot be read or parsed are reported and do not stop the scan.
     *
     * @param dir directory to scan
     * @throws IOException if {@code dir} cannot be listed
     */
    private static void dumpMetadata(final File dir) throws IOException {
        final File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Unable to list directory: " + dir);
        }
        final org.apache.tika.parser.Parser parser = new AutoDetectParser();
        for (final File file : files) {
            final BodyContentHandler handler = new BodyContentHandler();
            final Metadata metadata = new Metadata();
            // try-with-resources so the stream is closed even when parsing fails.
            try (FileInputStream input = new FileInputStream(file)) {
                parser.parse(input, handler, metadata, new ParseContext());
            }
            catch (final IOException e) {
                System.out.println("Unable to open input file: " + e.getMessage());
            }
            catch (SAXException | TikaException e) {
                System.out.println("Unable to extract file content: " + e.getMessage());
            }
            for (final String key : metadata.names()) {
                System.out.println(key + " -> " + metadata.get(key));
            }
            System.out.println(" ---------------------- ");
        }
    }

    /**
     * Binds {@code parameters} onto the args4j-annotated {@code action} object.
     * On a parsing error the message goes to standard error, the usage of the
     * action goes to standard output, and the process exits with status 2.
     *
     * @param action object receiving the parsed options
     * @param parameters command-line arguments without the leading action name
     */
    private static void parseParameters(final Object action, final String[] parameters) {
        final CmdLineParser parser = new CmdLineParser(action);
        try {
            parser.parseArgument(parameters);
        }
        catch (final CmdLineException e) {
            System.err.println(e.getMessage());
            parser.printUsage(System.out);
            System.exit(2);
        }
    }

    /*
     * Example search queries:
     * "archive_sequence-date:[0 TO 20130601] AND archive_sequence-sample-title:genotypage"
     * "archive_sequence-date:[20130701 TO 2014000] AND archive_sequence-sample-title:genotypage"
     * "archive_sequence-funding-source-institution:ANR AND archive_sequence-date:[0 TO 20130601] AND archive_sequence-sequencing-protocol-library-average_insert:[250 TO 300]"
     * "archive_sequence-sample-organisms-organism-species:helientus~ AND archive_sequence-sample-title:(\"RNA-Seq PSC8\" OR \"RNA-Seq XRQ\")"
     */
}