package fr.inra.lipm.jezlucene.action;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.List;
import javax.xml.stream.XMLStreamException;
import javax.xml.transform.TransformerException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.NIOFSDirectory;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import fr.inra.lipm.jezlucene.cfg.Configuration;
import fr.inra.lipm.jezlucene.parser.Parser;
/**
 * Command-line action that parses an input file, or every entry of an input
 * directory, and writes the resulting documents to a Lucene index.
 *
 * <p>The index location ({@code store}) and the parser selection
 * ({@code parser}) are not declared in this class; they are presumably
 * inherited from {@link Action}.
 *
 * @author llegrand
 */
public class Index extends Action {

    /**
     * Milliseconds in one day. Declared as {@code long} so that multiplying
     * it by the {@code -ctime} value is done in 64-bit arithmetic; the
     * {@code int} product overflows from 25 days upwards.
     */
    private static final long MILLIS_PER_DAY = 24L * 60L * 60L * 1000L;

    private static final Logger logger = LoggerFactory.getLogger(Index.class);

    /** File to index, or directory whose direct entries are indexed. */
    @Option(required = true, name = "-i", aliases = { "--input", "-input" }, metaVar = "FILE or DIR", usage = "input file")
    private File input;

    /** Raw user-field specification, handed unchanged to the configuration. */
    @Option(name = "-supplementary_fields", usage = "supplementary fields")
    private String userFields;

    /** Maximum file age in days; {@code null} disables the age filter. */
    @Option(name = "-ctime", usage = "File's status was last changed n*24 hours ago")
    private Integer lastModification;

    /** Whether full-text indexation is requested on the configuration. */
    @Option(name = "--fulltext", usage = "fulltext indexation")
    private boolean fulltext;

    /**
     * Indexes the input file, or each entry of the input directory.
     *
     * <p>Files last modified before the {@code -ctime} cutoff are skipped.
     * Every parsed document is written with {@code updateDocument}, keyed on
     * the identifier field reported by the configuration.
     *
     * <p>The writer is closed when the run completes. If any step fails, the
     * writer is rolled back instead, so the index lock is released and the
     * changes of the failed run are not committed.
     *
     * @throws IOException if the input directory cannot be listed or the
     *         index cannot be written
     * @throws XMLStreamException propagated from parsing
     * @throws TransformerException propagated from parsing
     */
    public void index() throws IOException, XMLStreamException, TransformerException {
        // Resolve the inputs first so that a listing failure never opens the index.
        final File[] files = listInputFiles();
        // One cutoff for the whole run: every file is compared to the same instant.
        final long cutoff = oldestAcceptedTimestamp();

        final IndexWriter writer = new IndexWriter(new NIOFSDirectory(this.store), Configuration.WRITER_CFG);
        boolean completed = false;
        try {
            for (final File file : files) {
                if (file.lastModified() < cutoff) {
                    logger.debug("Too old file: {}", file.getName());
                    continue;
                }
                indexFile(writer, file);
            }
            completed = true;
        } finally {
            if (completed) {
                writer.close();
            } else {
                abort(writer);
            }
        }
    }

    /**
     * Returns the files to index: the input itself, or the entries of the
     * input directory.
     *
     * @throws IOException if the directory cannot be listed
     */
    private File[] listInputFiles() throws IOException {
        if (!this.input.isDirectory()) {
            return new File[] { this.input };
        }
        // listFiles() returns null rather than throwing when the listing fails.
        final File[] entries = this.input.listFiles();
        if (entries == null) {
            throw new IOException("Unable to list directory: " + this.input);
        }
        return entries;
    }

    /**
     * Returns the oldest modification time (epoch milliseconds) a file may
     * have to be indexed, or {@link Long#MIN_VALUE} when no age filter is set
     * so that no file is ever considered too old.
     */
    private long oldestAcceptedTimestamp() {
        if (this.lastModification == null) {
            return Long.MIN_VALUE;
        }
        return System.currentTimeMillis() - MILLIS_PER_DAY * this.lastModification;
    }

    /**
     * Parses one file and writes each resulting document to the index.
     * A new configuration and parser are created for every file.
     */
    private void indexFile(final IndexWriter writer, final File file)
            throws IOException, XMLStreamException, TransformerException {
        final Configuration conf = new Configuration(this.parser);
        conf.addUserFields(this.userFields);
        conf.setFulltext(this.fulltext);

        final Parser myparser = new Parser(conf);
        final List<Document> documents = myparser.parse(file);

        final String idField = conf.getID();
        for (final Document doc : documents) {
            final String id = doc.get(idField);
            logger.debug("Document {}", id);
            writer.updateDocument(new Term(idField, id), doc);
        }
    }

    /**
     * Rolls the writer back after a failure. A rollback error is only logged
     * so that it does not hide the exception that caused the failure.
     */
    private static void abort(final IndexWriter writer) {
        try {
            writer.rollback();
        } catch (final IOException e) {
            logger.warn("Unable to roll back the index writer", e);
        }
    }
}