package fr.inra.lipm.jezlucene.cfg;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import fr.inra.lipm.jezlucene.cfg.DocField.ValueType;
/**
 * Indexing configuration read from a file through {@link IniLoader}.
 *
 * <p>Keys of the form {@code field:<name>} whose value looks like
 * {@code <Type>.<pattern>[@md5]} are turned into {@link DocField} definitions;
 * the scalar settings {@code docid}, {@code parser}, {@code entry_separator},
 * {@code order_by}, {@code index_unit} and {@code default_search} are read as-is.
 * Additional fields can be supplied afterwards with {@link #addUserFields(String)}.
 *
 * @author llegrand
 */
public class Configuration {
    private static final Logger logger = LoggerFactory.getLogger(Configuration.class);

    /** Accepted values of the {@code index_unit} setting. */
    public static enum IndexUnit {
        FILE,
        LINE
    }

    // $1 = field name
    private static final Pattern FIELD_NAME =
            Pattern.compile("^field:([\\w-]+)", Pattern.CASE_INSENSITIVE);
    // $1 = field type, $2 = field value, $3 = hash
    private static final Pattern FIELD_VALUE =
            Pattern.compile("(\\w+)\\.(.+?)(@md5)?$", Pattern.CASE_INSENSITIVE);
    // $1 = field name, $2 = keyword or not, $3 = field value
    private static final Pattern USER_FIELD =
            Pattern.compile("^(\\S+):(Keyword)?\\.?(.+)$", Pattern.CASE_INSENSITIVE);

    /** Field patterns that send a configured field to the "special" list instead of the regular one. */
    public static final Set<String> reservedWords = ImmutableSet.of("file", "abs_path", "filepath");

    public static final Version VERSION = Version.LUCENE_43;
    public static final Analyzer ANALYZER = new StandardAnalyzer(VERSION);
    // NOTE(review): this single IndexWriterConfig instance is shared by every user of the
    // class; confirm against the Lucene 4.3 documentation whether one config instance may
    // be handed to more than one IndexWriter.
    public static final IndexWriterConfig WRITER_CFG = new IndexWriterConfig(Configuration.VERSION, Configuration.ANALYZER);
    static {
        WRITER_CFG.setMaxThreadStates(2);
    }

    private final IniLoader config;
    private final List<DocField> fields = Lists.newArrayList();
    private final List<DocField> userFields = Lists.newArrayList();
    private final List<DocField> specialFields = Lists.newArrayList();
    private final IndexUnit indexUnit;
    private final String ID;
    private final String parser;
    private final String entrySeparator;
    private final String orderBy;
    private final String defaultSearchField;
    private boolean fulltext = false;

    /**
     * Loads the configuration file and builds the field definitions it declares.
     *
     * @param conf configuration file handed to {@link IniLoader}
     * @throws IOException declared for failures while loading {@code conf}
     * @throws IllegalArgumentException if {@code index_unit} is set to something other than
     *         {@code FILE} or {@code LINE} (case and surrounding whitespace are ignored)
     */
    public Configuration(final File conf) throws IOException {
        this.config = new IniLoader(conf);
        final Iterator<String> keys = config.getAll().iterator();
        while (keys.hasNext()) {
            registerField(keys.next());
        }
        this.ID = config.get("docid");
        this.parser = config.get("parser");
        this.entrySeparator = config.get("entry_separator");
        this.orderBy = config.get("order_by");
        this.indexUnit = parseIndexUnit(config.get("index_unit"));
        this.defaultSearchField = config.get("default_search");
    }

    /**
     * Examines one configuration key and, when it is a well-formed {@code field:} entry,
     * stores the resulting {@link DocField} in the regular or special field list.
     * Keys that are not {@code field:} entries are ignored (trace log); {@code field:}
     * entries with a missing or unparsable value are reported with a warning.
     */
    private void registerField(final String key) {
        final Matcher keyMatch = FIELD_NAME.matcher(key);
        if (!keyMatch.find()) {
            logger.trace("Ignored key: {}", key);
            return;
        }
        // The value is looked up only for field keys, and a missing value is reported as
        // malformed instead of reaching Pattern.matcher(null), which throws.
        final String rawValue = config.get(key);
        final Matcher valueMatch = (rawValue == null) ? null : FIELD_VALUE.matcher(rawValue);
        if (valueMatch == null || !valueMatch.find()) {
            logger.warn("Malformed field: {}", key);
            return;
        }
        final String pattern = valueMatch.group(2);
        final String option = (valueMatch.group(3) != null) ? "md5" : null;
        final DocField field = buildField(keyMatch.group(1), pattern, valueMatch.group(1), option);
        if (reservedWords.contains(pattern)) {
            this.specialFields.add(field);
        }
        else {
            this.fields.add(field);
        }
    }

    /**
     * Converts the {@code index_unit} setting into an {@link IndexUnit}.
     *
     * @param value raw setting, or {@code null} when it is not configured
     * @return {@link IndexUnit#FILE} when {@code value} is {@code null}, otherwise the
     *         constant whose name equals the trimmed, upper-cased value
     * @throws IllegalArgumentException if the value names no {@link IndexUnit} constant
     */
    private static IndexUnit parseIndexUnit(final String value) {
        if (value == null) {
            return IndexUnit.FILE;
        }
        // Locale.ROOT keeps the upper-casing independent of the default locale.
        final String normalized = value.trim().toUpperCase(java.util.Locale.ROOT);
        try {
            return IndexUnit.valueOf(normalized);
        }
        catch (final IllegalArgumentException e) {
            throw new IllegalArgumentException(
                    "Invalid index_unit '" + value + "': expected FILE or LINE", e);
        }
    }

    /** @return an immutable snapshot of the fields declared in the configuration file */
    public List<DocField> getFields() {
        return ImmutableList.copyOf(this.fields);
    }

    /** @return an immutable snapshot of the fields added through {@link #addUserFields(String)} */
    public List<DocField> getUserFields() {
        return ImmutableList.copyOf(this.userFields);
    }

    /** @return an immutable snapshot of the configured fields whose pattern is one of {@link #reservedWords} */
    public List<DocField> getSpecialFields() {
        return ImmutableList.copyOf(this.specialFields);
    }

    /** @return the value of the {@code docid} setting (presumably {@code null} when unset) */
    public String getID() {
        return this.ID;
    }

    /** @return the value of the {@code parser} setting (presumably {@code null} when unset) */
    public String getParser() {
        return this.parser;
    }

    /** @return the value of the {@code entry_separator} setting (presumably {@code null} when unset) */
    public String getEntrySeparator() {
        return this.entrySeparator;
    }

    /** @return the value of the {@code order_by} setting (presumably {@code null} when unset) */
    public String getOrderBy() {
        return this.orderBy;
    }

    /** @return the value of the {@code default_search} setting (presumably {@code null} when unset) */
    public String getDefaultSearchField() {
        return this.defaultSearchField;
    }

    /**
     * Parses a comma-separated list of user fields and appends them to the user field list.
     *
     * <p>Each entry has the form {@code name:Keyword.value} or {@code name:value}; entries
     * carrying the {@code Keyword} marker are built with type {@code "Keyword"}, the others
     * with type {@code "Text"}.
     *
     * <p>Processing stops at the first malformed entry: entries before it have already been
     * added and stay in the list.
     *
     * @param userField the field list, e.g. {@code field:Keyword.data,field:data}
     * @return {@code true} if every entry was added; {@code false} if the argument is
     *         {@code null}/empty or an entry is malformed
     */
    public boolean addUserFields(final String userField) {
        if (Strings.isNullOrEmpty(userField)) {
            return false;
        }
        // field:Keyword.data,field:data,...
        for (final String entry : userField.split(",")) {
            final Matcher entryMatch = USER_FIELD.matcher(entry);
            if (!entryMatch.find()) {
                logger.warn("Malformed field: {}", entry);
                return false;
            }
            final String type = (entryMatch.group(2) != null) ? "Keyword" : "Text";
            final DocField currentField = buildField(entryMatch.group(1), "", type, null);
            currentField.setValue(entryMatch.group(3));
            this.userFields.add(currentField);
        }
        return true;
    }

    /** Sets the full-text flag returned by {@link #isFulltext()}. */
    public void setFulltext(final boolean fulltext) {
        this.fulltext = fulltext;
    }

    /** @return the full-text flag; {@code false} unless changed with {@link #setFulltext(boolean)} */
    public boolean isFulltext() {
        return this.fulltext;
    }

    /** @return the configured index unit, {@link IndexUnit#FILE} when {@code index_unit} is not set */
    public IndexUnit getIndexUnit() {
        return this.indexUnit;
    }

    /**
     * Creates a {@link DocField}, mapping the textual field type to a Lucene field type.
     *
     * <p>{@code "Keyword"} maps to {@code StringField.TYPE_STORED}, {@code "UnIndexed"} to
     * {@code StoredField.TYPE} (with a bytes value type), {@code "UnStored"} to
     * {@code TextField.TYPE_NOT_STORED}; any other type falls back to
     * {@code TextField.TYPE_STORED}. The comparison is case-sensitive.
     *
     * @param name    field name
     * @param pattern field pattern passed through to the {@link DocField}
     * @param type    textual field type
     * @param option  option passed through to the {@link DocField}, may be {@code null}
     * @return the new field definition
     */
    private static DocField buildField(final String name, final String pattern, final String type, final String option) {
        final FieldType fieldType;
        final ValueType valueType;
        if ("Keyword".equals(type)) {
            fieldType = StringField.TYPE_STORED;
            valueType = ValueType.STRING;
        }
        else if ("UnIndexed".equals(type)) {
            fieldType = StoredField.TYPE;
            valueType = ValueType.BYTES;
        }
        else if ("UnStored".equals(type)) {
            fieldType = org.apache.lucene.document.TextField.TYPE_NOT_STORED;
            valueType = ValueType.STRING;
        }
        else {
            fieldType = org.apache.lucene.document.TextField.TYPE_STORED;
            valueType = ValueType.STRING;
        }
        return new DocField(name, pattern, fieldType, valueType, option);
    }
}