Package fr.inra.lipm.jezlucene.cfg

Source Code of fr.inra.lipm.jezlucene.cfg.Configuration

package fr.inra.lipm.jezlucene.cfg;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;

import fr.inra.lipm.jezlucene.cfg.DocField.ValueType;

/**
* @author llegrand
*/
public class Configuration {

    private static Logger logger = LoggerFactory.getLogger(Configuration.class);

    public static enum IndexUnit {
        FILE,
        LINE
    }

    // $1 = field name
    private final Pattern fieldname = Pattern.compile("^field:([\\w-]+)", Pattern.CASE_INSENSITIVE);
    // $1 = field type, $2 = field value, $3 = hash
    private final Pattern fieldvalue = Pattern.compile("(\\w+)\\.(.+?)(@md5)?$", Pattern.CASE_INSENSITIVE);
    // $1 = field name, $2 = keyword or not, $3 = field value
    private final Pattern userField = Pattern.compile("^(\\S+):(Keyword)?\\.?(.+)$", Pattern.CASE_INSENSITIVE);

    public static final Set<String> reservedWords = ImmutableSet.of("file", "abs_path", "filepath");
    public static final Version VERSION = Version.LUCENE_43;
    public static final Analyzer ANALYZER = new StandardAnalyzer(Version.LUCENE_43);
    public static final IndexWriterConfig WRITER_CFG = new IndexWriterConfig(Configuration.VERSION, Configuration.ANALYZER);

    static {
        WRITER_CFG.setMaxThreadStates(2);
    }

    private final IniLoader config;
    private final List<DocField> fields = Lists.newArrayList();
    private final List<DocField> userFields = Lists.newArrayList();
    private final List<DocField> specialFields = Lists.newArrayList();
    private final IndexUnit indexUnit;
    private final String ID;
    private final String parser;
    private final String entrySeparator;
    private final String orderBy;
    private final String defaultSearchField;
    private boolean fulltext = false;

    public Configuration(final File conf) throws IOException {
        this.config = new IniLoader(conf);

        final Iterator<String> keyit = config.getAll().iterator();
        while (keyit.hasNext()) {
            final String key = keyit.next();

            final Matcher keymatch = fieldname.matcher(key);
            final Matcher valuematch = fieldvalue.matcher(config.get(key));
            if (keymatch.find()) {
                if (valuematch.find()) {
                    final String pattern = valuematch.group(2);
                    final String option = (valuematch.group(3) != null) ? "md5" : null;
                    final DocField field = buildField(keymatch.group(1), pattern, valuematch.group(1), option);

                    if (reservedWords.contains(pattern)) {
                        this.specialFields.add(field);
                    }
                    else {
                        this.fields.add(field);
                    }
                }
                else {
                    logger.warn("Malformed field: " + key);
                }
            }
            else {
                logger.trace("Ignored key: " + key);
            }
        }

        this.ID = config.get("docid");
        this.parser = config.get("parser");
        this.entrySeparator = config.get("entry_separator");
        this.orderBy = config.get("order_by");
        this.indexUnit = (config.get("index_unit") != null) ? IndexUnit.valueOf(config.get("index_unit")) : IndexUnit.FILE;
        this.defaultSearchField = config.get("default_search");
    }

    public List<DocField> getFields() {
        return ImmutableList.copyOf(this.fields);
    }

    public List<DocField> getUserFields() {
        return ImmutableList.copyOf(this.userFields);
    }

    public List<DocField> getSpecialFields() {
        return ImmutableList.copyOf(this.specialFields);
    }

    public String getID() {
        return this.ID;
    }

    public String getParser() {
        return this.parser;
    }

    public String getEntrySeparator() {
        return this.entrySeparator;
    }

    public String getOrderBy() {
        return this.orderBy;
    }

    public String getDefaultSearchField() {
        return this.defaultSearchField;
    }

    public boolean addUserFields(final String userField) {
        if (Strings.isNullOrEmpty(userField)) {
            return false;
        }
        // field:Keyword.data,field:data,...
        final String[] fields = userField.split(",");
        for (final String field : fields) {
            final Matcher userMatcher = this.userField.matcher(field);
            if (userMatcher.find()) {
                final String type = (userMatcher.group(2) != null) ? "Keyword" : "Text";
                final DocField currentField = buildField(userMatcher.group(1), "", type, null);
                currentField.setValue(userMatcher.group(3));
                this.userFields.add(currentField);
            }
            else {
                logger.warn("Malformed field: " + field);
                return false;
            }
        }
        return true;
    }

    public void setFulltext(final boolean fulltext) {
        this.fulltext = fulltext;
    }

    public boolean isFulltext() {
        return this.fulltext;
    }

    public IndexUnit getIndexUnit() {
        return this.indexUnit;
    }

    private DocField buildField(final String name, final String pattern, final String type, final String option) {
        FieldType fieldType;
        ValueType valueType;

        if (type.equals("Keyword")) {
            fieldType = StringField.TYPE_STORED;
            valueType = ValueType.STRING;
        }
        else if (type.equals("UnIndexed")) {
            fieldType = StoredField.TYPE;
            valueType = ValueType.BYTES;
        }
        else if (type.equals("UnStored")) {
            fieldType = org.apache.lucene.document.TextField.TYPE_NOT_STORED;
            valueType = ValueType.STRING;
        }
        else {
            fieldType = org.apache.lucene.document.TextField.TYPE_STORED;
            valueType = ValueType.STRING;
        }
        final DocField docField = new DocField(name, pattern, fieldType, valueType, option);

        return docField;
    }
}
TOP

Related Classes of fr.inra.lipm.jezlucene.cfg.Configuration

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.