/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.solr.schema;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Map;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.FieldTypePluginLoader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.plugin.AbstractPluginLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
* Reads a datatype's analyzer configuration.
* <p>
* Code taken from {@link FieldTypePluginLoader} and adapted for SIREn's
* use case.
*/
public class AnalyzerConfigReader {

  /**
   * Name of the XML attribute that carries the Lucene match version. This is
   * the same attribute name as the one declared by {@link IndexSchema}.
   */
  public static final String LUCENE_MATCH_VERSION_PARAM = IndexSchema.LUCENE_MATCH_VERSION_PARAM;

  private static final Logger logger = LoggerFactory.getLogger(AnalyzerConfigReader.class);

  /**
   * Read an analyzer definition and instantiate an {@link Analyzer} object.
   * <p>
   * Two mutually exclusive forms of definition are supported:
   * <ul>
   * <li>a {@code class} attribute naming an {@link Analyzer} implementation,
   * which is instantiated reflectively; or</li>
   * <li>nested {@code charFilter}, {@code tokenizer} and {@code filter}
   * elements, which are assembled into a {@link TokenizerChain}. Exactly one
   * {@code tokenizer} element is required in this form.</li>
   * </ul>
   * <p>
   * Code adapted from Solr's analyzer loading logic (see
   * {@link FieldTypePluginLoader}).
   *
   * @param node An analyzer node from the config file; may be {@code null}
   * @param loader The resource loader used to resolve and instantiate the
   *        analyzer and analysis factory classes
   * @param luceneMatchVersion The default Lucene match version, used whenever
   *        the analyzer or a nested factory does not declare its own
   *        {@value #LUCENE_MATCH_VERSION_PARAM} attribute
   * @return An analyzer, or {@code null} if {@code node} is {@code null}
   * @throws XPathExpressionException If an XPath expression cannot be evaluated
   * @throws SolrException If the definition mixes an analyzer class with nested
   *         factories, if the analyzer class cannot be loaded, if no tokenizer
   *         or more than one tokenizer is defined, or if no Lucene match
   *         version can be determined
   */
  protected static Analyzer readAnalyzer(final Node node,
                                         final SolrResourceLoader loader,
                                         final Version luceneMatchVersion)
  throws XPathExpressionException {
    if (node == null) {
      return null;
    }

    final NamedNodeMap attrs = node.getAttributes();
    final String analyzerName = DOMUtil.getAttr(attrs, "class");

    // check for all of these up front, so we can error if used in
    // conjunction with an explicit analyzer class.
    final XPath xpath = XPathFactory.newInstance().newXPath();
    final NodeList charFilterNodes =
      (NodeList) xpath.evaluate("./charFilter", node, XPathConstants.NODESET);
    final NodeList tokenizerNodes =
      (NodeList) xpath.evaluate("./tokenizer", node, XPathConstants.NODESET);
    final NodeList tokenFilterNodes =
      (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET);

    if (analyzerName != null) {
      // explicitly check for child analysis factories instead of
      // just any child nodes, because the user might have their
      // own custom nodes (ie: <description> or something like that)
      if (0 != charFilterNodes.getLength() ||
          0 != tokenizerNodes.getLength() ||
          0 != tokenFilterNodes.getLength()) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "Configuration Error: Analyzer class='" + analyzerName +
          "' can not be combined with nested analysis factories");
      }
      return newAnalyzerInstance(analyzerName, attrs, loader, luceneMatchVersion);
    }

    // The loading order (char filters, tokenizer, token filters) is kept so
    // that configuration errors are reported in document order of concern.
    final ArrayList<CharFilterFactory> charFilters =
      loadCharFilters(charFilterNodes, loader, luceneMatchVersion);
    final TokenizerFactory tokenizer =
      loadTokenizer(tokenizerNodes, loader, luceneMatchVersion);
    final ArrayList<TokenFilterFactory> filters =
      loadTokenFilters(tokenFilterNodes, loader, luceneMatchVersion);

    return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
      tokenizer, filters.toArray(new TokenFilterFactory[filters.size()]));
  }

  /**
   * Instantiate the analyzer class named by the {@code class} attribute.
   * A constructor taking a {@link Version} is preferred; the default
   * constructor is used only if no such constructor exists.
   */
  private static Analyzer newAnalyzerInstance(final String analyzerName,
                                              final NamedNodeMap attrs,
                                              final SolrResourceLoader loader,
                                              final Version luceneMatchVersion) {
    try {
      // No need to be core-aware as Analyzers are not in the core-aware list
      final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);
      try {
        // first try to use a ctor with version parameter (needed for many
        // new Analyzers that have no default one anymore)
        final Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
        final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
        final Version matchVersion = (matchVersionStr == null) ?
          luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr);
        if (matchVersion == null) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Configuration Error: Analyzer '" + clazz.getName() +
            "' needs a 'luceneMatchVersion' parameter");
        }
        return cnstr.newInstance(matchVersion);
      }
      catch (final NoSuchMethodException nsme) {
        // otherwise use default ctor
        return clazz.newInstance();
      }
    }
    catch (final Exception e) {
      // Any failure (including the configuration error above) is logged and
      // reported as a failure to load the named analyzer.
      logger.error("Cannot load analyzer: "+analyzerName, e);
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Cannot load analyzer: "+analyzerName, e);
    }
  }

  /**
   * Instantiate and initialise one {@link CharFilterFactory} per
   * {@code charFilter} node, in node order.
   */
  private static ArrayList<CharFilterFactory> loadCharFilters(final NodeList charFilterNodes,
                                                              final SolrResourceLoader loader,
                                                              final Version luceneMatchVersion) {
    final ArrayList<CharFilterFactory> charFilters = new ArrayList<CharFilterFactory>();
    final AbstractPluginLoader<CharFilterFactory> charFilterLoader =
      new AbstractPluginLoader<CharFilterFactory>("[analyzerConfig] analyzer/charFilter",
                                                  CharFilterFactory.class, false, false) {

      @Override
      protected void init(final CharFilterFactory plugin, final Node node) throws Exception {
        if (plugin != null) {
          final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");
          // the version attribute is consumed here and not passed to init()
          final String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
          plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion,
            plugin.getClass().getSimpleName(), luceneMatchVersion));
          plugin.init(params);
          charFilters.add(plugin);
        }
      }

      @Override
      protected CharFilterFactory register(final String name, final CharFilterFactory plugin) {
        return null; // used for map registration
      }

    };
    charFilterLoader.load(loader, charFilterNodes);
    return charFilters;
  }

  /**
   * Instantiate and initialise the single {@link TokenizerFactory} of the
   * analyzer. Although an analyzer only allows a single tokenizer, all the
   * {@code tokenizer} nodes are visited to make sure the configuration is ok.
   *
   * @throws SolrException if zero or more than one tokenizer is defined
   */
  private static TokenizerFactory loadTokenizer(final NodeList tokenizerNodes,
                                                final SolrResourceLoader loader,
                                                final Version luceneMatchVersion) {
    final ArrayList<TokenizerFactory> tokenizers = new ArrayList<TokenizerFactory>(1);
    final AbstractPluginLoader<TokenizerFactory> tokenizerLoader =
      new AbstractPluginLoader<TokenizerFactory>("[analyzerConfig] analyzer/tokenizer",
                                                 TokenizerFactory.class, false, false) {

      @Override
      protected void init(final TokenizerFactory plugin, final Node node)
      throws Exception {
        if (!tokenizers.isEmpty()) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Multiple tokenizers defined for: "+node);
        }
        final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");
        // the version attribute is consumed here and not passed to init()
        final String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
        plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion,
          plugin.getClass().getSimpleName(), luceneMatchVersion));
        plugin.init(params);
        tokenizers.add(plugin);
      }

      @Override
      protected TokenizerFactory register(final String name, final TokenizerFactory plugin) {
        return null; // used for map registration
      }

    };
    tokenizerLoader.load(loader, tokenizerNodes);

    // Make sure something was loaded
    if (tokenizers.isEmpty()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "analyzer without class or tokenizer & filter list");
    }
    return tokenizers.get(0);
  }

  /**
   * Instantiate and initialise one {@link TokenFilterFactory} per
   * {@code filter} node, in node order.
   */
  private static ArrayList<TokenFilterFactory> loadTokenFilters(final NodeList tokenFilterNodes,
                                                                final SolrResourceLoader loader,
                                                                final Version luceneMatchVersion) {
    final ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>();
    final AbstractPluginLoader<TokenFilterFactory> filterLoader =
      new AbstractPluginLoader<TokenFilterFactory>("[analyzerConfig] analyzer/filter",
                                                   TokenFilterFactory.class, false, false) {

      @Override
      protected void init(final TokenFilterFactory plugin, final Node node) throws Exception {
        if (plugin != null) {
          final Map<String,String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");
          // the version attribute is consumed here and not passed to init()
          final String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
          plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion,
            plugin.getClass().getSimpleName(), luceneMatchVersion));
          plugin.init(params);
          filters.add(plugin);
        }
      }

      @Override
      protected TokenFilterFactory register(final String name, final TokenFilterFactory plugin)
      throws Exception {
        return null; // used for map registration
      }

    };
    filterLoader.load(loader, tokenFilterNodes);
    return filters;
  }

  /**
   * Resolve the Lucene match version to apply to an analysis factory.
   * <p>
   * The version declared on the factory element takes precedence; otherwise
   * the default version is used. A warning is logged if the resolved version
   * is older than {@link Version#LUCENE_40}.
   *
   * @param configuredVersion The value of the factory's
   *        {@value #LUCENE_MATCH_VERSION_PARAM} attribute, or {@code null} if
   *        the attribute is absent
   * @param pluginClassName The simple class name of the factory, used in
   *        messages only
   * @param luceneMatchVersion The default version; may be {@code null}
   * @return The resolved version, never {@code null}
   * @throws SolrException If neither a configured nor a default version is
   *         available
   */
  private static Version parseConfiguredVersion(final String configuredVersion,
                                                final String pluginClassName,
                                                final Version luceneMatchVersion) {
    final Version version = (configuredVersion != null) ?
      Config.parseLuceneVersionString(configuredVersion) : luceneMatchVersion;

    // Fail with an explicit configuration error rather than a bare
    // NullPointerException when no version can be determined. This mirrors
    // the check performed for explicit analyzer classes.
    if (version == null) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Configuration Error: " + pluginClassName +
        " needs a '" + LUCENE_MATCH_VERSION_PARAM + "' parameter");
    }

    if (!version.onOrAfter(Version.LUCENE_40)) {
      logger.warn("{} is using deprecated {}" +
        " emulation. You should at some point declare and reindex to at least 4.0, because " +
        "3.x emulation is deprecated and will be removed in 5.0", pluginClassName, version);
    }
    return version;
  }

}