/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.qparser.keyword;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.must;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.not;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.should;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanQueryBuilder.bq;
import static org.sindice.siren.search.AbstractTestSirenScorer.NodeBooleanQueryBuilder.nbq;
import static org.sindice.siren.search.AbstractTestSirenScorer.NodePhraseQueryBuilder.npq;
import static org.sindice.siren.search.AbstractTestSirenScorer.NodeTermQueryBuilder.ntq;
import static org.sindice.siren.search.AbstractTestSirenScorer.TwigQueryBuilder.twq;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.ConfigurationKey;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.Operator;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Ignore;
import org.junit.Test;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.DoubleNumericAnalyzer;
import org.sindice.siren.analysis.FloatNumericAnalyzer;
import org.sindice.siren.analysis.IntNumericAnalyzer;
import org.sindice.siren.analysis.LongNumericAnalyzer;
import org.sindice.siren.analysis.filter.ASCIIFoldingExpansionFilter;
import org.sindice.siren.qparser.keyword.config.KeywordQueryConfigHandler;
import org.sindice.siren.qparser.keyword.config.KeywordQueryConfigHandler.KeywordConfigurationKeys;
import org.sindice.siren.qparser.keyword.nodes.TwigQueryNode;
import org.sindice.siren.qparser.keyword.nodes.WildcardNodeQueryNode;
import org.sindice.siren.qparser.keyword.processors.NodeNumericQueryNodeProcessor;
import org.sindice.siren.qparser.keyword.processors.NodeNumericRangeQueryNodeProcessor;
import org.sindice.siren.search.node.LuceneProxyNodeQuery;
import org.sindice.siren.search.node.MultiNodeTermQuery;
import org.sindice.siren.search.node.NodeBooleanClause;
import org.sindice.siren.search.node.NodeBooleanClause.Occur;
import org.sindice.siren.search.node.NodeBooleanQuery;
import org.sindice.siren.search.node.NodeFuzzyQuery;
import org.sindice.siren.search.node.NodeNumericRangeQuery;
import org.sindice.siren.search.node.NodePhraseQuery;
import org.sindice.siren.search.node.NodePrefixQuery;
import org.sindice.siren.search.node.NodePrimitiveQuery;
import org.sindice.siren.search.node.NodeQuery;
import org.sindice.siren.search.node.NodeRegexpQuery;
import org.sindice.siren.search.node.NodeTermQuery;
import org.sindice.siren.search.node.NodeTermRangeQuery;
import org.sindice.siren.search.node.NodeWildcardQuery;
import org.sindice.siren.search.node.TwigQuery;
import org.sindice.siren.util.JSONDatatype;
import org.sindice.siren.util.SirenTestCase;
import org.sindice.siren.util.XSDDatatype;
@SuppressWarnings("rawtypes")
public class KeywordQueryParserTest {
/**
 * Helper method to parse a query string using the {@link KeywordQueryParser}.
 *
 * @param keys optional configuration entries copied into a fresh
 *        {@link KeywordQueryConfigHandler}; pass null for the defaults
 * @param query the keyword query string to parse
 * @return the parsed {@link Query} against the default test field
 * @throws QueryNodeException if the query string cannot be parsed
 */
@SuppressWarnings("unchecked")
public Query parse(final HashMap<ConfigurationKey, Object> keys, final String query)
throws QueryNodeException {
  final KeywordQueryParser parser = new KeywordQueryParser();
  if (keys != null) {
    // Install a handler carrying the caller-supplied configuration
    final KeywordQueryConfigHandler handler = new KeywordQueryConfigHandler();
    for (final Entry<ConfigurationKey, Object> entry : keys.entrySet()) {
      handler.set(entry.getKey(), entry.getValue());
    }
    parser.setQueryConfigHandler(handler);
  }
  return parser.parse(query, SirenTestCase.DEFAULT_TEST_FIELD);
}
/**
 * Asserts that the query string parses to {@code expected}, and that the
 * textual form of {@code expected} round-trips back to itself.
 */
private void _assertSirenQuery(final Query expected, final String query)
throws Exception {
  final Query parsed = this.parse(null, query);
  assertEquals(expected, parsed);
  // Round-trip: toString() must be re-parsable into the same query
  assertEquals(expected, this.parse(null, expected.toString()));
}
/**
 * Same as {@link #_assertSirenQuery(Query, String)}, but parses with the
 * given configuration entries.
 */
private void _assertSirenQuery(final HashMap<ConfigurationKey, Object> keys,
                               final Query expected,
                               final String query)
throws Exception {
  final Query parsed = this.parse(keys, query);
  assertEquals(expected, parsed);
  // Round-trip: toString() must be re-parsable into the same query
  assertEquals(expected, this.parse(keys, expected.toString()));
}
/**
 * Loads a QNames mapping file into a {@link Properties} instance.
 *
 * @param qnamesFile path of the qnames properties file
 * @return the loaded prefix-to-namespace mappings
 * @throws IOException if the file cannot be opened or read
 */
private Properties loadQNamesFile(final String qnamesFile) throws IOException {
  final Properties qnames = new Properties();
  // try-with-resources: the original leaked the FileInputStream on both the
  // success and the failure path.
  try (final FileInputStream in = new FileInputStream(new File(qnamesFile))) {
    qnames.load(in);
  }
  return qnames;
}
/**
 * Tests the basic query syntax: default AND conjunction, the textual and
 * symbolic boolean operators (AND/&amp;&amp;, OR/||, NOT/-/!), grouping,
 * and phrase queries.
 */
@Test
public void testQuerySyntax()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final HashMap<String, Analyzer> dtAnalyzers = new HashMap<String, Analyzer>();
  dtAnalyzers.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dtAnalyzers);
  Query bq = bq(must("term", "term", "term")).getQuery();
  this._assertSirenQuery(config, bq, "term term term");
  // NOTE(review): restored the accented terms "türm" and "ümlaut" — this file
  // previously contained U+FFFD replacement characters here (the bytes had
  // been mangled by a wrong-encoding save); these are the terms used by the
  // equivalent Lucene QueryParser tests.
  bq = bq(must("türm", "term", "term")).getQuery();
  this._assertSirenQuery(config, bq, "türm term term");
  Query q = ntq("ümlaut").getLuceneProxyQuery();
  this._assertSirenQuery(config, q, "ümlaut");
  // Conjunction: keyword, parenthesised, and symbolic forms
  bq = bq(must("a", "b")).getQuery();
  this._assertSirenQuery(config, bq, "a AND b");
  this._assertSirenQuery(config, bq, "(a AND b)");
  this._assertSirenQuery(config, bq, "a && b");
  // Negation: NOT, '-' and '!' are equivalent
  bq = bq(must("a"), not("b")).getQuery();
  this._assertSirenQuery(config, bq, "a AND NOT b");
  this._assertSirenQuery(config, bq, "a AND -b");
  this._assertSirenQuery(config, bq, "a AND !b");
  this._assertSirenQuery(config, bq, "a && ! b");
  /*
   * For the OR queries, the #toString outputs "a b". Because the default
   * operator of KeywordQueryParser is AND, parsing it back gives "+a +b".
   * TODO Find a way around this ? Maybe an operator for SHOULD.
   */
  bq = bq(should("a", "b")).getQuery();
  assertEquals(bq, parse(config, "a OR b"));
  assertEquals(bq, parse(config, "a || b"));
  bq = bq(should(ntq("a")), not(ntq("b"))).getQuery();
  assertEquals(bq, parse(config, "a OR !b"));
  assertEquals(bq, parse(config, "a OR ! b"));
  assertEquals(bq, parse(config, "a OR -b"));
  // Phrases mixed with terms, and a stand-alone phrase
  bq = bq(must("term"), must(npq("phrase", "phrase"))).getQuery();
  this._assertSirenQuery(config, bq, "term AND \"phrase phrase\"");
  q = npq("hello", "there").getLuceneProxyQuery();
  this._assertSirenQuery(config, q, "\"hello there\"");
}
/**
 * Tests backslash-escaping of the parser's special characters ('*', '-',
 * '+', ':', '\', '"'): an escaped character must end up literally inside
 * the resulting term or phrase.
 */
@Test
public void testEscaped()
throws Exception {
final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
final HashMap<String, Analyzer> dtAnalyzers = new HashMap<String, Analyzer>();
// Whitespace analyzer keeps punctuation intact inside terms
dtAnalyzers.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dtAnalyzers);
// Escaped wildcard and escaped ordinary letter become literal terms
Query q = ntq("*").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "\\*");
q = ntq("a").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "\\a");
// Escaped operators embedded in a single term
q = ntq("a-b").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "a\\-b");
q = ntq("a+b").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "a\\+b");
q = ntq("a:b").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "a\\:b");
// "a\\\\b" in the query source is the two characters 'a\b'
q = ntq("a\\b").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "a\\\\b");
// Same escapes inside a multi-term (AND) query
q = bq(must("a", "b-c")).getQuery();
this._assertSirenQuery(config, q, "a b\\-c");
q = bq(must("a", "b+c")).getQuery();
this._assertSirenQuery(config, q, "a b\\+c");
q = bq(must("a", "b:c")).getQuery();
this._assertSirenQuery(config, q, "a b\\:c");
q = bq(must("a", "b\\c")).getQuery();
this._assertSirenQuery(config, q, "a b\\\\c");
// Escaped backslash followed by escaped plus: term 'a\+b'
q = ntq("a\\+b").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "a\\\\\\+b");
// Escaped double quotes stay literal instead of opening a phrase
q = bq(must("a", "\"b", "c\"", "d")).getQuery();
this._assertSirenQuery(config, q, "a \\\"b c\\\" d");
// Escaped quotes nested inside a real (unescaped) phrase
q = npq("a", "\"b\"aa\"", "c\"", "d").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "\"a \\\"b\\\"aa\\\" c\\\" d\"");
// Escaped '+' inside a phrase
q = npq("a", "+b", "c", "d").getLuceneProxyQuery();
this._assertSirenQuery(config, q, "\"a \\+b c d\"");
}
/**
 * Checks the concrete query types produced when twigs are disabled:
 * node boolean, node term, and node phrase queries.
 */
@Test
public void testQueryType()
throws Exception {
  final KeywordQueryParser parser = new KeywordQueryParser();
  parser.setAllowTwig(false);
  // A conjunction maps to a node boolean query
  Query q = parser.parse("aaa AND bbb", "a");
  assertTrue(q instanceof NodeBooleanQuery);
  // A single term maps to a node term query
  q = parser.parse("hello", "a");
  assertTrue(q instanceof NodeTermQuery);
  // A quoted phrase maps to a node phrase query
  q = parser.parse("\"hello Future\"", "a");
  assertTrue(q instanceof NodePhraseQuery);
}
/**
 * Checks the top-level query shape with twigs disabled (a bare node boolean
 * query) versus enabled (a regular boolean query wrapper).
 */
@Test
public void testRemoveTopLevelQueryNode()
throws Exception {
  final HashMap<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // Twigs are disabled: the result is a node boolean query
  conf.put(KeywordConfigurationKeys.ALLOW_TWIG, false);
  final Query q1 = nbq(must("a"), must("b"), should("c")).getNodeQuery();
  this._assertSirenQuery(conf, q1, "+a +\"b\" OR \"c\"");
  // Twigs are enabled: the result is a regular boolean query
  conf.put(KeywordConfigurationKeys.ALLOW_TWIG, true);
  final Query q2 = bq(must("a"), must("b"), should("c")).getQuery();
  this._assertSirenQuery(conf, q2, "+a +\"b\" OR \"c\"");
}
/**
 * Tests regular expression queries (/regexp/ syntax): lowercasing of
 * expanded terms, boosts, multi-term rewrite methods, escaped slashes, and
 * the distinction between regexp literals and quoted/escaped plain terms.
 */
@Test
public void testRegexps() throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  config.put(KeywordConfigurationKeys.ALLOW_TWIG, false);
  final String df = SirenTestCase.DEFAULT_TEST_FIELD;
  // NOTE(review): use the local 'df' alias consistently — the original
  // spelled out SirenTestCase.DEFAULT_TEST_FIELD on this one line only.
  final NodeRegexpQuery q = new NodeRegexpQuery(new Term(df, "[a-z][123]"));
  this._assertSirenQuery(config, q, "/[a-z][123]/");
  // Expanded terms are lowercased before matching when the option is set
  config.put(ConfigurationKeys.LOWERCASE_EXPANDED_TERMS, true);
  this._assertSirenQuery(config, q, "/[A-Z][123]/");
  // '^' boost applies to the regexp query
  q.setBoost(0.5f);
  this._assertSirenQuery(config, q, "/[A-Z][123]/^0.5");
  // The configured multi-node-term rewrite method is propagated
  q.setRewriteMethod(MultiNodeTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  config.put(KeywordConfigurationKeys.MULTI_NODE_TERM_REWRITE_METHOD, MultiNodeTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  this._assertSirenQuery(config, q, "/[A-Z][123]/^0.5");
  config.put(KeywordConfigurationKeys.MULTI_NODE_TERM_REWRITE_METHOD, MultiNodeTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
  // Escaped '/' and '*' stay inside the regexp literal
  final Query escaped = new NodeRegexpQuery(new Term(df, "[a-z]\\/[123]"));
  this._assertSirenQuery(config, escaped, "/[a-z]\\/[123]/");
  final Query escaped2 = new NodeRegexpQuery(new Term(df, "[a-z]\\*[123]"));
  this._assertSirenQuery(config, escaped2, "/[a-z]\\*[123]/");
  final HashMap<String, Analyzer> dtAnalyzers = new HashMap<String, Analyzer>();
  dtAnalyzers.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dtAnalyzers);
  // A regexp clause mixed with quoted plain terms that contain slashes
  final NodeBooleanQuery complex = new NodeBooleanQuery();
  complex.add(new NodeRegexpQuery(new Term(df, "[a-z]\\/[123]")), NodeBooleanClause.Occur.MUST);
  complex.add(new NodeTermQuery(new Term(df, "/etc/init.d/")), Occur.MUST);
  complex.add(new NodeTermQuery(new Term(df, "/etc/init[.]d/lucene/")), Occur.SHOULD);
  this._assertSirenQuery(config, complex, "+/[a-z]\\/[123]/ +\"/etc/init.d/\" OR \"/etc\\/init\\[.\\]d/lucene/\" ");
  Query re = new NodeRegexpQuery(new Term(df, "http.*"));
  this._assertSirenQuery(config, re, "/http.*/");
  // '~' inside a regexp literal is not a fuzzy operator
  re = new NodeRegexpQuery(new Term(df, "http~0.5"));
  this._assertSirenQuery(config, re, "/http~0.5/");
  re = new NodeRegexpQuery(new Term(df, "boo"));
  this._assertSirenQuery(config, re, "/boo/");
  // Quoted or escaped slashes yield a plain term query instead of a regexp
  this._assertSirenQuery(config, new NodeTermQuery(new Term(df, "/boo/")), "\"/boo/\"");
  this._assertSirenQuery(config, new NodeTermQuery(new Term(df, "/boo/")), "\\/boo\\/");
  // Two regexps combined with the OR default operator
  config.put(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR);
  final NodeBooleanQuery two = new NodeBooleanQuery();
  two.add(new NodeRegexpQuery(new Term(df, "foo")), Occur.SHOULD);
  two.add(new NodeRegexpQuery(new Term(df, "bar")), Occur.SHOULD);
  this._assertSirenQuery(config, two, "/foo/ /bar/");
  final NodeRegexpQuery regexpQueryexp = new NodeRegexpQuery(new Term(df, "[abc]?[0-9]"));
  this._assertSirenQuery(config, regexpQueryexp, "/[abc]?[0-9]/");
}
/**
 * Tests terms expanded at the same position: the "exp" datatype analyzer
 * chains an {@link ASCIIFoldingExpansionFilter} after a whitespace
 * tokenizer, so an accented term like "café" is expanded into both its
 * folded form ("cafe") and the original, combined in a nested SHOULD group.
 */
@Test
public void testQueryTermAtSamePosition()
throws Exception {
final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
// Analyzer producing the folded and the original token at the same position
final Analyzer analyser = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(final String fieldName,
final Reader reader) {
final WhitespaceTokenizer t = new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
final TokenStream ts = new ASCIIFoldingExpansionFilter(t);
return new TokenStreamComponents(t, ts);
}
};
config.put(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR);
final HashMap<String, Analyzer> dts = new HashMap<String, Analyzer>();
dts.put("exp", analyser);
dts.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
/*
* Here we cannot parse the toString output, because the query
* has been expanded by DatatypeAnalyzerProcessor
*/
// Expanded group mixed with plain OR terms
Query q = bq(
should(ntq("latte")),
must(bq(should(ntq("cafe").setDatatype("exp")),
should(ntq("café").setDatatype("exp")))),
should("the")
).getQuery();
assertEquals(q, parse(config, "latte +exp(café) the"));
// A single required expanded term
q = bq(
must(bq(should(ntq("cafe").setDatatype("exp")),
should(ntq("café").setDatatype("exp"))))
).getQuery();
assertEquals(q, parse(config, "+exp(café)"));
// Two required expanded terms inside one datatype group
q = bq(
must(bq(should(ntq("cafe").setDatatype("exp")),
should(ntq("café").setDatatype("exp")))),
must(bq(should(ntq("mate").setDatatype("exp")),
should(ntq("maté").setDatatype("exp"))))
).getQuery();
assertEquals(q, parse(config, "exp(+café +maté)"));
// Required plus prohibited expanded terms
q = bq(
must(bq(should(ntq("cafe").setDatatype("exp")),
should(ntq("café").setDatatype("exp")))),
not(bq(should(ntq("mate").setDatatype("exp")),
should(ntq("maté").setDatatype("exp"))))
).getQuery();
assertEquals(q, parse(config, "exp(+café -maté)"));
// Two optional expanded terms under the OR default operator
q = bq(
should(bq(should(ntq("cafe").setDatatype("exp")),
should(ntq("café").setDatatype("exp")))),
should(bq(should(ntq("mate").setDatatype("exp")),
should(ntq("maté").setDatatype("exp"))))
).getQuery();
assertEquals(q, parse(config, "exp(café maté)"));
}
/** A single bare word parses to a node term query. */
@Test
public void testSingleWord()
throws Exception {
  this._assertSirenQuery(ntq("hello").getLuceneProxyQuery(), "hello");
}
/** An empty query string is rejected by the parser. */
@Test(expected=QueryNodeException.class)
public void testParseEmpty()
throws Exception {
  parse(null, "");
}
/**
 * Checks that a {@link TwigQueryNode} refuses more than two children: a
 * custom processor injects an extra FieldQueryNode into each twig, which
 * must raise an IllegalArgumentException during processing.
 */
@Test(expected=IllegalArgumentException.class)
public void testTwigQueryNodeWithMoreThan2Children()
throws Exception {
final KeywordQueryParser parser = new KeywordQueryParser();
final QueryNodeProcessorPipeline pipeline = new QueryNodeProcessorPipeline();
pipeline.add(new QueryNodeProcessorImpl() {
// Children order is irrelevant here; keep it untouched
@Override
protected List<QueryNode> setChildrenOrder(final List<QueryNode> children)
throws QueryNodeException {
return children;
}
// Inject a third child into every twig node encountered
@Override
protected QueryNode preProcessNode(final QueryNode node)
throws QueryNodeException {
if (node instanceof TwigQueryNode) {
node.add(new FieldQueryNode("field", "text", 0, 4));
}
return node;
}
@Override
protected QueryNode postProcessNode(final QueryNode node)
throws QueryNodeException {
return node;
}
});
parser.setQueryNodeProcessor(pipeline);
// "a : b" builds a twig (root a, child b); the processor makes it 3 children
parser.parse("a : b", SirenTestCase.DEFAULT_TEST_FIELD);
}
/** A registered QName prefix is expanded to its full namespace URI. */
@Test
public void testQName()
throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.QNAMES, this.loadQNamesFile("./src/test/resources/conf/qnames"));
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // 'foaf:' expands to the FOAF namespace declared in the qnames file
  final Query expected = ntq("http://xmlns.com/foaf/0.1/name")
                         .setDatatype("uri")
                         .getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "uri('foaf:name')");
}
/** A QName prefix used as a datatype label ('xsd:long') is expanded too. */
@Test
public void testQNameInDatatype()
throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.QNAMES, this.loadQNamesFile("./src/test/resources/conf/qnames"));
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put(XSDDatatype.XSD_LONG, new LongNumericAnalyzer(4));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // Inclusive lower bound, exclusive upper bound, precision step 4
  final NodeNumericRangeQuery range = NodeNumericRangeQuery.newLongRange(
    SirenTestCase.DEFAULT_TEST_FIELD, 4, 50l, 60l, true, false);
  range.setDatatype(XSDDatatype.XSD_LONG);
  this._assertSirenQuery(conf, new LuceneProxyNodeQuery(range), "xsd:long([50 TO 60})");
}
/** A colon-bearing term whose prefix is not a registered QName stays verbatim. */
@Test
public void testNotQName() throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.QNAMES, this.loadQNamesFile("./src/test/resources/conf/qnames"));
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // 'mailto' is not declared in the qnames file: no expansion takes place
  final Query expected = ntq("mailto:aidan.hogan@deri.org")
                         .setDatatype("ws")
                         .getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "ws('mailto:aidan.hogan@deri.org')");
}
/** Malformed QName candidates (empty or invalid local parts) are not expanded. */
@Test
public void testInvalidQName() throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.QNAMES, this.loadQNamesFile("./src/test/resources/conf/qnames"));
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // Empty local part, digit-leading local part, and '-'-leading local part:
  // none of them qualifies as a QName, so the raw terms are kept
  final Query expected = bq(
    must(ntq("http:").setDatatype("ws")),
    must(ntq("foaf:2").setDatatype("ws")),
    must(ntq("foaf:-qw").setDatatype("ws"))
  ).getQuery();
  this._assertSirenQuery(conf, expected, "ws('http:' 'foaf:2' 'foaf:-qw')");
}
/** Full 'http://...' URIs are never mistaken for QNames. */
@Test
public void testQNameHTTP() throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.QNAMES, this.loadQNamesFile("./src/test/resources/conf/qnames"));
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  final Query expected = bq(
    must(ntq("http://ns/#s").setDatatype("uri")),
    must(ntq("http://ns/p").setDatatype("uri")),
    must(ntq("http://ns/o").setDatatype("uri"))
  ).getQuery();
  this._assertSirenQuery(conf, expected, "uri('http://ns/#s' 'http://ns/p' 'http://ns/o')");
}
/** A '*' child leaves the twig with a root constraint only. */
@Test
public void testEmptyLeafTwig() throws Exception {
  final Query expected = twq(1).root(ntq("hello")).getLuceneProxyQuery();
  this._assertSirenQuery(expected, "hello : *");
}
/** A '*' between two node expressions produces an empty internal twig level. */
@Test
public void testEmptyInternalNodeTwig()
throws Exception {
  final Query expected = twq(1).root(ntq("hello"))
                         .with(twq(2).with(ntq("world")))
                         .getLuceneProxyQuery();
  this._assertSirenQuery(expected, "hello : * : world");
}
/**
 * Checks parent bookkeeping of {@link TwigQueryNode}: both the initial
 * wildcard root/child and nodes set later via setRoot/setChild must have
 * the twig as their parent.
 */
@Test
public void testTwigQueryNodeParent()
throws Exception {
final TwigQueryNode twig = new TwigQueryNode(new WildcardNodeQueryNode(),
new WildcardNodeQueryNode());
final FieldQueryNode term = new FieldQueryNode("field", "term", 0, 4);
// A free-standing node has no parent yet
assertTrue(term.getParent() == null);
// The twig adopted its initial root and child
assertEquals(twig, twig.getRoot().getParent());
assertEquals(twig, twig.getChild().getParent());
// Replacing root and child re-parents the new node
twig.setRoot(term);
twig.setChild(term);
assertEquals(twig, twig.getRoot().getParent());
assertEquals(twig, twig.getChild().getParent());
}
/** Consecutive '*' levels produce nested empty twigs down to the leaf. */
@Test
public void testEmptyDescendantTwig()
throws Exception {
  final Query expected = twq(1).root(ntq("aaa"))
                         .with(twq(2).with(twq(3).with(ntq("b"))))
                         .getLuceneProxyQuery();
  this._assertSirenQuery(expected, "aaa : * : * : b");
}
/** A twig whose root and child are both wildcards is rejected. */
@Test(expected=QueryNodeException.class)
public void testEmptyTwig()
throws QueryNodeException {
  parse(null, "* : *");
}
/** An object expression '{ ... }' is invalid at the top level. */
@Test(expected=QueryNodeException.class)
public void testBadObjectQuery() throws QueryNodeException {
  parse(null, "{ a }");
}
/** Several bare words are combined with the default AND operator. */
@Test
public void testMultipleWords()
throws Exception {
  final Query expected = bq(must("hello", "world")).getQuery();
  this._assertSirenQuery(expected, "hello world");
}
/** Phrase slop ("..."~n) is not supported by the keyword parser. */
@Test(expected=QueryNodeException.class)
public void testUnsupportedSlopQuery() throws QueryNodeException {
  parse(null, "\"hello world\"~2");
}
/** With OR as the default operator, multiple URIs become optional clauses. */
@Test
public void testURIsWithDefaultOR()
throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR);
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  final Query expected = bq(
    should(ntq("http://www.google.com").setDatatype("uri")),
    should(ntq("http://hello.world#me").setDatatype("uri"))
  ).getQuery();
  this._assertSirenQuery(conf, expected, "uri('http://www.google.com' 'http://hello.world#me')");
}
/** With AND as the default operator, multiple URIs become required clauses. */
@Test
public void testURIsWithDefaultAND()
throws Exception {
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(ConfigurationKeys.DEFAULT_OPERATOR, Operator.AND);
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  final Query expected = bq(
    must(ntq("http://www.google.com").setDatatype("uri")),
    must(ntq("http://hello.world#me").setDatatype("uri"))
  ).getQuery();
  this._assertSirenQuery(conf, expected, "uri('http://www.google.com' 'http://hello.world#me')");
}
/** Mixes a datatyped URI clause with required and prohibited plain terms. */
@Test
public void testCompoundQuery()
throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // The expected term lacks the trailing slash of the input URI —
  // presumably normalised by the AnyURIAnalyzer (TODO confirm)
  final Query expected = bq(
    should(ntq("http://www.google.com").setDatatype("uri")),
    must(ntq("hello")),
    not(ntq("world"))
  ).getQuery();
  this._assertSirenQuery(conf, expected, "uri('http://www.google.com/') +hello -world");
}
/** Fuzzy queries are rejected when fuzzy/wildcard support is disabled. */
@Test(expected=QueryNodeException.class)
public void testFuzzyQuery1()
throws Exception {
  final KeywordQueryParser kqp = new KeywordQueryParser();
  kqp.setAllowFuzzyAndWildcard(false);
  kqp.parse("miche~", SirenTestCase.DEFAULT_TEST_FIELD);
}
/**
 * Tests fuzzy queries: bare '~', '~' under a wildcard twig root, an explicit
 * similarity ('~0.8' converted to an edit distance), and a '~' that is part
 * of a quoted URI followed by a real fuzzy operator.
 */
@Test
public void testFuzzyQuery2()
throws Exception {
// Bare '~' uses the default maximum edit distance
final NodeQuery q1 = new NodeFuzzyQuery(new Term(SirenTestCase.DEFAULT_TEST_FIELD, "michel"));
this._assertSirenQuery(new LuceneProxyNodeQuery(q1), "michel~");
// Fuzzy query as the required child of a wildcard-rooted twig
final TwigQuery q2 = new TwigQuery(1);
q2.addChild(q1, NodeBooleanClause.Occur.MUST);
this._assertSirenQuery(new LuceneProxyNodeQuery(q2), "* : michel~");
// Legacy float similarity is converted to an integer edit distance
final int numEdits = FuzzyQuery.floatToEdits(0.8f, "michel".codePointCount(0, "michel".length()));
final NodeQuery q3 = new NodeFuzzyQuery(new Term(SirenTestCase.DEFAULT_TEST_FIELD, "michel"), numEdits);
this._assertSirenQuery(new LuceneProxyNodeQuery(q3), "michel~0.8");
// first tilde is escaped, not the second one
final NodeQuery q4 = new NodeFuzzyQuery(new Term(SirenTestCase.DEFAULT_TEST_FIELD, "http://sw.deri.org/~aida"));
this._assertSirenQuery(new LuceneProxyNodeQuery(q4), "'http://sw.deri.org/~aida'~");
}
/** Wildcard queries are rejected when fuzzy/wildcard support is disabled. */
@Test(expected=QueryNodeException.class)
public void testWildcardQuery1() throws Exception {
  final KeywordQueryParser kqp = new KeywordQueryParser();
  kqp.setAllowFuzzyAndWildcard(false);
  kqp.parse("miche*", SirenTestCase.DEFAULT_TEST_FIELD);
}
/** '*' and '?' wildcards inside a term yield a node wildcard query. */
@Test
public void testWildcardQuery2()
throws Exception {
  final NodeQuery wildcard = new NodeWildcardQuery(new Term(SirenTestCase.DEFAULT_TEST_FIELD, "st*e.ca?as"));
  this._assertSirenQuery(new LuceneProxyNodeQuery(wildcard), "st*e.ca?as");
}
/** '~' and '?' inside quoted URIs are literal characters, not operators. */
@Test
public void testWildcardInURI() throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  Query expected = ntq("http://example.com/~foo=bar").setDatatype("uri").getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "uri('http://example.com/~foo=bar')");
  expected = ntq("http://example.com/?foo=bar").setDatatype("uri").getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "uri('http://example.com/?foo=bar')");
}
/** Non-ASCII input must be parsed without being mangled. */
@Test
public void testEncoding() throws Exception {
  this._assertSirenQuery(ntq("möller").getLuceneProxyQuery(), "möller");
}
/** Dashes inside a quoted URI are not interpreted as NOT operators. */
@Test
public void testDashedURI() throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("uri", new AnyURIAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // The expected term lacks the input's trailing slash — presumably
  // normalised by the AnyURIAnalyzer (TODO confirm)
  final Query expected = ntq("http://semantic-conference.com/session/569")
                         .setDatatype("uri").getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "uri('http://semantic-conference.com/session/569/')");
}
/** Colon-bearing terms inside a datatype group are terms, not field queries. */
@Test
public void testDisabledFieldQuery()
throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  final Query expected = bq(
    must(ntq("foaf:name").setDatatype("ws")),
    not(ntq("foaf\\:person").setDatatype("ws")),
    should(ntq("domain:dbpedia.org").setDatatype("ws")),
    should(ntq("http://test.org/").setDatatype("ws")),
    should(ntq("http://test2.org/").setDatatype("ws"))
  ).getQuery();
  this._assertSirenQuery(conf, expected, "ws(+'foaf:name' -'foaf\\:person' 'domain:dbpedia.org' 'http://test.org/' 'http://test2.org/')");
}
/** mailto: URIs are preserved, alone and combined with other terms. */
@Test
public void testMailtoURI()
throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // A single mailto URI
  final Query single = ntq("mailto:stephane.campinas@deri.org")
                       .setDatatype("ws").getLuceneProxyQuery();
  this._assertSirenQuery(conf, single, "ws('mailto:stephane.campinas@deri.org')");
  // A mailto URI combined with another colon-bearing term
  final Query pair = bq(must(ntq("mailto:stephane.campinas@deri.org").setDatatype("ws")),
    must(ntq("domain:dbpedia.org").setDatatype("ws"))
  ).getQuery();
  this._assertSirenQuery(conf, pair, "ws('mailto:stephane.campinas@deri.org' 'domain:dbpedia.org')");
}
/**
 * Test for special Lucene characters ('~', '?') within quoted URIs: they
 * must be treated as literal characters rather than fuzzy/wildcard
 * operators.
 */
@Test
public void testLuceneSpecialCharacter()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> dts = new HashMap<String, Analyzer>();
  dts.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
  // Special tilde character kept literal inside the quoted URI
  Query q = ntq("http://sw.deri.org/~aidanh").setDatatype("ws")
            .getLuceneProxyQuery();
  this._assertSirenQuery(config, q, "ws('http://sw.deri.org/~aidanh')");
  // '?' wildcard character kept literal inside the quoted URI.
  // NOTE(review): the original repeated this identical query construction
  // and assertion twice in a row (apparent copy-paste slip); the redundant
  // duplicate was removed.
  q = ntq("http://example.com/?foo=bar").setDatatype("ws")
      .getLuceneProxyQuery();
  this._assertSirenQuery(config, q, "ws('http://example.com/?foo=bar')");
}
/** A quoted phrase inside a datatype group becomes a node phrase query. */
@Test
public void testPhraseQuery()
throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  final Query expected = npq("a", "simple", "literal").setDatatype("ws")
                         .getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "ws(\"a simple literal\")");
}
/**
 * Tests parenthesised groups nested inside a conjunction: two OR groups
 * combined with AND, themselves AND-ed with a leading term. The leading
 * "Test" is lowercased (presumably by the default analysis chain — TODO
 * confirm against the default xsd:string analyzer).
 */
@Test
public void testNestedGroups()
throws Exception {
final Query q = bq(
must(ntq("test")),
must(
bq(must(
bq(should("literal", "uri", "resource")),
bq(should("pattern", "patterns", "query"))
))
)
).getQuery();
this._assertSirenQuery(q, "Test AND ((literal OR uri OR resource) AND (pattern OR patterns OR query))");
}
/**
 * Same as {@link #testNestedGroups()}, but the inner groups mix optional,
 * required ('+') and prohibited ('-') clauses.
 */
@Test
public void testNestedGroups2()
throws Exception {
final Query q = bq(
must(ntq("test")),
must(
bq(must(
bq(should(ntq("literal")), must(ntq("uri")), not(ntq("resource"))),
bq(should(ntq("pattern")), must(ntq("patterns")), not(ntq("query")))
))
)
).getQuery();
this._assertSirenQuery(q, "Test AND ((literal OR +uri OR -resource) AND (pattern OR +patterns OR -query))");
}
/** The '^' operator sets the boost on the underlying node query. */
@Test
public void testBoostQuery()
throws Exception {
  final BooleanQuery expected = new BooleanQuery();
  expected.add(ntq("c").getLuceneProxyQuery(), BooleanClause.Occur.MUST);
  // 'b' carries a boost of 2
  final NodeQuery boosted = ntq("b").getNodeQuery();
  boosted.setBoost(2);
  expected.add(new LuceneProxyNodeQuery(boosted), BooleanClause.Occur.MUST);
  this._assertSirenQuery(expected, "c b^2");
}
/** 'root : child' parses to a twig query, with or without spaces. */
@Test
public void testTwigQuery()
throws Exception {
  final Query expected = twq(1).root(ntq("aaa"))
                         .with(ntq("b"))
                         .getLuceneProxyQuery();
  this._assertSirenQuery(expected, "aaa : b");
  this._assertSirenQuery(expected, "aaa:b");
}
/**
 * Checks datatype handling in a twig: json:field is always applied on the
 * top level node of the twig, while the child keeps its explicit datatype.
 */
@Test
public void testTwigQueryDatatype()
throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  analyzers.put(JSONDatatype.JSON_FIELD, new StandardAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  final HashMap<ConfigurationKey, Object> conf = new HashMap<ConfigurationKey, Object>();
  conf.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  // The root "AAA" comes out lowercased as "aaa" (expected term below),
  // while the ws-typed child "b" is untouched
  final Query expected = twq(1).root(ntq("aaa"))
                         .with(ntq("b").setDatatype("ws"))
                         .getLuceneProxyQuery();
  this._assertSirenQuery(conf, expected, "AAA : ws(b)");
}
/**
 * A datatype may also be applied to the twig root, either on the root alone
 * or wrapping the whole twig expression.
 */
@Test
public void testTwigQueryDatatypeOnRoot()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> dts = new HashMap<String, Analyzer>();
  dts.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
  // json:field is always applied on the top level node of the twig.
  // NOTE(review): unlike the previous test, no json:field analyzer is
  // registered here — the comment above may be stale; confirm.
  final Query q = twq(1).root(ntq("AAA").setDatatype("ws"))
  .with(ntq("b").setDatatype("ws"))
  .getLuceneProxyQuery();
  // "ws(...)" wrapping the whole twig applies the datatype to root and child.
  this._assertSirenQuery(config, q, "ws(AAA) : ws(b)");
  this._assertSirenQuery(config, q, "ws(AAA : b)");
}
/**
 * A stop word used as an intermediate twig root must be dropped, leaving an
 * empty-root nested twig around the remaining child.
 */
@Test
public void testTwigQueryStopWord()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(twq(2).with(ntq("coffee")))
  .getLuceneProxyQuery();
  // The word "the" is a stop word, and is therefore removed by the standard
  // analyzer associated to xsd:string.
  this._assertSirenQuery(q, "aaa : the : coffee");
}
/**
 * When both the root and the child of a twig are stop words, nothing is left
 * to build a query from and parsing must fail.
 */
@Test(expected=QueryNodeException.class)
public void testTwigQueryBothStopWords()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final HashMap<String, Analyzer> dts = new HashMap<String, Analyzer>();
  dts.put(JSONDatatype.JSON_FIELD, new StandardAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  dts.put(XSDDatatype.XSD_STRING, new StandardAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
  // Word "a" and "the" are stop words, and are therefore removed by the
  // standard analyzer associated to json:field and xsd:string, respectively.
  parse(config, "a : the");
}
/**
 * A stop word inside an array value must be dropped, leaving a twig with the
 * single remaining child.
 */
@Test
public void testArrayQueryStopWord()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(ntq("coffee"))
  .getLuceneProxyQuery();
  // The word "the" is a stop word, and is therefore removed by the standard
  // analyzer associated to xsd:string.
  this._assertSirenQuery(q, "aaa : [ the , coffee ]");
}
/**
 * The ROOT_LEVEL configuration key must shift the level at which the parser
 * creates the top twig (here level 2 instead of the default).
 */
@Test
public void testRootLevelTwigQuery()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  config.put(KeywordConfigurationKeys.ROOT_LEVEL, 2);
  final Query q = twq(2).root(ntq("aaa"))
  .with(ntq("b"))
  .getLuceneProxyQuery();
  this._assertSirenQuery(config, q, "aaa : b");
}
/**
 * Boolean expressions are allowed on both sides of the twig separator:
 * AND on the root, OR on the child.
 */
@Test
public void testBooleanTwigQuery()
throws Exception {
  final Query q = twq(1).root(nbq(must("aaa", "b")))
  .with(nbq(should("c", "d")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "aaa AND b : c OR d");
}
/**
 * A parenthesised twig can appear as one clause of a top-level boolean
 * query.
 */
@Test
public void testBooleanTwigQuery2()
throws Exception {
  final Query bq = bq(
    must(ntq("e")),
    must(twq(1).root(ntq("b"))
      .with(nbq(should("c", "d")))
    )
  ).getQuery();
  this._assertSirenQuery(bq, "e AND (b : c OR d)");
}
/**
 * An AND expression on the child side of a twig becomes a node boolean
 * query with two required clauses.
 */
@Test
public void testBooleanTwigQuery3()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(nbq(must("c", "d")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "aaa : c AND d");
}
/**
 * A negated term inside the child boolean expression ("-c AND d") maps to a
 * NOT clause plus a MUST clause.
 */
@Test
public void testBooleanTwigQuery4()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(nbq(not("c"), must("d")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "aaa : -c AND d");
}
/**
 * Two parenthesised twig queries combined with AND must yield a boolean
 * query with two required twig clauses.
 */
@Test
public void testTwigQueriesConjunction()
throws Exception {
  final Query bq = bq(must(
    twq(1).root(ntq("aaa"))
    .with(ntq("c"))), must(
    twq(1).root(ntq("b"))
    .with(ntq("d"))
  )).getQuery();
  this._assertSirenQuery(bq, "(aaa : c) AND (b : d)");
}
/**
 * Two parenthesised twig queries combined with OR must yield a boolean
 * query with two optional twig clauses.
 */
@Test
public void testTwigQueriesDisjunction()
throws Exception {
  final Query bq = bq(should(
    twq(1).root(ntq("aaa"))
    .with(ntq("c"))), should(
    twq(1).root(ntq("b"))
    .with(ntq("d"))
  )).getQuery();
  this._assertSirenQuery(bq, "(aaa : c) OR (b : d)");
}
/**
 * The '-' operator between two parenthesised twigs must produce a required
 * clause followed by a prohibited one.
 */
@Test
public void testTwigQueriesComplement()
throws Exception {
  final Query bq = bq(must(
    twq(1).root(ntq("aaa"))
    .with(ntq("c"))
  ), not(
    twq(1).root(ntq("b"))
    .with(ntq("d"))
  )).getQuery();
  this._assertSirenQuery(bq, "(aaa : c) - (b : d)");
}
/**
 * SRN-91
 * <p>
 * A datatype ("ws") wrapping a complement of two empty-root twigs must
 * propagate to the terms on both sides of the NOT operator.
 */
@Test
public void testTwigComplement2()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> dts = new HashMap<String, Analyzer>();
  dts.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  dts.put(JSONDatatype.JSON_FIELD, new StandardAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
  final Query bq = bq(must(
    twq(1)
    .with(ntq("literal").setDatatype("ws"))
  ), not(
    twq(1)
    // whitespace analysis keeps the URI as a single token
    .with(ntq("http://o.org").setDatatype("ws"))
  )).getQuery();
  this._assertSirenQuery(config, bq, "ws((* : literal) NOT (* : 'http://o.org'))");
}
/**
 * Line feeds and carriage returns around the twig separator and the boolean
 * operator must be treated as plain whitespace by the parser.
 */
@Test
public void testTwigQueryLineFeed()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> dts = new HashMap<String, Analyzer>();
  dts.put(XSDDatatype.XSD_STRING, new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  dts.put("ws", new WhitespaceAnalyzer(LuceneTestCase.TEST_VERSION_CURRENT));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
  final Query bq = bq(must(
    twq(1)
    .with(ntq("literal"))), must(
    twq(1)
    .with(ntq("http://o.org").setDatatype("ws"))
  )).getQuery();
  // Query string deliberately mixes \r\n, \n\r and \n between tokens.
  this._assertSirenQuery(config, bq, "(* : literal) AND\r\n (* \n\r : \n ws('http://o.org'))");
}
/**
 * A trailing wildcard ("lit*") must be parsed into a prefix node query,
 * both at the top level and as the child of an empty-root twig.
 */
@Test
public void testPrefixQuery()
throws Exception {
  final Term prefix = new Term(SirenTestCase.DEFAULT_TEST_FIELD, "lit");
  final Query expected = new LuceneProxyNodeQuery(new NodePrefixQuery(prefix));
  this._assertSirenQuery(expected, "lit*");
  final TwigQuery twig = new TwigQuery(1);
  twig.addChild(new NodePrefixQuery(prefix), NodeBooleanClause.Occur.MUST);
  this._assertSirenQuery(new LuceneProxyNodeQuery(twig), "* : lit*");
}
/**
 * The '*' wildcard as a twig root means "any root": only the child
 * constrains the query.
 */
@Test
public void testEmptyRootQuery()
throws Exception {
  final Query expected = twq(1).with(ntq("b")).getLuceneProxyQuery();
  this._assertSirenQuery(expected, "* : b");
}
/**
 * Chained ':' separators ("aaa : b : c") must produce a twig nested inside
 * another twig, one level deeper.
 */
@Test
public void testNestedTwigQuery()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(twq(2).root(ntq("b")).with(ntq("c")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "aaa : b : c");
}
/**
 * The root of a nested twig may itself be a boolean expression
 * ("d AND b").
 */
@Test
public void testNestedTwigQuery2()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(twq(2).root(nbq(must("d", "b")))
    .with(ntq("c")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "aaa : d AND b : c");
}
/**
 * An array value "[ b, c ]" maps to a twig with one child clause per array
 * element.
 */
@Test
public void testArrayQuery()
throws Exception {
  final Query q = twq(1)
  .with(ntq("b"))
  .with(ntq("c"))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : [ b, c ]");
}
/**
 * Unary modifiers inside an array: '-' maps to a prohibited child
 * (without), '+' to a required child (with).
 */
@Test
public void testArrayQueryWithModifiers()
throws Exception {
  final Query q = twq(1)
  .with(ntq("aaa"))
  .without(ntq("b"))
  .with(ntq("c"))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : [ aaa, -b, +c ]");
}
/**
 * A modifier may also be applied to a whole nested array when it is written
 * as a parenthesised twig ("-(*:[ c, d ])").
 */
@Test
public void testArrayQueryWithModifiers2()
throws Exception {
  final Query q = twq(1)
  .with(twq(2)
    .with(ntq("aaa"))
    .with(ntq("b")))
  .without(twq(2)
    .with(ntq("c"))
    .with(ntq("d")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : [ * : [ aaa, b ], -(*:[ c, d ]) ]");
}
// TODO: issue GH-52
/**
 * A bare negated array inside an array ("-[ c, d ]") is not yet supported;
 * ignored until GH-52 is resolved.
 */
@Ignore
@Test
public void testArrayQueryWithModifiers3()
throws QueryNodeException {
  parse(null, "* : [ a, -[ c, d ] ]");
}
/**
 * A nested array may be written either as an explicit twig ("* : [ c, d ]")
 * or with the shorthand bracket form ("[ c , d ]"); both parse identically.
 */
@Test
public void testNestedArrayQuery()
throws Exception {
  final Query q = twq(1)
  .with(ntq("b"))
  .with(twq(2)
    .with(ntq("c"))
    .with(ntq("d")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : [ b, * : [ c, d ] ]");
  this._assertSirenQuery(q, "* : [ b, [ c , d ] ]");
}
/**
 * Tests for a nested array with a single child: "[ b ]" still creates a
 * nested twig one level deeper.
 */
@Test
public void testNestedArrayQuery2()
throws Exception {
  final Query q = twq(1)
  .with(ntq("aaa"))
  .with(twq(2)
    .with(ntq("b")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : [ aaa, [ b ] ]");
}
/**
 * A grouping of children is not possible: a term cannot be combined with an
 * array through a boolean operator, so the parser must reject this input.
 */
@Test(expected=QueryNodeException.class)
public void testWrongArrayQuery1()
throws QueryNodeException {
  parse(null, "* : b AND [ c , d ]");
}
/**
 * A grouping of children is not possible: two arrays cannot be combined
 * through a boolean operator, so the parser must reject this input.
 */
@Test(expected=QueryNodeException.class)
public void testWrongArrayQuery2()
throws QueryNodeException {
  parse(null, "* : [a, b] AND [ c , d ]");
}
/**
 * A grouping of children is not possible: unary modifiers on whole arrays
 * ("+[..] -[..]") must be rejected by the parser.
 */
@Test(expected=QueryNodeException.class)
public void testWrongArrayQuery3()
throws QueryNodeException {
  parse(null, "* : +[a, b] -[ c , d ]");
}
/**
 * An array query is only possible inside a twig query: a top-level array
 * must be rejected by the parser.
 */
@Test(expected=QueryNodeException.class)
public void testWrongArrayQuery4()
throws QueryNodeException {
  parse(null, "[ c , d ]");
}
/**
 * An array query is only possible inside a twig query: an array combined
 * with a term at the top level must be rejected.
 */
@Test(expected=QueryNodeException.class)
public void testWrongArrayQuery5()
throws QueryNodeException {
  parse(null, "a AND [ c , d ]");
}
/**
 * An array query is only possible as the value of a twig query: an array
 * between '::' separators must be rejected.
 */
@Test(expected=QueryNodeException.class)
public void testWrongArrayQuery6()
throws QueryNodeException {
  parse(null, "a :: [ c , d ] :: e");
}
/**
 * An array element may itself be a boolean expression ("c AND d"), which
 * becomes a node boolean child of the twig.
 */
@Test
public void testArrayQueryWithBoolean()
throws Exception {
  final Query q = twq(1)
  .with(ntq("b"))
  .with(nbq(must("c", "d")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : [ b, c AND d ]");
}
/**
 * issue GH-50
 * <p>
 * A top-level object "{ aaa : b }" wraps the twig one level deeper under an
 * implicit empty-root twig.
 */
@Test
public void testObjectQuery1()
throws Exception {
  final Query q = twq(1)
  .with(twq(2).root(ntq("aaa"))
    .with(ntq("b")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "{ aaa : b }");
}
/**
 * issue GH-50
 * <p>
 * An object as a twig value ("* : { aaa : b }") adds two implicit levels:
 * the empty-root twig and the object wrapper.
 */
@Test
public void testObjectQuery2()
throws Exception {
  final Query q = twq(1).with(twq(2).with(twq(3).root(ntq("aaa")).with(ntq("b"))))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "* : { aaa : b }");
}
/**
 * issue GH-50
 * <p>
 * An object whose attribute is the '*' wildcard keeps only the value
 * constraint.
 */
@Test
public void testObjectQuery3()
throws Exception {
  final Query q = twq(1).with(twq(2).with(ntq("b")))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "{ * : b }");
}
/**
 * issue GH-50
 * <p>
 * An object whose value is the '*' wildcard keeps only the attribute (root)
 * constraint.
 */
@Test
public void testObjectQuery4()
throws Exception {
  final Query q = twq(1).with(twq(2).root(ntq("aaa"))).getLuceneProxyQuery();
  this._assertSirenQuery(q, "{ aaa : * }");
}
/**
 * issue GH-50
 * <p>
 * An object with wildcards on both sides ("{ * : * }") constrains nothing
 * and must be rejected by the parser.
 */
@Test(expected=QueryNodeException.class)
public void testObjectQuery5()
throws QueryNodeException {
  parse(null, "{ * : * }");
}
/**
 * An object with several fields ("{ aaa : b , c : d }") produces one nested
 * twig per field, all under the same object wrapper.
 */
@Test
public void testObjectQueryWithMultipleFields1()
throws Exception {
  final Query q = twq(1).root(ntq("p"))
  .with(twq(2).with(twq(3).root(ntq("aaa")).with(ntq("b")))
    .with(twq(3).root(ntq("c")).with(ntq("d"))))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "p : { aaa : b , c : d }");
}
/**
 * Multi-field object where each field uses a wildcard on one side: each
 * field still becomes its own nested twig.
 */
@Test
public void testObjectQueryWithMultipleFields2()
throws Exception {
  final Query q = twq(1).root(ntq("p"))
  .with(twq(2).with(twq(3).root(ntq("aaa")))
    .with(twq(3).with(ntq("d"))))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "p : { aaa : * , * : d }");
}
/**
 * An object may appear as an element of an array value; it adds its two
 * implicit levels below the array's twig.
 */
@Test
public void testObjectWithArrayQuery()
throws Exception {
  final Query q = twq(1).root(ntq("aaa"))
  .with(ntq("d"))
  .with(twq(3).with(twq(4).with(ntq("b"))))
  .getLuceneProxyQuery();
  this._assertSirenQuery(q, "aaa : [ d, { * : b } ]");
}
/**
 * A phrase analyzed with a token-expanding filter must be rejected.
 * NOTE(review): presumably the expansion filter emits several tokens at the
 * same position for "café", producing a multi-phrase the parser does not
 * support — confirm against ASCIIFoldingExpansionFilter.
 */
@Test(expected=QueryNodeException.class)
public void testMultiPhraseQuery()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  // Inline analyzer: whitespace tokenization followed by ASCII-folding expansion.
  final Analyzer analyser = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(final String fieldName,
                                                     final Reader reader) {
      final WhitespaceTokenizer t = new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
      final TokenStream ts = new ASCIIFoldingExpansionFilter(t);
      return new TokenStreamComponents(t, ts);
    }
  };
  final HashMap<String, Analyzer> dts = new HashMap<String, Analyzer>();
  dts.put("exp", analyser);
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, dts);
  this.parse(config, "exp(\"café coffe\")");
}
/**
 * Term range syntax: '[' / ']' are inclusive bounds, '{' / '}' exclusive,
 * in every combination; ranges also work as twig children and inside
 * arrays.
 */
@Test
public void testRangeQueries()
throws Exception {
  NodeQuery q = new NodeTermRangeQuery(SirenTestCase.DEFAULT_TEST_FIELD,
    new BytesRef("a"), new BytesRef("b"), true, true);
  this._assertSirenQuery(new LuceneProxyNodeQuery(q), "[ a TO b ]");
  q = new NodeTermRangeQuery(SirenTestCase.DEFAULT_TEST_FIELD,
    new BytesRef("a"), new BytesRef("b"), false, true);
  this._assertSirenQuery(new LuceneProxyNodeQuery(q), "{ a TO b ]");
  q = new NodeTermRangeQuery(SirenTestCase.DEFAULT_TEST_FIELD,
    new BytesRef("a"), new BytesRef("b"), true, false);
  this._assertSirenQuery(new LuceneProxyNodeQuery(q), "[ a TO b }");
  q = new NodeTermRangeQuery(SirenTestCase.DEFAULT_TEST_FIELD,
    new BytesRef("a"), new BytesRef("b"), false, false);
  this._assertSirenQuery(new LuceneProxyNodeQuery(q), "{ a TO b }");
  // q now holds the exclusive/exclusive range, reused as a twig child below.
  final TwigQuery twq1 = new TwigQuery(1);
  twq1.addChild(q, NodeBooleanClause.Occur.MUST);
  // TODO parsing the output of #toString of twq1 is not possible because of GH-52
  assertEquals(new LuceneProxyNodeQuery(twq1), this.parse(null, "* : { a TO b }"));
  final TwigQuery twq2 = new TwigQuery(1);
  twq2.addChild(new NodeTermRangeQuery(SirenTestCase.DEFAULT_TEST_FIELD,
    new BytesRef("a"), new BytesRef("b"), true, true), NodeBooleanClause.Occur.MUST);
  twq2.addChild(q, NodeBooleanClause.Occur.MUST);
  assertEquals(new LuceneProxyNodeQuery(twq2), this.parse(null, "* : [ [ a TO b ], { a TO b } ]"));
}
/**
 * A pattern between slashes ("/s*e/") must be parsed into a regexp node
 * query.
 */
@Test
public void testRegexQueries()
throws Exception {
  final Term pattern = new Term(SirenTestCase.DEFAULT_TEST_FIELD, "s*e");
  final Query expected = new LuceneProxyNodeQuery(new NodeRegexpQuery(pattern));
  this._assertSirenQuery(expected, "/s*e/");
}
/**
 * A quoted phrase containing a single term must collapse to a plain term
 * query.
 */
@Test
public void testPhrase1term()
throws Exception {
  final Query expected = ntq("test").getLuceneProxyQuery();
  this._assertSirenQuery(expected, "\"test\"");
}
/**
 * Using a datatype name with no registered analyzer must fail with an
 * {@link IllegalArgumentException}.
 */
@Test(expected=IllegalArgumentException.class)
public void testNotRegisteredDatatype()
throws Exception {
  parse(null, "notRegistered(aaa)");
}
/**
 * Custom datatypes: a numeric range under "xsd:int" and a phrase under a
 * custom lowercasing "xsd:date" analyzer, combined in one boolean query.
 */
@Test
public void testDatatypes1()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  // Set the default datatypes
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  // precision step 4 must match the ranges built below
  datatypes.put("xsd:int", new IntNumericAnalyzer(4));
  // Inline analyzer: whitespace tokenization + lowercasing.
  final Analyzer dateAnalyser = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(final String fieldName,
                                                     final Reader reader) {
      final WhitespaceTokenizer t = new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
      final TokenStream ts = new LowerCaseFilter(LuceneTestCase.TEST_VERSION_CURRENT, t);
      return new TokenStreamComponents(t, ts);
    }
  };
  datatypes.put("xsd:date", dateAnalyser);
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  // Test for custom datatypes
  final BooleanQuery bq1 = new BooleanQuery();
  final NodePrimitiveQuery range1 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 12, 21, true, true);
  range1.setDatatype("xsd:int");
  bq1.add(new LuceneProxyNodeQuery(range1), BooleanClause.Occur.MUST);
  // "12 Oct 2012" is lowercased by the date analyzer, hence the phrase terms
  final Query npq = npq("12", "oct", "2012").setDatatype("xsd:date")
  .getLuceneProxyQuery();
  bq1.add(npq, BooleanClause.Occur.MUST);
  this._assertSirenQuery(config, bq1, "xsd:int([12 TO 21]) xsd:date(\"12 Oct 2012\")");
}
/**
 * Like {@code testDatatypes1}, but the datatype group contains two values
 * (a range and a single value), which become a nested boolean query.
 */
@Test
public void testDatatypes2()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  // Set the default datatypes
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("xsd:int", new IntNumericAnalyzer(4));
  // Inline analyzer: whitespace tokenization + lowercasing.
  final Analyzer dateAnalyser = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(final String fieldName,
                                                     final Reader reader) {
      final WhitespaceTokenizer t = new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
      final TokenStream ts = new LowerCaseFilter(LuceneTestCase.TEST_VERSION_CURRENT, t);
      return new TokenStreamComponents(t, ts);
    }
  };
  datatypes.put("xsd:date", dateAnalyser);
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  // Test for custom datatypes
  final BooleanQuery bq1 = new BooleanQuery();
  final BooleanQuery bq2 = new BooleanQuery();
  final NodePrimitiveQuery range1 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 12, 21, true, true);
  range1.setDatatype("xsd:int");
  bq2.add(new LuceneProxyNodeQuery(range1), BooleanClause.Occur.MUST);
  // The single value '12' becomes a degenerate [12 TO 12] range.
  final NodePrimitiveQuery range2 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 12, 12, true, true);
  range2.setDatatype("xsd:int");
  bq2.add(new LuceneProxyNodeQuery(range2), BooleanClause.Occur.MUST);
  bq1.add(bq2, BooleanClause.Occur.MUST);
  final Query npq = npq("12", "oct", "2012").setDatatype("xsd:date")
  .getLuceneProxyQuery();
  bq1.add(npq, BooleanClause.Occur.MUST);
  this._assertSirenQuery(config, bq1, "xsd:int([12 TO 21] '12') xsd:date(\"12 Oct 2012\")");
}
/**
 * Multiple terms in a datatype are in a group: each xsd:int(...) group
 * becomes its own boolean sub-query, joined at the top level by AND.
 */
@Test
public void testDatatypes3()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  // Set the default datatypes
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("xsd:int", new IntNumericAnalyzer(4));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  // Test for custom datatypes
  // First group: [1 TO 10] OR [20 TO 40]
  final BooleanQuery bq1 = new BooleanQuery();
  final NodePrimitiveQuery range1To10 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 1, 10, true, true);
  range1To10.setDatatype("xsd:int");
  final NodePrimitiveQuery range20To40 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 20, 40, true, true);
  range20To40.setDatatype("xsd:int");
  bq1.add(new LuceneProxyNodeQuery(range1To10), BooleanClause.Occur.SHOULD);
  bq1.add(new LuceneProxyNodeQuery(range20To40), BooleanClause.Occur.SHOULD);
  // Second group: [10 TO 15] OR [50 TO 55]
  final BooleanQuery bq2 = new BooleanQuery();
  final NodePrimitiveQuery range10To15 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 10, 15, true, true);
  range10To15.setDatatype("xsd:int");
  final NodePrimitiveQuery range50To55 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 50, 55, true, true);
  range50To55.setDatatype("xsd:int");
  bq2.add(new LuceneProxyNodeQuery(range10To15), BooleanClause.Occur.SHOULD);
  bq2.add(new LuceneProxyNodeQuery(range50To55), BooleanClause.Occur.SHOULD);
  // Both groups are required at the top level.
  final BooleanQuery bq3 = new BooleanQuery();
  bq3.add(bq1, BooleanClause.Occur.MUST);
  bq3.add(bq2, BooleanClause.Occur.MUST);
  this._assertSirenQuery(config, bq3, "xsd:int([1 TO 10] OR [20 TO 40]) AND xsd:int([10 TO 15] OR [50 TO 55])");
}
/**
 * Unary modifiers and datatypes compose both ways: "+xsd:int(1) -xsd:int(2)"
 * and "xsd:int(+1 -2)" must parse to the same query.
 */
@Test
public void testDatatypes4()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  // Set the default datatypes
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("xsd:int", new IntNumericAnalyzer(4));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  // Test for custom datatypes
  final BooleanQuery bq1 = new BooleanQuery();
  // Single values become degenerate [n TO n] ranges.
  final NodePrimitiveQuery range1 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 1, 1, true, true);
  range1.setDatatype("xsd:int");
  bq1.add(new LuceneProxyNodeQuery(range1), BooleanClause.Occur.MUST);
  final NodePrimitiveQuery range2 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 2, 2, true, true);
  range2.setDatatype("xsd:int");
  bq1.add(new LuceneProxyNodeQuery(range2), BooleanClause.Occur.MUST_NOT);
  this._assertSirenQuery(config, bq1, "+xsd:int(1) -xsd:int(2)");
  this._assertSirenQuery(config, bq1, "xsd:int(+1 -2)");
}
/**
 * A range built with a precision step (42) different from the registered
 * analyzer's (4) must not compare equal to the parsed query, so the
 * assertion is expected to fail.
 */
@Test(expected=AssertionError.class)
public void testNumericDatatypeWrongPrecision()
throws Exception {
  final KeywordQueryParser parser = new KeywordQueryParser();
  // Set the default datatypes
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("int", new IntNumericAnalyzer(4));
  parser.setDatatypeAnalyzers(datatypes);
  // precision step 42 deliberately mismatches the analyzer's 4
  final NodeQuery rangeWrong = NodeNumericRangeQuery.newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 42, 12, 21, true, true);
  assertEquals(new LuceneProxyNodeQuery(rangeWrong), parser.parse("int([12 TO 21])", SirenTestCase.DEFAULT_TEST_FIELD));
}
/**
 * Test for incorrect numeric values at query time: a non-numeric range
 * bound ("bla") under a numeric datatype must be rejected.
 * <p>
 * Numeric ranges get processed with {@link NodeNumericRangeQueryNodeProcessor}.
 * Single numeric values are processed with {@link NodeNumericQueryNodeProcessor}.
 */
@Test(expected=QueryNodeException.class)
public void testNumericQuery1()
throws Exception {
  final Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
  analyzers.put("int", new IntNumericAnalyzer(4));
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, analyzers);
  parse(config, "int([10 TO bla])");
}
/**
 * Test for wildcard bounds: "[* TO 10]" and "[10 TO *]" must map to
 * half-open numeric ranges for all four numeric types.
 * <p>
 * Numeric ranges get processed with {@link NodeNumericRangeQueryNodeProcessor}.
 * Single numeric values are processed with {@link NodeNumericQueryNodeProcessor}.
 */
@Test
public void testNumericQuery2()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("int4", new IntNumericAnalyzer(4));
  datatypes.put("float4", new FloatNumericAnalyzer(4));
  datatypes.put("long4", new LongNumericAnalyzer(4));
  datatypes.put("double4", new DoubleNumericAnalyzer(4));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  // Integer
  assertOpenRange(config, "int4");
  // Float
  assertOpenRange(config, "float4");
  // Long
  assertOpenRange(config, "long4");
  // Double
  assertOpenRange(config, "double4");
}
/**
 * Asserts that "[* TO 10]" and "[10 TO *]" parse into half-open numeric
 * ranges (null bound on the open side) for the given datatype.
 *
 * @param config parser configuration holding the datatype analyzers
 * @param datatype one of "int4", "float4", "long4", "double4"
 */
private void assertOpenRange(final HashMap<ConfigurationKey, Object> config,
                             final String datatype)
throws Exception {
  final NodePrimitiveQuery openLeft;
  final NodePrimitiveQuery openRight;
  // Dispatch on the datatype name; the final else handles "double4".
  if (datatype.equals("int4")) {
    openLeft = NodeNumericRangeQuery
    .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, null, 10, true, true);
    openLeft.setDatatype(datatype);
    openRight = NodeNumericRangeQuery
    .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 10, null, true, true);
    openRight.setDatatype(datatype);
  } else if (datatype.equals("float4")) {
    openLeft = NodeNumericRangeQuery
    .newFloatRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, null, 10f, true, true);
    openLeft.setDatatype(datatype);
    openRight = NodeNumericRangeQuery
    .newFloatRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 10f, null, true, true);
    openRight.setDatatype(datatype);
  } else if (datatype.equals("long4")) {
    openLeft = NodeNumericRangeQuery
    .newLongRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, null, 10l, true, true);
    openLeft.setDatatype(datatype);
    openRight = NodeNumericRangeQuery
    .newLongRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 10l, null, true, true);
    openRight.setDatatype(datatype);
  } else {
    openLeft = NodeNumericRangeQuery
    .newDoubleRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, null, 10d, true, true);
    openLeft.setDatatype(datatype);
    openRight = NodeNumericRangeQuery
    .newDoubleRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 10d, null, true, true);
    openRight.setDatatype(datatype);
  }
  this._assertSirenQuery(config, new LuceneProxyNodeQuery(openLeft), datatype + "([* TO 10])");
  this._assertSirenQuery(config, new LuceneProxyNodeQuery(openRight), datatype + "([10 TO *])");
}
/**
 * Boolean of ranges: two ranges joined with OR inside a datatype group
 * become two optional clauses.
 * <p>
 * Numeric ranges get processed with {@link NodeNumericRangeQueryNodeProcessor}.
 * Single numeric values are processed with {@link NodeNumericQueryNodeProcessor}.
 */
@Test
public void testNumericQuery3()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("int", new IntNumericAnalyzer(4));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  // OR as default operator so bare juxtaposition inside the group is SHOULD
  config.put(ConfigurationKeys.DEFAULT_OPERATOR, Operator.OR);
  final NodePrimitiveQuery r1 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 50, 100, true, true);
  r1.setDatatype("int");
  final NodePrimitiveQuery r2 = NodeNumericRangeQuery
  .newIntRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 100, 500, true, true);
  r2.setDatatype("int");
  final BooleanQuery bq = new BooleanQuery();
  bq.add(new LuceneProxyNodeQuery(r1), BooleanClause.Occur.SHOULD);
  bq.add(new LuceneProxyNodeQuery(r2), BooleanClause.Occur.SHOULD);
  this._assertSirenQuery(config, bq, "int([50 TO 100] OR [100 TO 500])");
}
/**
 * Test for float: decimal bounds must parse into a float numeric range.
 * <p>
 * Numeric ranges get processed with {@link NodeNumericRangeQueryNodeProcessor}.
 * Single numeric values are processed with {@link NodeNumericQueryNodeProcessor}.
 */
@Test
public void testNumericQuery4()
throws Exception {
  final HashMap<ConfigurationKey, Object> config = new HashMap<ConfigurationKey, Object>();
  final Map<String, Analyzer> datatypes = new HashMap<String, Analyzer>();
  datatypes.put("float", new FloatNumericAnalyzer(4));
  config.put(KeywordConfigurationKeys.DATATYPES_ANALYZERS, datatypes);
  final NodePrimitiveQuery q = NodeNumericRangeQuery
  .newFloatRange(SirenTestCase.DEFAULT_TEST_FIELD, 4, 50.5f, 1000.34f, true, true);
  q.setDatatype("float");
  this._assertSirenQuery(config, new LuceneProxyNodeQuery(q), "float([50.5 TO 1000.34])");
}
}