/*
* JBoss DNA (http://www.jboss.org/dna)
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership. Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of
* individual contributors.
*
* JBoss DNA is free software. Unless otherwise indicated, all code in JBoss DNA
* is licensed to you under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* JBoss DNA is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.jboss.dna.search.lucene;
import static org.hamcrest.core.Is.is;
import static org.hamcrest.core.IsInstanceOf.instanceOf;
import static org.hamcrest.core.IsNull.notNullValue;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.jboss.dna.common.math.Duration;
import org.jboss.dna.common.statistic.Stopwatch;
import org.jboss.dna.graph.ExecutionContext;
import org.jboss.dna.graph.Graph;
import org.jboss.dna.graph.Location;
import org.jboss.dna.graph.Subgraph;
import org.jboss.dna.graph.connector.RepositoryConnection;
import org.jboss.dna.graph.connector.RepositoryConnectionFactory;
import org.jboss.dna.graph.connector.RepositoryContext;
import org.jboss.dna.graph.connector.RepositorySourceException;
import org.jboss.dna.graph.connector.inmemory.InMemoryRepositorySource;
import org.jboss.dna.graph.observe.Changes;
import org.jboss.dna.graph.observe.Observer;
import org.jboss.dna.graph.property.Name;
import org.jboss.dna.graph.property.Path;
import org.jboss.dna.graph.query.QueryResults;
import org.jboss.dna.graph.query.QueryResults.Columns;
import org.jboss.dna.graph.query.model.And;
import org.jboss.dna.graph.query.model.Constraint;
import org.jboss.dna.graph.query.model.Limit;
import org.jboss.dna.graph.query.model.Query;
import org.jboss.dna.graph.query.model.QueryCommand;
import org.jboss.dna.graph.query.model.Selector;
import org.jboss.dna.graph.query.model.SelectorName;
import org.jboss.dna.graph.query.model.Source;
import org.jboss.dna.graph.query.model.TypeSystem;
import org.jboss.dna.graph.query.parse.SqlQueryParser;
import org.jboss.dna.graph.query.process.QueryResultColumns;
import org.jboss.dna.graph.query.validate.ImmutableSchemata;
import org.jboss.dna.graph.query.validate.Schemata;
import org.jboss.dna.graph.request.AccessQueryRequest;
import org.jboss.dna.graph.request.FullTextSearchRequest;
import org.jboss.dna.graph.request.processor.RequestProcessor;
import org.jboss.dna.graph.search.SearchEngineIndexer;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.xml.sax.SAXException;
/**
* These tests verify that the {@link LuceneSearchEngine} is able to properly update the content based upon
* {@link LuceneSearchEngine#index(org.jboss.dna.graph.ExecutionContext, Iterable) observations}.
*/
public class LuceneSearchEngineObservationTest {
    // Name given to the in-memory repository source that the search engine indexes.
    private String sourceName;
    // The two workspaces, populated (by loadContent) from "cars.xml" and "aircraft.xml" respectively.
    private String workspaceName1;
    private String workspaceName2;
    private ExecutionContext context;
    private TypeSystem typeSystem;
    // Source whose change events are observed and indexed in-thread by the search engine.
    private InMemoryRepositorySource source;
    // Control source loaded with the same content but never initialized with an observer,
    // used to compare load times with and without indexing.
    private InMemoryRepositorySource unsearchedSource;
    private RepositoryConnectionFactory connectionFactory;
    private Graph content;
    private Graph unsearchedContent;
    private LuceneSearchEngine searchEngine;
    private Schemata schemata;
    private SqlQueryParser sql;
    // Variables passed to each AccessQueryRequest (no bindings are set in these tests).
    private Map<String, Object> variables;
    private Stopwatch sw;
    /** Controls whether the results from each test should be printed to System.out */
    private boolean print = false;
    /**
     * Set up a fresh in-memory repository, a second (un-indexed) control repository, and a
     * {@link LuceneSearchEngine} whose indexes are updated in-thread by the source's observer,
     * plus the schemata, SQL parser, and variables used by the query helpers.
     */
    @Before
    public void beforeEach() {
        context = new ExecutionContext();
        typeSystem = context.getValueFactories().getTypeSystem();
        workspaceName1 = "cars";
        workspaceName2 = "aircraft";
        sw = new Stopwatch();
        sourceName = "source";
        source = new InMemoryRepositorySource();
        source.setName(sourceName);
        content = Graph.create(source, context);
        unsearchedSource = new InMemoryRepositorySource();
        // NOTE(review): the control source reuses the searched source's name. This appears harmless
        // because 'unsearchedContent' connects to the instance directly (not via the connection
        // factory), but a distinct name would make failures easier to diagnose -- confirm before changing.
        unsearchedSource.setName(sourceName);
        unsearchedContent = Graph.create(unsearchedSource, context);
        // Create the workspaces ...
        content.createWorkspace().named(workspaceName1);
        content.createWorkspace().named(workspaceName2);
        unsearchedContent.createWorkspace().named(workspaceName1);
        unsearchedContent.createWorkspace().named(workspaceName2);
        // Connection factory that always hands out connections to the searched source ...
        connectionFactory = new RepositoryConnectionFactory() {
            /**
             * {@inheritDoc}
             *
             * @see org.jboss.dna.graph.connector.RepositoryConnectionFactory#createConnection(java.lang.String)
             */
            @SuppressWarnings( "synthetic-access" )
            public RepositoryConnection createConnection( String name ) throws RepositorySourceException {
                // Sanity check: only the one known source name should ever be requested.
                assertThat(sourceName, is(name));
                return source.getConnection();
            }
        };
        // Set up the provider and the search engine ...
        IndexRules.Builder rulesBuilder = IndexRules.createBuilder(LuceneSearchEngine.DEFAULT_RULES);
        rulesBuilder.defaultTo(Field.Store.YES, Field.Index.NOT_ANALYZED);
        rulesBuilder.stringField(name("model"), Field.Store.YES, Field.Index.ANALYZED);
        rulesBuilder.integerField(name("year"), Field.Store.YES, Field.Index.NOT_ANALYZED, 1990, 2020);
        rulesBuilder.floatField(name("userRating"), Field.Store.YES, Field.Index.NOT_ANALYZED, 0.0f, 10.0f);
        rulesBuilder.integerField(name("mpgCity"), Field.Store.YES, Field.Index.NOT_ANALYZED, 0, 50);
        rulesBuilder.integerField(name("mpgHighway"), Field.Store.YES, Field.Index.NOT_ANALYZED, 0, 50);
        // rulesBuilder.analyzeAndStoreAndFullText(name("maker"));
        IndexRules rules = rulesBuilder.build();
        LuceneConfiguration luceneConfig = LuceneConfigurations.inMemory();
        // LuceneConfiguration luceneConfig = LuceneConfigurations.using(new File("target/testIndexes"));
        Analyzer analyzer = null; // null lets the search engine choose its default analyzer
        searchEngine = new LuceneSearchEngine(sourceName, connectionFactory, false, luceneConfig, rules, analyzer);
        // Initialize the source so that the search engine observes the events ...
        @SuppressWarnings( "synthetic-access" )
        RepositoryContext repositoryContext = new RepositoryContext() {
            public Subgraph getConfiguration( int depth ) {
                return null; // this test supplies no configuration content
            }
            public ExecutionContext getExecutionContext() {
                return context;
            }
            public RepositoryConnectionFactory getRepositoryConnectionFactory() {
                return connectionFactory;
            }
            public Observer getObserver() {
                return new Observer() {
                    public void notify( Changes changes ) {
                        // -----------------------------------------------------------
                        // NOTE THAT THE SEARCH ENGINE IS UPDATED IN-THREAD !!!!!!!!!!
                        // -----------------------------------------------------------
                        // This means the indexing should be done before the graph operations return
                        searchEngine.index(context, changes.getChangeRequests());
                    }
                };
            }
        };
        source.initialize(repositoryContext);
        // Create the schemata for the workspaces ...
        schemata = ImmutableSchemata.createBuilder(typeSystem)
                                    .addTable("__ALLNODES__", "maker", "model", "year", "msrp", "mpgHighway", "mpgCity")
                                    .makeSearchable("__ALLNODES__", "maker")
                                    .build();
        // And create the SQL parser ...
        sql = new SqlQueryParser();
        variables = new HashMap<String, Object>();
    }
@After
public void afterEach() {
searchEngine = null;
content = null;
context = null;
source = null;
}
    /**
     * Create a {@link Name} from the supplied string using this test's execution context.
     *
     * @param name the string form of the name
     * @return the corresponding Name
     */
    protected Name name( String name ) {
        return context.getValueFactories().getNameFactory().create(name);
    }
    /**
     * Create a {@link Path} from the supplied string using this test's execution context.
     *
     * @param path the string form of the path
     * @return the corresponding Path
     */
    protected Path path( String path ) {
        return context.getValueFactories().getPathFactory().create(path);
    }
protected void loadContent( Graph graph ) {
try {
// Load the content ...
graph.useWorkspace(workspaceName1);
graph.importXmlFrom(getClass().getClassLoader().getResourceAsStream("cars.xml")).into("/");
graph.useWorkspace(workspaceName2);
graph.importXmlFrom(getClass().getClassLoader().getResourceAsStream("aircraft.xml")).into("/");
} catch (IOException e) {
fail(e.getMessage());
} catch (SAXException e) {
fail(e.getMessage());
}
}
protected void assertRowCount( QueryResults results,
int rowCount ) {
assertThat(results.getProblems().isEmpty(), is(true));
if (print) {
System.out.println(results);
}
assertThat(results.getTuples().size(), is(rowCount));
}
protected QueryResults search( String workspaceName,
String searchExpression,
int maxResults,
int offset ) {
RequestProcessor processor = searchEngine.createProcessor(context, null, true);
try {
FullTextSearchRequest request = new FullTextSearchRequest(searchExpression, workspaceName, maxResults, offset);
processor.process(request);
if (request.hasError()) {
fail(request.getError().getMessage());
return null;
}
assertThat(request.getResultColumns().getColumnCount(), is(0));
assertThat(request.getResultColumns().getLocationCount(), is(1));
assertThat(request.getResultColumns().hasFullTextSearchScores(), is(true));
// Convert the results to a List<Location>
List<Object[]> tuples = request.getTuples();
List<Location> results = new ArrayList<Location>(tuples.size());
for (Object[] tuple : tuples) {
results.add((Location)tuple[0]);
Float score = (Float)tuple[1];
assertThat(score, is(notNullValue()));
}
return new org.jboss.dna.graph.query.process.QueryResults(request.getResultColumns(), request.getStatistics(),
request.getTuples());
} finally {
processor.close();
}
}
protected QueryResults query( String workspaceName,
String sql ) {
QueryCommand command = this.sql.parseQuery(sql, typeSystem);
assertThat(command, is(instanceOf(Query.class)));
Query query = (Query)command;
Source source = query.getSource();
assertThat(source, is(instanceOf(Selector.class)));
SelectorName tableName = ((Selector)source).getName();
Constraint constraint = query.getConstraint();
Columns resultColumns = new QueryResultColumns(query.getColumns(), QueryResultColumns.includeFullTextScores(constraint));
List<Constraint> andedConstraints = getAndedConstraint(constraint, new ArrayList<Constraint>());
Limit limit = query.getLimits();
RequestProcessor processor = searchEngine.createProcessor(context, null, true);
try {
AccessQueryRequest request = new AccessQueryRequest(workspaceName, tableName, resultColumns, andedConstraints, limit,
schemata, variables);
processor.process(request);
if (request.hasError()) {
request.getError().printStackTrace(System.out);
fail(request.getError().getMessage());
}
return new org.jboss.dna.graph.query.process.QueryResults(request.resultColumns(), request.getStatistics(),
request.getTuples());
} finally {
processor.close();
}
}
protected List<Constraint> getAndedConstraint( Constraint constraint,
List<Constraint> andedConstraints ) {
if (constraint != null) {
if (constraint instanceof And) {
And and = (And)constraint;
getAndedConstraint(and.getLeft(), andedConstraints);
getAndedConstraint(and.getRight(), andedConstraints);
} else {
andedConstraints.add(constraint);
}
}
return andedConstraints;
}
    @Test
    public void shouldInitializeWithoutAddingContentToSource() {
        // Intentionally empty: verifies that beforeEach() can wire up the source, observer,
        // and search engine without error when no content is ever added.
    }
@Test
public void shouldEstimateTimeToIndexContent() {
// Prime the reading of the files ...
InMemoryRepositorySource prime = new InMemoryRepositorySource();
prime.setName(sourceName);
Graph primeGraph = Graph.create(prime, context);
primeGraph.createWorkspace().named(workspaceName1);
primeGraph.createWorkspace().named(workspaceName2);
// Prime the search engine ...
sw.reset();
sw.start();
SearchEngineIndexer indexer = new SearchEngineIndexer(context, searchEngine, connectionFactory);
indexer.indexAllWorkspaces();
indexer.close();
sw.stop();
Duration zeroth = sw.getTotalDuration();
System.out.println("Time to prime search engine: " + zeroth);
// First load the content into the unsearched source ...
sw.reset();
sw.start();
loadContent(unsearchedContent);
sw.stop();
Duration first = sw.getTotalDuration();
// Now load the same content into the searchable source ...
sw.reset();
sw.start();
loadContent(content);
sw.stop();
Duration second = sw.getTotalDuration();
// And measure the time required to re-index ...
sw.reset();
sw.start();
indexer = new SearchEngineIndexer(context, searchEngine, connectionFactory);
indexer.indexAllWorkspaces();
indexer.close();
sw.stop();
Duration third = sw.getTotalDuration();
int percentOfLoading = (int)(((second.floatValue() / first.floatValue())) * 100.0f);
System.out.println("Time to load content without indexing: " + first);
System.out.println("Time to load content and updating indexes: " + second + " (" + percentOfLoading
+ "% of loading w/o indexing)");
Duration loadingDiff = second.subtract(first);
System.out.println("Time to update indexes during loading: " + loadingDiff);
int percentChange = (int)((((third.floatValue() - loadingDiff.floatValue()) / loadingDiff.floatValue())) * 100.0f);
if (percentChange >= 0) {
System.out.println("Time to re-index all content: " + third + " (" + percentChange
+ "% more than indexing time during loading)");
} else {
System.out.println("Time to re-index all content: " + third + " (" + percentChange
+ "% less than indexing time during loading)");
}
// Make sure we're finding the results ...
// print = true;
QueryResults results = search(workspaceName1, "Toyota Prius", 10, 0);
assertThat(results, is(notNullValue()));
assertRowCount(results, 2);
Location location1 = (Location)(results.getTuples().get(0)[0]);
Location location2 = (Location)(results.getTuples().get(1)[0]);
assertThat(location1.getPath(), is(path("/Cars/Hybrid/Toyota Prius")));
assertThat(location2.getPath(), is(path("/Cars/Hybrid/Toyota Highlander")));
}
    @Test
    public void shouldUpdateIndexesWhenPropertiesAreSetOnRootInSource() {
        // No explicit assertions: the observer indexes changes in-thread, so this passes only
        // if setting a property on the root node can be indexed without throwing.
        content.set("year").on("/").to("2009").and();
    }
    @Test
    public void shouldUpdateIndexesWhenMultipleNodesAreAdded() {
        // No explicit assertions: the batch creates two nodes in one set of change events, and
        // the test passes only if the in-thread observer can index both without throwing.
        content.batch()
               .create("/TheEnzo")
               .with("year", 2009)
               .and("model", "Enzo")
               .and()
               .create("/TheEsto")
               .with("year", 2009)
               .and("model", "Esto")
               .and()
               .execute();
    }
@Test
public void shouldUpdateIndexesWhenDeletingNodesInSource() {
loadContent(content);
// Make sure we're finding the results ...
QueryResults results = search(workspaceName1, "Toyota Prius", 10, 0);
assertThat(results, is(notNullValue()));
assertRowCount(results, 2);
Location location1 = (Location)(results.getTuples().get(0)[0]);
Location location2 = (Location)(results.getTuples().get(1)[0]);
assertThat(location1.getPath(), is(path("/Cars/Hybrid/Toyota Prius")));
assertThat(location2.getPath(), is(path("/Cars/Hybrid/Toyota Highlander")));
String query = "SELECT model, maker FROM __ALLNODES__ WHERE PATH() LIKE '/Cars[%]/Hy%/Toyota%' OR PATH() LIKE '/Cars[1]/Utility[1]/%'";
results = query(workspaceName1, query);
assertRowCount(results, 6);
content.useWorkspace(workspaceName1);
content.delete("/Cars/Hybrid/Toyota Prius");
// Make sure we don't find the 'Prius' anymore, but we still should find the 'Highlander' ...
results = search(workspaceName1, "Toyota Prius", 10, 0);
assertThat(results, is(notNullValue()));
assertRowCount(results, 1);
location1 = (Location)(results.getTuples().get(0)[0]);
assertThat(location1.getPath(), is(path("/Cars/Hybrid/Toyota Highlander")));
query = "SELECT model, maker FROM __ALLNODES__ WHERE PATH() LIKE '/Cars[%]/Hy%/Toyota%' OR PATH() LIKE '/Cars[1]/Utility[1]/%'";
results = query(workspaceName1, query);
assertRowCount(results, 5);
}
@Test
public void shouldUpdateIndexesWhenUpdatingPropertiesInSource() {
loadContent(content);
// Make sure we're finding the results ...
QueryResults results = search(workspaceName1, "Toyota Prius", 10, 0);
assertThat(results, is(notNullValue()));
assertRowCount(results, 2);
Location location1 = (Location)(results.getTuples().get(0)[0]);
Location location2 = (Location)(results.getTuples().get(1)[0]);
assertThat(location1.getPath(), is(path("/Cars/Hybrid/Toyota Prius")));
assertThat(location2.getPath(), is(path("/Cars/Hybrid/Toyota Highlander")));
String query = "SELECT model, maker, year FROM __ALLNODES__ WHERE PATH() LIKE '/Cars[1]/Utility[1]/Ford F-150[1]' AND year = 2008";
results = query(workspaceName1, query);
assertRowCount(results, 1);
content.useWorkspace(workspaceName1);
content.set("year").on("/Cars/Utility/Ford F-150").to(2011).and();
// Make sure we DO find the F-150 with the updated year ...
query = "SELECT model, maker, year FROM __ALLNODES__ WHERE PATH() LIKE '/Cars[1]/Utility[1]/Ford F-150[1]' AND year = 2011";
results = query(workspaceName1, query);
assertRowCount(results, 1);
// Make sure we do NOT find the F-150 anymore with the old year ...
query = "SELECT model, maker, year FROM __ALLNODES__ WHERE PATH() LIKE '/Cars[1]/Utility[1]/Ford F-150[1]' AND year = 2008";
results = query(workspaceName1, query);
assertRowCount(results, 0);
// We should find this since it still matches the criteria ...
query = "SELECT model, maker, year FROM __ALLNODES__ WHERE PATH() LIKE '/Cars[1]/Utility[1]/Ford F-150[1]' AND year >= 2010";
results = query(workspaceName1, query);
assertRowCount(results, 1);
// Try some queries that should NOT work ...
query = "SELECT model, maker, year FROM __ALLNODES__ WHERE year <= 1899";
results = query(workspaceName1, query);
assertRowCount(results, 0);
}
}