/*******************************************************************************
* Copyright 2009, 2010 Innovation Gate GmbH. All Rights Reserved.
*
* This file is part of the OpenWGA server platform.
*
* OpenWGA is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* In addition, a special exception is granted by the copyright holders
* of OpenWGA called "OpenWGA plugin exception". You should have received
* a copy of this exception along with OpenWGA in file COPYING.
* If not, see <http://www.openwga.com/gpl-plugin-exception>.
*
* OpenWGA is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenWGA in file COPYING.
* If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
package de.innovationgate.wgpublisher.lucene;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.Semaphore;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.servlet.http.HttpServletRequest;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.QueryTermExtractor;
import org.apache.lucene.search.highlight.QueryTermScorer;
import org.apache.lucene.search.highlight.WeightedTerm;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import de.innovationgate.utils.WGUtils;
import de.innovationgate.webgate.api.MetaInfo;
import de.innovationgate.webgate.api.WGAPIException;
import de.innovationgate.webgate.api.WGBackendException;
import de.innovationgate.webgate.api.WGConfigurationException;
import de.innovationgate.webgate.api.WGContent;
import de.innovationgate.webgate.api.WGContentEvent;
import de.innovationgate.webgate.api.WGContentEventListener;
import de.innovationgate.webgate.api.WGContentIterator;
import de.innovationgate.webgate.api.WGContentKey;
import de.innovationgate.webgate.api.WGDatabase;
import de.innovationgate.webgate.api.WGDatabaseConnectListener;
import de.innovationgate.webgate.api.WGDatabaseEvent;
import de.innovationgate.webgate.api.WGDocument;
import de.innovationgate.webgate.api.WGFactory;
import de.innovationgate.webgate.api.WGIllegalArgumentException;
import de.innovationgate.webgate.api.WGLanguage;
import de.innovationgate.webgate.api.WGQueryException;
import de.innovationgate.webgate.api.WGRelationData;
import de.innovationgate.webgate.api.WGResultSet;
import de.innovationgate.webgate.api.WGStructEntry;
import de.innovationgate.webgate.api.WGSystemException;
import de.innovationgate.webgate.api.WGUnavailableException;
import de.innovationgate.webgate.api.WGUpdateLog;
import de.innovationgate.webgate.api.WGWrongRevisionException;
import de.innovationgate.wga.common.beans.LuceneConfiguration;
import de.innovationgate.wga.common.beans.LuceneIndexFileRule;
import de.innovationgate.wga.common.beans.LuceneIndexItemRule;
import de.innovationgate.wga.common.beans.csconfig.v1.PluginConfig;
import de.innovationgate.wga.config.ContentDatabase;
import de.innovationgate.wga.config.ContentStore;
import de.innovationgate.wga.config.LuceneManagerConfiguration;
import de.innovationgate.wgpublisher.WGACore;
import de.innovationgate.wgpublisher.lucene.analysis.FileHandler;
import de.innovationgate.wgpublisher.lucene.analysis.FileHandlerException;
public class LuceneManager implements WGContentEventListener, WGDatabaseConnectListener {
// Names of the virtual metas. The constructor adds every entry of VIRTUALMETAS
// to the set of meta fields that are treated as keyword fields.
public static final String VIRTUALMETA_PATH = "PATH";
public static final String VIRTUALMETA_PARENT = "PARENT";
public static final String VIRTUALMETA_AREA = "AREA";
public static final String VIRTUALMETA_CONTENTTYPE = "CONTENTTYPE";
public static final String VIRTUALMETA_KEY = "KEY";
public static final String VIRTUALMETA_DBKEY = "DBKEY";
// All virtual meta names in one array
public static final String[] VIRTUALMETAS = new String[] {
VIRTUALMETA_AREA,
VIRTUALMETA_CONTENTTYPE,
VIRTUALMETA_DBKEY,
VIRTUALMETA_KEY,
VIRTUALMETA_PARENT,
VIRTUALMETA_PATH
};
/**
* Name of the query option that selects the search scope of a lucene search.
* The value is one of the strings SEARCHSCOPE_DOMAIN, SEARCHSCOPE_WGA, SEARCHSCOPE_DB, SEARCHSCOPE_DB_LIST
*/
public static final String QUERYOPTION_SEARCHSCOPE = "lucene_search_scope";
/**
* Name of the query option holding a comma separated list of db keys to search in.
* Only relevant if the search scope is SEARCHSCOPE_DB_LIST
*/
public static final String QUERYOPTION_SEARCHDBKEYS = "lucene_search_db_keys";
/**
* If this option is given as native query option, virtual content will be included in search results.
* By default virtual content is excluded.
*/
public static final String NATIVE_QUERYOPTION_INCLUDEVIRTUALCONTENT = "includeVirtualContent";
/**
* Possible values for QUERYOPTION_SEARCHSCOPE
*/
public static final String SEARCHSCOPE_DOMAIN = "lucene_search_scope_domain";
public static final String SEARCHSCOPE_WGA = "lucene_search_scope_wga";
public static final String SEARCHSCOPE_DB = "lucene_search_scope_db";
public static final String SEARCHSCOPE_DB_LIST = "lucene_search_scope_db_list";
/**
* Names of the internal fields written to each index document
*/
private static final String INDEXFIELD_UNIQUEKEY = "$LUCENE_KEY";
private static final String INDEXFIELD_DOCUMENTKEY = "$LUCENE_DOCUMENTKEY";
// the following index field names are public because they are also used by LuceneMultiDBResultset
public static final String INDEXFIELD_DBKEY = "$LUCENE_DBKEY";
public static final String INDEXFIELD_CONTENTKEY = "$LUCENE_CONTENTKEY";
public static final String INDEXFIELD_ISVIRTUALCONTENT = "$LUCENE_ISVIRTUALCONTENT";
public static final String INDEXFIELD_ALLCONTENT = "allcontent";
public static final String INDEXFIELD_ALLATTACHMENTS = "allattachments";
// presumably the prefix of index fields holding sortable item values - usage is not visible here, TODO confirm
private static final String SORTITEM_PREFIX = "$sort_";
// Placeholders indexed for empty validFrom / validTo dates.
// Both are 14 characters wide, the same width as the "yyyyMMddHHmmss" pattern of the date formats in this class.
private static final String EMPTY_VALID_FROM = "00000000000000";
private static final String EMPTY_VALID_TO = "99999999999999";
// Name of a system property; judging by the name it disables automatic index optimization - TODO confirm where it is read
public static final String SYSPROP_DISABLE_AUTO_INDEX_OPTIMIZATION = "de.innovationgate.wga.lucene.disableAutoIndexOptimization";
// WGA core this manager belongs to; used for logging, configuration and access to the content databases
private WGACore _core;
// timer task executing the index updates; created in init()
private Indexer _indexer;
// filesystem folder containing the lucene index
private File _dir;
// lucene directory opened on _dir in init()
private Directory _indexDirectory;
// true while a complete index rebuild is in progress.
// Set by rebuildIndex() and reset by the indexer run, i.e. shared between threads, therefore volatile.
private volatile boolean _rebuildingIndex = false;
// persistent index configuration stored alongside the index
private LuceneIndexConfiguration _indexConfig;
// meta keywords for lucene index
// contains metas of an indexDocument which are indexed as keyword
// used by IndexRuleBasedQueryParser during search()
private Set _metaKeywordFields;
// stores all indexed dbKeys as key and contains indexingrules as value.
// The map instance is never modified in place but replaced by a modified copy
// and is read by the indexer thread, therefore the reference is volatile.
private volatile HashMap _indexedDbs = new HashMap();
// true while an indexer run is executing. Written by the indexer thread and polled by destroy(), therefore volatile.
private volatile boolean _indexerIsRunning;
private int _booleanQueryMaxClauseCount = BooleanQuery.getMaxClauseCount();
// upper limit of documents indexed within one database session before the session is reopened
private int _maxDocsPerDBSession = 50;
public static final String TAGINFO_UNSPECIFICQUERY = "LUCENE_TAGINFO_UNSPECIFICQUERY";
public static final String TAGINFO_SIMPLIFIEDQUERY = "LUCENE_TAGINFO_SIMPLYFIEDQUERY";
public static final String RELATION_PREFIX = "$rel_";
// contains opened resultssets per thread
private ThreadLocal _resultsetList = new ThreadLocal();
// set by destroy() and checked by the indexer thread to abort its work early, therefore volatile
private volatile boolean _shutdownRequested = false;
// flag if manager has been started (method startup has been called on this instance)
private boolean _started = false;
// number format built in the static initializer below
private static DecimalFormat DECIMALFORMAT_SORTFIELD;
// monitor serializing all operations that open an IndexWriter
private Object _indexWriteLock = new Object();
static {
    // The pattern consists of one '0' per digit of Long.MAX_VALUE (zero padded integer part),
    // a decimal point and the same number of '#' (optional fraction digits).
    int digitCount = Long.toString(Long.MAX_VALUE).length();
    StringBuilder pattern = new StringBuilder(2 * digitCount + 1);
    for (int pos = 0; pos < digitCount; pos++) {
        pattern.append('0');
    }
    pattern.append('.');
    for (int pos = 0; pos < digitCount; pos++) {
        pattern.append('#');
    }
    DECIMALFORMAT_SORTFIELD = new DecimalFormat(pattern.toString());
}
// Date formats producing 14 digit timestamps (yyyyMMddHHmmss).
// NOTE(review): both instances are static and therefore shared. SimpleDateFormat is not synchronized -
// confirm that all usages are confined to one thread or externally synchronized.
private static DateFormat DATEFORMAT_SORTFIELD = new SimpleDateFormat("yyyyMMddHHmmss");
private static DateFormat DATEFORMAT_KEYWORD = new SimpleDateFormat("yyyyMMddHHmmss");
// B000047B2
// Number format with English locale symbols, configured below without grouping and without digit limits
private static NumberFormat NUMBERFORMAT_KEYWORD = DecimalFormat.getNumberInstance(java.util.Locale.ENGLISH);
static {
// no grouping separators, no limit on the number of integer or fraction digits
NUMBERFORMAT_KEYWORD.setGroupingUsed(false);
NUMBERFORMAT_KEYWORD.setMaximumFractionDigits(Integer.MAX_VALUE);
NUMBERFORMAT_KEYWORD.setMaximumIntegerDigits(Integer.MAX_VALUE);
}
// shared index searcher
private volatile IndexSearcher _indexSearcher;
// number of permits of the fair semaphore below
public static final int MAX_CONCURRENT_SEARCHES = 1000;
// fair semaphore with MAX_CONCURRENT_SEARCHES permits; presumably guards usage of the shared searcher - TODO confirm at the acquiring code
private final Semaphore _indexSearcherSemaphore = new Semaphore(MAX_CONCURRENT_SEARCHES, true);
/**
* info bean for an indexing run
* used in performAdditions and performDeletions to determine the status of the current indexupdate
*
*/
private class IndexingProcessInfo {
int _deletedDocs = 0;
int _addedDocs = 0;
int _reinsertedRequests = 0;
int _skippedRequests = 0;
public int getAddedDocs() {
return _addedDocs;
}
public int getDeletedDocs() {
return _deletedDocs;
}
public int getReinsertedRequests() {
return _reinsertedRequests;
}
public int getSkippedRequests() {
return _skippedRequests;
}
public void docAdded() {
_addedDocs++;
}
public void docDeleted() {
_deletedDocs++;
}
public void requestReinserted() {
_reinsertedRequests++;
}
public void requestSkipped() {
_skippedRequests++;
}
}
/**
 * Creates a lucene manager working on the index folder named in the given configuration.
 * The folder is resolved (or created) via the core. The manager is configured with the
 * maximum boolean clause count and the maximum documents per db session of the configuration.
 * @param core the WGA core
 * @param config the lucene manager configuration
 * @return the new manager
 * @throws IllegalArgumentException if the configured path cannot be resolved to a directory
 */
public static LuceneManager retrieve(WGACore core, LuceneManagerConfiguration config) throws IllegalArgumentException, IOException, DocumentException, WGSystemException {
    File indexFolder = core.getOrCreateWGAFolder(config.getPath());
    boolean usable = indexFolder != null && indexFolder.isDirectory();
    if (!usable) {
        throw new IllegalArgumentException("The file '" + config.getPath() + "' does not exist or is no directory");
    }

    LuceneManager result = new LuceneManager(core, indexFolder);
    result.setBooleanQueryMaxClauseCount(config.getMaxBooleanClauseCount());
    result.setMaxDocsPerDBSession(config.getMaxDocsPerDBSession());
    return result;
}
/**
 * Creates the manager for the given index folder, collects the names of all keyword
 * indexed metas and initializes the index.
 * @param core the WGA core
 * @param dir folder containing the lucene index
 */
private LuceneManager(WGACore core, File dir) throws IOException, DocumentException, WGSystemException {
    // Lucene lock files are kept in the WGA temp dir
    System.setProperty("org.apache.lucene.lockDir", core.getWgaTempDir().getPath());
    _dir = dir;
    _core = core;

    // Collect names and synonyms of all content metas indexed as keyword - used during search analysis
    Iterator infos = WGFactory.getInstance().getMetaInfos(WGContent.class).values().iterator();
    _metaKeywordFields = new HashSet();
    while (infos.hasNext()) {
        MetaInfo info = (MetaInfo) infos.next();
        //B0000485A
        if (info.getLuceneIndexType() != MetaInfo.LUCENE_INDEXTYPE_KEYWORD) {
            continue;
        }
        // synonyms are only registered together with a name that is not yet known
        if (_metaKeywordFields.contains(info.getName())) {
            continue;
        }
        _metaKeywordFields.add(info.getName());
        for (Iterator names = info.getSynonyms().iterator(); names.hasNext();) {
            String synonym = (String) names.next();
            _metaKeywordFields.add(synonym);
        }
    }

    // The virtual metas are keyword fields, too
    _metaKeywordFields.addAll(Arrays.asList(VIRTUALMETAS));

    this.init();
}
/**
 * (Re-)initializes the index: loads the index configuration, opens the index directory,
 * creates an empty index for a new configuration, removes a stale write lock and finally
 * schedules a new indexer task (first run after 60 seconds, then every 5 seconds).
 * <p>
 * A timer that is still referenced is cancelled first, so its thread is not left behind.
 * The indexer is scheduled last so that it never gets scheduled when the index setup fails.
 * @throws IOException if the index directory cannot be opened or created
 * @throws DocumentException if the index configuration cannot be read
 */
private void init() throws IOException, DocumentException {
    _indexer = new Indexer();

    // release the previous timer (e.g. the one created by the field initializer) before replacing it
    if (_timer != null) {
        _timer.cancel();
        _timer = null;
    }

    _indexConfig = new LuceneIndexConfiguration(_core, _dir);
    _indexDirectory = FSDirectory.open(_dir);

    if (_indexConfig.isNewConfiguration()) {
        // new configuration: create an empty index
        IndexWriter writer = new IndexWriter(_indexDirectory, null, true, MaxFieldLength.LIMITED);
        writer.close();
    }

    // Try to remove an existing lock
    if (IndexWriter.isLocked(_indexDirectory)) {
        _core.getLog().warn("Lucene index directory was locked. Removing lock now");
        IndexWriter.unlock(_indexDirectory);
    }

    // everything is set up - start the periodic index updates
    _timer = new Timer();
    _timer.schedule(_indexer, 1000 * 60, 1000 * 5);
}
/**
 * Rebuilds the complete index: cancels the timer task, deletes all files in the index
 * folder, re-initializes the index and starts the manager again.
 * <p>
 * The "rebuilding" flag stays set after a successful restart; it is reset by the indexer
 * once its run has finished. If the rebuild cannot be started for any reason, including
 * runtime exceptions, the flag is reset here so that a later rebuild is not rejected.
 * @throws IllegalStateException if a rebuild is already in progress
 * @throws IOException if an index file cannot be deleted or the index cannot be re-initialized
 * @throws DocumentException if the index configuration cannot be read
 */
public void rebuildIndex() throws IOException, DocumentException {
    if (_rebuildingIndex) {
        _core.getLog().warn("Rebuilding of lucene index already in progress, please wait ...");
        throw new IllegalStateException("Rebuilding of lucene index already in progress, please wait ...");
    }

    _core.getLog().info("Rebuilding lucene index started ...");
    _rebuildingIndex = true;

    boolean restarted = false;
    try {
        // destroy timer task
        if (_timer != null) {
            _timer.cancel();
            _timer = null;
        }

        // stop indexer
        _indexer = null;

        // Delete all files in _dir
        File[] indexFiles = _dir.listFiles();
        if (indexFiles != null) {
            for (int i = 0; i < indexFiles.length; i++) {
                File file = indexFiles[i];
                if (!file.delete()) {
                    _core.getLog().error("Could not delete lucene index file '" + file.getAbsolutePath() + "'");
                    throw new IOException("Could not delete lucene index file '" + file.getAbsolutePath() + "'.");
                }
            }
        }

        // reinit
        this.init();
        this.startup();
        restarted = true;
    }
    finally {
        // any failure (checked or unchecked) must not leave the manager in "rebuilding" state
        if (!restarted) {
            _rebuildingIndex = false;
        }
    }
}
/**
 * Queues a db update request for the given database with the "index rebuild" flag set.
 * @param dbkey key of the database to reindex
 */
public void rebuildIndex(String dbkey) {
    IndexingRequest rebuildRequest = new IndexingRequest(dbkey, null, true);
    addDBUpdateRequest(rebuildRequest);
}
/**
 * Shuts the manager down: cancels the timer, signals the shutdown to the indexer and
 * blocks until a currently running indexer run has finished, so that lucene can close
 * all index writers. Does nothing if no timer is active.
 * <p>
 * An interrupt received while waiting does not abort the wait; the interrupt status of
 * the calling thread is restored before returning.
 */
public void destroy() {
    if (_timer == null) {
        return;
    }

    // destroy timer task
    _timer.cancel();

    // notify indexer about shutdown
    _shutdownRequested = true;

    // wait until current timerTask is completed to allow lucene to close all index writers
    _core.getLog().info("Waiting for lucene to shutdown...");
    boolean interrupted = false;
    while (_indexerIsRunning) {
        Thread.yield();
        try {
            Thread.sleep(1000);
        }
        catch (InterruptedException e) {
            // keep waiting, but remember the interrupt for the caller
            interrupted = true;
        }
    }
    _timer = null;
    _core.getLog().info("Lucene finished shutdown.");

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}
/**
 * Called when WGA starts up (and after an index rebuild): marks the manager as started,
 * loads the indexed databases from the index configuration and processes the current
 * WGA configuration.
 */
public synchronized void startup() {
    // must be set first - configurationHasChanged() returns immediately on a manager that is not started
    this._started = true;

    // load indexed dbs from configfile
    this._indexedDbs = this._indexConfig.retrieveIndexedDbs();

    // Registration as content event listener for the indexed dbs is not done here;
    // it happens while processing the configuration.
    this.configurationHasChanged(null);
}
/**
 * The WGA configuration has changed: reads the new configuration and performs the necessary index updates.
 * Indexed dbs that are no longer configured are dropped from the index, disabled ones are only
 * removed from the list of indexed dbs. Afterwards every connected content store of the core is
 * created or updated in the index.
 * @param newConnectedDBKeys keys of the dbs connected by this configuration update, null on startup
 */
public synchronized void configurationHasChanged(Set newConnectedDBKeys) {
    // skip config update if lucene manager has not been started (method startup called) yet
    // this happens on an initial WGA startup
    if (!_started) {
        return;
    }

    // Check every indexed db against the WGA configuration.
    // removeDatabase() replaces _indexedDbs by a copy, so this iteration over the old map is not disturbed.
    for (Iterator keys = _indexedDbs.keySet().iterator(); keys.hasNext();) {
        String dbKey = (String) keys.next();
        ContentStore storeConfig = _core.getWgaConfiguration().getContentStore(dbKey);
        boolean notConfigured = (storeConfig == null);
        if (notConfigured || !storeConfig.isEnabled()) {
            // not configured anymore: drop from index; only disabled: keep the index
            removeDatabase(dbKey, notConfigured);
        }
    }

    // Process all active databases of the core
    for (Iterator dbs = _core.getContentdbs().values().iterator(); dbs.hasNext();) {
        WGDatabase db = (WGDatabase) dbs.next();

        // use db only if it is a real contentStore (has feature FULLCONTENTFEATURES)
        if (db == null || !db.hasFeature(WGDatabase.FEATURE_FULLCONTENTFEATURES)) {
            continue;
        }

        // WGA Plugins are not fulltext indexed
        if (db.getDbReference().startsWith(PluginConfig.PLUGIN_DBKEY_PREFIX)) {
            continue;
        }

        if (db.isConnected()) {
            createOrUpdateDBIndex(db, newConnectedDBKeys);
        }
        else {
            // not yet connected: listen for the connect event to handle the db when it happens
            db.addDatabaseConnectListener(this);
        }
    }
}
/**
 * Brings the index state of one connected database in line with its lucene configuration.
 * <ul>
 * <li>db not yet contained in the index configuration: it is added if lucene is enabled for it</li>
 * <li>configuration changed: the db is reindexed completely, or dropped if lucene was disabled</li>
 * <li>configuration unchanged: an incremental update is requested on startup
 * (newConnectedDBKeys == null) or if the db was connected by this configuration update</li>
 * </ul>
 * If the db is contained in the index configuration but no lucene configuration can be
 * retrieved for it, the db is skipped with a warning and its index is left untouched.
 * @param db the connected database
 * @param newConnectedDBKeys keys of the dbs connected by the current configuration update, null on startup
 */
private void createOrUpdateDBIndex(WGDatabase db, Set newConnectedDBKeys) {
    HashMap currentDBsInIndexOnFileSystem = _indexConfig.retrieveIndexedDbs();
    String dbKey = db.getDbReference();

    if (!currentDBsInIndexOnFileSystem.containsKey(dbKey)) {
        // db is not yet indexed --> check config
        LuceneConfiguration config = retrieveLuceneConfig(dbKey);
        if ((config != null) && (config.isEnabled())) {
            addDatabase(db, config);
        }
        return;
    }

    // database is already in index on the filesystem
    LuceneConfiguration oldConfig = (LuceneConfiguration) currentDBsInIndexOnFileSystem.get(dbKey);
    LuceneConfiguration newConfig = retrieveLuceneConfig(dbKey);
    if (newConfig == null) {
        // retrieveLuceneConfig() returns null if no configuration is found; nothing can be compared or updated then
        _core.getLog().warn("Lucene configuration for indexed db '" + dbKey + "' could not be retrieved. The index of this db is left unchanged.");
        return;
    }

    if (!oldConfig.equals(newConfig)) {
        // configuration has changed: update database
        updateDatabase(dbKey, newConfig);
        if (newConfig.isEnabled()) {
            registerForContentEvents(db);
            // rebuild index of db
            _core.getLog().info("Indexing rules for db '" + dbKey + "' has changed. Indexed content will be truncated and reindexed ...");
            addDBUpdateRequest(new IndexingRequest(dbKey, null, true));
        } else {
            // remove db and drop from index; removeDatabase also removes it from the indexed dbs via copy-replace
            removeDatabase(dbKey, true);
        }
        return;
    }

    if (newConnectedDBKeys == null) {
        // this is an initial startup - perform incremental updates
        registerForContentEvents(db);
        addDBUpdateRequest(new IndexingRequest(dbKey, null));
    } else if (newConnectedDBKeys.contains(dbKey)) {
        // perform incremental update bc. db was connected within this configurationupdate
        registerForContentEvents(db);
        addToIndexedDbs(dbKey, newConfig);
        addDBUpdateRequest(new IndexingRequest(dbKey, null));
    }
}
/**
 * Registers this manager as content event listener on the given database.
 * A null database is only reported as warning.
 * @param db database to listen on, may be null
 */
private void registerForContentEvents(WGDatabase db) {
    if (db == null) {
        _core.getLog().warn("Lucene is unable to register as ContentEventListener for 'null' db.");
        return;
    }

    _core.getLog().info("Lucene registered for receiving content changes from db " + db.getDbReference());
    // removing before adding ensures that this listener is never registered twice
    db.removeContentEventListener(this);
    db.addContentEventListener(this);
}
/**
 * Adds a database to lucene: stores its configuration in the index configuration, registers
 * for content events, adds it to the indexed dbs and requests an index update for it.
 * @param db WGDatabase
 * @param config LuceneConfiguration
 */
private void addDatabase(WGDatabase db, LuceneConfiguration config) {
    String dbKey = db.getDbReference();
    _core.getLog().info("Lucene index enabled for db '" + dbKey + "'.");

    // update configfile
    _indexConfig.addDBConfig(dbKey, config);

    registerForContentEvents(db);
    addToIndexedDbs(dbKey, config);

    IndexingRequest updateRequest = new IndexingRequest(dbKey, null);
    addDBUpdateRequest(updateRequest);
}
/**
 * Registers (or replaces) the configuration of a database in the indexed dbs.
 * The current map is never modified; a modified copy replaces it.
 * NOTE(review): the monitor is the map instance that gets replaced inside the block, so
 * callers arriving after the switch lock a different object.
 * @param dbkey key of the database
 * @param config lucene configuration of the database
 */
private void addToIndexedDbs(String dbkey, LuceneConfiguration config) {
    synchronized (_indexedDbs) {
        HashMap replacement = new HashMap(_indexedDbs);
        replacement.put(dbkey, config);
        _indexedDbs = replacement;
    }
}
/**
 * Removes a database from the indexed dbs.
 * The current map is never modified; a modified copy replaces it, so running iterations
 * over the old map are not affected.
 * NOTE(review): the monitor is the map instance that gets replaced inside the block, so
 * callers arriving after the switch lock a different object.
 * @param dbkey key of the database
 */
private void removeFromIndexedDbs(String dbkey) {
    synchronized (_indexedDbs) {
        HashMap replacement = new HashMap(_indexedDbs);
        replacement.remove(dbkey);
        _indexedDbs = replacement;
    }
}
/**
 * Removes a database from lucene: unregisters this manager as content event listener
 * (if the db is known to the core) and removes the db from the indexed dbs.
 * @param dbKey key of the database
 * @param dropindex true to additionally request dropping the db from the index and to
 * discard its pending addition requests
 */
private void removeDatabase(String dbKey, boolean dropindex) {
    if (dropindex) {
        _core.getLog().info("Indexed db '" + dbKey + "' was removed from wga or disabled for lucene. DB will be dropped from index ...");
        IndexingRequest dropRequest = new IndexingRequest(dbKey, null);
        addDropDBRequest(dropRequest);
        _additionRequestsMap.remove(dbKey);
    }

    // try to remove contentEventListener
    WGDatabase connectedDb = (WGDatabase) _core.getContentdbs().get(dbKey);
    if (connectedDb != null) {
        _core.getLog().info("Removing '" + this.getClass().getName() + "' as ContentEventListener for db " + connectedDb.getDbReference() + ".");
        connectedDb.removeContentEventListener(this);
    }

    // remove db from indexedDbs (copy-replace, safe during iteration over the old map)
    removeFromIndexedDbs(dbKey);
}
/**
 * Replaces the configuration of a database, first in the indexed dbs held in memory
 * and then in the index configuration.
 * @param dbKey key of the database
 * @param config the new lucene configuration
 */
private void updateDatabase(String dbKey, LuceneConfiguration config) {
    this.addToIndexedDbs(dbKey, config);
    this._indexConfig.updateDBConfig(dbKey, config);
}
/**
 * Builds the LuceneConfiguration of a database from the WGA configuration.
 * An error is logged if the db has no content store configuration or no lucene index configuration.
 * @param dbKey DatabaseKey
 * @return LuceneConfiguration, null if not found
 */
public LuceneConfiguration retrieveLuceneConfig(String dbKey) {
    ContentStore storeConfig = _core.getWgaConfiguration().getContentStore(dbKey);

    de.innovationgate.wga.config.LuceneIndexConfiguration indexSettings = null;
    if (storeConfig != null) {
        indexSettings = storeConfig.getLuceneIndexConfiguration();
    }

    if (indexSettings == null) {
        _core.getLog().error("Could not retrieve lucene config for db '" + dbKey + "'");
        return null;
    }

    // Create Lucene Config
    LuceneConfiguration result = new LuceneConfiguration();
    result.setEnabled(indexSettings.isEnabled());
    List itemRules = LuceneIndexItemRule.getRules(indexSettings.getItemRules());
    List fileRules = LuceneIndexFileRule.getRules(indexSettings.getFileRules());
    result.setItemRules(itemRules);
    result.setFileRules(fileRules);
    return result;
}
/**
 * Appends an addition request to the request queue of its database.
 * The queue is created on the first request for a database.
 * @param request the request to queue
 */
private synchronized void addAdditionRequest(IndexingRequest request) {
    LinkedList queue = (LinkedList) _additionRequestsMap.get(request.getDbkey());
    if (queue == null) {
        queue = new LinkedList();
        _additionRequestsMap.put(request.getDbkey(), queue);
    }
    queue.addLast(request);
}
/**
 * Queues a db update request unless an equal request is already queued.
 * @param request the request to queue
 */
private void addDBUpdateRequest(IndexingRequest request) {
    // nothing to do if the request is already in the list
    if (_dbUpdateRequests.contains(request)) {
        return;
    }

    _dbUpdateRequests.add(request);

    // A request that needs an index rebuild clears the lastupdate flag in the index configuration.
    // This ensures that the index is rebuilt on the next WGA runtime
    // if WGA is terminated before the rebuild has finished.
    boolean rebuild = request.isIndexRebuildNeeded();
    if (rebuild) {
        _indexConfig.clearLastUpdated(request.getDbkey());
    }
}
/**
 * Appends a deletion request to the request queue of its database.
 * The queue is created on the first request for a database.
 * @param request the request to queue
 */
private synchronized void addDeletionRequest(IndexingRequest request) {
    LinkedList queue = (LinkedList) _deletionRequestsMap.get(request.getDbkey());
    if (queue == null) {
        queue = new LinkedList();
        _deletionRequestsMap.put(request.getDbkey(), queue);
    }
    queue.addLast(request);
}
/**
 * Queues a request to drop a database from the index.
 * @param request the request to queue
 */
private void addDropDBRequest(IndexingRequest request) {
    this._dropDBRequests.add(request);
}
/**
 * Queues a request to truncate the indexed content of a database.
 * @param request the request to queue
 */
private void addTruncateDBRequest(IndexingRequest request) {
    this._truncateDBRequests.add(request);
}
/**
 * Optimizes the lucene index. Runs under the index write lock, so it never executes
 * concurrently with the indexer's write operations.
 * <p>
 * The writer is opened on the shared index directory, like all other writers of this class.
 * I/O errors are logged including their cause and are not propagated.
 */
public void optimizeIndex() {
    synchronized (_indexWriteLock) {
        IndexWriter writer = null;
        try {
            // open the existing index (create = false); no analyzer is needed for optimizing
            writer = new IndexWriter(_indexDirectory, null, false, MaxFieldLength.LIMITED);
            _core.getLog().info("optimizing lucene index ...");
            long start = System.currentTimeMillis();
            writer.optimize();
            long end = System.currentTimeMillis();
            _core.getLog().info("optimizing lucene index done in " + (end - start) + " ms.");
        } catch (IOException e) {
            _core.getLog().error("Unable to perform index optimization.", e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e1) {
                    _core.getLog().error("Unable to close lucene index writer because of I/O exception", e1);
                }
            }
        }
    }
}
/**
 * Determines the analyzer to use for a content document.
 * @param content the content whose language selects the analyzer
 * @return the analyzer configured for the language of the content, or the default analyzer
 * if the language has no name or no analyzer is configured for it
 * @throws WGAPIException
 */
public Analyzer retrieveAnalyzer(WGContent content) throws WGAPIException {
    String langCode = content.getLanguage().getName();

    Analyzer result = null;
    if (langCode != null) {
        result = _core.getAnalyzerForLanguageCode(langCode);
    }
    if (result == null) {
        result = _core.getDefaultAnalyzer();
    }
    return result;
}
// Timer executing the Indexer task periodically.
// init() assigns a fresh Timer and schedules the indexer on it; rebuildIndex() and destroy() cancel it and set it to null.
// NOTE(review): this initializer already creates a Timer at construction time, which init() then replaces.
Timer _timer = new Timer();
class Indexer extends TimerTask {
// index writer of the current operation; opened under the index write lock while processing requests
private IndexWriter _writer;
// presumably the index reader used for deletions - usage is not visible here, TODO confirm
private IndexReader _reader;
/**
* Executes one indexer run. Invoked periodically by the timer.
* <p>
* First the pending db update, drop and truncate requests are performed. Then, for every
* indexed db, the deletion and addition requests that were queued when the run reached the db
* are processed. If documents were added or deleted and no request was reinserted or skipped,
* the db revision determined before processing is stored as last-updated marker of the db.
* <p>
* All exceptions are caught and logged here, none is propagated to the timer.
* In any case the database sessions are closed and the "running" flag is reset at the end.
*/
public void run() {
try {
// signals destroy() that it has to wait for this run
_indexerIsRunning = true;
Thread.currentThread().setName("WGA Lucene Indexer");
Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
_core.getLog().debug("Running lucene indexer");
performDbUpdateRequests();
performDropDBRequests();
performTruncateDBRequests();
// ensure that deletions and additions are in sync
// --> threadsafe against contentHasBeenSaved() and contentHasBeenDeleted()
// work on a snapshot of the db keys, the map may be replaced while this run is executing
Object[] keys = _indexedDbs.keySet().toArray();
int currentKeySize = keys.length;
for (int i = 0; i < currentKeySize; i++) {
String dbKey = (String) keys[i];
// if shutdown is requested - skip further index updates and let wga shutdown
// lastupdate flag of index will not be updated - therefore all pending requests will be indexed
// during incremental update on next startup
if (_shutdownRequested) {
break;
}
int currentDeletionsCount = 0;
int currentAdditionsCount = 0;
Comparable revisionOfThisUpdate = null;
// revision and request counts are determined under the manager lock, which is also held
// by the synchronized add*Request methods, so the counts form a consistent snapshot
synchronized (LuceneManager.this) {
// revision of this indexer run --> result in the lastmodified revision of index config
WGDatabase db = _core.openContentDB(dbKey, null, true);
if (db == null) {
// NOTE: this exception is caught outside of the loop, so it ends the complete run, not only the work on this db
throw new WGIllegalArgumentException("Cannot perform index update on database '" + dbKey + "' because it is not connected");
}
revisionOfThisUpdate = db.getRevision();
// count deletions for db for this run
List deletions = (List) _deletionRequestsMap.get(dbKey);
if (deletions != null) {
currentDeletionsCount = deletions.size();
}
// count additions for db for this run
List additions = (List) _additionRequestsMap.get(dbKey);
if (additions != null) {
currentAdditionsCount = additions.size();
}
}
// only the counted number of requests is processed; requests queued later are left for the next run
IndexingProcessInfo processInfo = new IndexingProcessInfo();
performDeletionRequests(dbKey, currentDeletionsCount, processInfo);
performAdditionRequests(dbKey, currentAdditionsCount, processInfo);
// if docs have been deleted or added and no requests have been reinserted or skipped this db is finished
// the lastupdate flag should be updated in index config
if ((processInfo.getAddedDocs() > 0 || processInfo.getDeletedDocs() > 0) && processInfo.getReinsertedRequests() == 0 && processInfo.getSkippedRequests() == 0) {
_indexConfig.setLastUpdated(dbKey, revisionOfThisUpdate);
}
}
//reset rebuilding index flag
if (_rebuildingIndex) {
_rebuildingIndex = false;
_core.getLog().info("Rebuilding Lucene index finished.");
}
}
catch (WGUnavailableException e) {
if (e.getDatabase() != null) {
_core.getLog().error("Unable to perform index update because database '" + e.getDatabase().getTitle() + "' is unavailable");
}
else {
_core.getLog().error("Unable to perform index update because a database is unavailable");
}
}
catch (WGIllegalArgumentException e) {
// only the message is logged, e.g. for a db that is not connected (see above)
_core.getLog().error(e.getMessage());
}
catch (IOException e) {
_core.getLog().error("Unable to perform index update because of I/O exception", e);
}
catch (ParseException e) {
_core.getLog().error("Unable to perform index update because of XML parsing exception: '" + e.getMessage());
}
catch (Exception e) {
_core.getLog().error("Unable to perform index update because of exception", e);
}
finally {
try {
// release all database sessions opened by this thread
WGFactory.getInstance().closeSessions();
_core.getLog().debug("Finished running lucene indexer");
} catch (Exception e) {
// deliberately ignored: an exception escaping from here must not end the timer
}
// allows a waiting destroy() to continue
_indexerIsRunning = false;
}
}
/**
* performs addition requests for the database dbKey
* only requests within currentListSize are processed to ensure additions and deletions are in sync
* @param dbKey database to process
* @param currentListSize number of items from additionrequestsList to process
* @param info IndexingProcessInfo of the current run
* @throws IOException
*/
private void performAdditionRequests(String dbKey, int currentListSize, IndexingProcessInfo info) throws IOException {
// continue only if necessary (requests in list)
if (currentListSize <= 0) {
return;
}
IndexingRequest request = null;
WGDatabase db;
Set alreadyAdded = new HashSet();
_core.getLog().debug("Performing index addition requests for db '" + dbKey + "'.");
// Additions
synchronized (_indexWriteLock) {
try {
_writer = new IndexWriter(_indexDirectory, null, MaxFieldLength.LIMITED);
// additionrequests are grouped by dbKey
// get list from map
LinkedList requests = (LinkedList) _additionRequestsMap.get(dbKey);
// iterate over current listSize to be sure to process addition
// and deletions in sync
// if an request is succesfully completed (indexed), remove it
// from the list
// else add the request again to the end of the list
// elements should not be removed outside this code from lists
// in additionRequestMap
// while indexing process is running
int docsWithinThisSession = 0;
for (int j = 0; j < currentListSize; j++) {
try {
Thread.yield();
// if shutdown is requested skip current request and let wga shutdown
// bc. of skip requests is != 0 lastupdate flag of index will not be updated - therefore all pending requests will be indexed
// during incremental update on next startup
if (_shutdownRequested) {
info.requestSkipped();
break;
}
request = (IndexingRequest) requests.removeFirst();
// check if database for this request is in indexing
// list
// if not, this is an (old) readded request
if (!_indexedDbs.containsKey(dbKey)) {
continue;
}
if (alreadyAdded.contains(request.toString())) {
continue;
}
db = (WGDatabase) _core.getContentdbs().get(request.getDbkey());
if ((db == null) || (!db.isReady())) {
info.requestReinserted();
_core.getLog().info(
"Indexer cannot access database '" + request.getDbkey() + "'. Indexing requests for content " + request.getDocumentKey()
+ " will be tried again later.");
// put request back in list and continue with next
// request
requests.add(request);
continue;
}
if (!db.isSessionOpen() && db.openSession() == WGDatabase.ACCESSLEVEL_NOTLOGGEDIN) {
_core.getLog().info(
"Indexer cannot access database '" + request.getDbkey() + "'. Indexing requests for content " + request.getDocumentKey()
+ " will be tried again later.");
info.requestReinserted();
// put request back in list and continue with next
// request
requests.add(request);
continue;
} else {
// db session is open - ensure not more than <MaxDocsPerDBSession> docs are indexed within this session to ensure gc of hibernate-entities
// @see Bugfix: B000037EA
if (docsWithinThisSession >= getMaxDocsPerDBSession()) {
db.reopenSession();
if (!db.isSessionOpen() && db.openSession() == WGDatabase.ACCESSLEVEL_NOTLOGGEDIN) {
_core.getLog().info(
"Indexer cannot access database '" + request.getDbkey() + "'. Indexing requests for content " + request.getDocumentKey()
+ " will be tried again later.");
info.requestReinserted();
// put request back in list and continue with next
// request
requests.add(request);
continue;
}
docsWithinThisSession = 0;
}
}
db.getSessionContext().setTask("WGA Lucene Indexer");
db.getSessionContext().setBatchProcess(true);
try {
WGContent content = (WGContent) db.getDocumentByDocumentKey(request.getDocumentKey());
if (content != null) {
addToIndex(db, content);
info.docAdded();
docsWithinThisSession++;
// the content-core is not needed anymore for data gathering - so drop core to free HEAP
content.dropCore();
}
else {
// content was deleted
_core.getLog().info("Content " + request.getDocumentKey() + " was deleted during indexer run. Indexing skipped.");
}
} catch (WGBackendException e) {
// check if backendexception is repairable
if (e.isRepairable()) {
// content temporary not accessable bc. of backendexception
_core.getLog().info("Content " + request.getDocumentKey() + " cannot be accessed this time bc. of WGBackendException '" + e.getMessage() + "'. Indexing will be tried again later.");
// raise request timeout
request.raiseRetryCount();
// if not timed out yet
// put request back in list and continue
// with next request
if (!request.isTimedOut()) {
requests.add(request);
} else {
_core.getLog().warn("Content " + request.getDocumentKey() + " could not be accessed " + request.getRetryCount() + " times bc. of WGBackendException '" + e.getMessage() + "'. Indexing request timed out. Luceneindex may be inconsistent.");
continue;
}
} else {
_core.getLog().warn("Unrecoverable error during indexing of content " + request.getDocumentKey() + ". Luceneindex may be inconsistent.", e);
}
}
}
catch (Exception e) {
_core.getLog().error(e.getMessage(), e);
if (request != null) {
info.requestReinserted();
_core.getLog().info(
"Indexer cannot access database '" + request.getDbkey() + "'. Indexing requests for content " + request.getDocumentKey()
+ " will be tried again later.");
// put request back in list and continue with next
// request
addAdditionRequest(request);
continue;
}
}
alreadyAdded.add(request.toString());
}
}
finally {
if (_writer != null) {
try {
_writer.commit();
boolean disableAutoOptimize = Boolean.getBoolean(SYSPROP_DISABLE_AUTO_INDEX_OPTIMIZATION);
if (!disableAutoOptimize) {
_writer.optimize();
}
_writer.close();
_writer = null;
}
catch (IOException e1) {
_core.getLog().error("Unable to close lucene index writer because of I/O exception: '" + e1.getMessage());
}
}
}
}
}
/**
 * Performs the pending deletion requests for the database <code>dbKey</code>.
 * Only the first <code>currentListSize</code> requests are processed so that
 * additions and deletions stay in sync. Processing stops early when a shutdown
 * is requested, when no request list exists for <code>dbKey</code> or when the
 * list is exhausted before <code>currentListSize</code> requests were seen.
 *
 * @param dbKey database to process
 * @param currentListSize number of items from the deletion request list to process
 * @param info IndexingProcessInfo of the current run
 * @throws IOException raised by opening the index reader or by deleting a document
 */
private void performDeletionRequests(String dbKey, int currentListSize, IndexingProcessInfo info) throws IOException {
    // continue only if necessary (requests in list)
    if (currentListSize <= 0) {
        return;
    }
    // string forms of the requests already handled in this run (duplicates are skipped)
    Set<String> alreadyDeleted = new HashSet<String>();
    _core.getLog().debug("Performing index deletion requests for db '" + dbKey + "'.");
    try {
        _reader = IndexReader.open(_indexDirectory, false);
        // deletion requests are grouped by dbKey
        LinkedList requests = (LinkedList) _deletionRequestsMap.get(dbKey);
        // iterate over the given list size only, to keep deletions and additions in sync;
        // elements should not be removed from the lists in _deletionRequestsMap
        // outside this code while the indexing process is running
        for (int j = 0; j < currentListSize; j++) {
            // if shutdown is requested skip the current request and let wga shut down.
            // Because the skipped-request count is then != 0 the lastupdate flag of the index
            // is not updated, so all pending requests are indexed again during the
            // incremental update on next startup
            if (_shutdownRequested) {
                info.requestSkipped();
                break;
            }
            Thread.yield();
            // removeFirst() throws NoSuchElementException on an empty list, so check
            // before removing; a missing list for this dbKey is treated like an empty one
            if (requests == null || requests.isEmpty()) {
                break;
            }
            IndexingRequest request = (IndexingRequest) requests.removeFirst();
            if (request == null) {
                break;
            }
            String requestId = request.toString();
            if (alreadyDeleted.contains(requestId)) {
                continue;
            }
            deleteContent(request.getDbkey(), request.getDocumentKey());
            info.docDeleted();
            alreadyDeleted.add(requestId);
        }
    }
    finally {
        if (_reader != null) {
            try {
                _reader.close();
                _reader = null;
            }
            catch (IOException e1) {
                _core.getLog().error("Unable to close lucene index reader because of I/O exception: '" + e1.getMessage());
            }
        }
    }
}
/**
 * Works off all queued "drop database" requests: every distinct request taken
 * from <code>_dropDBRequests</code> is passed to <code>dropDatabaseFromIndex</code>.
 * Requests whose string form was already handled in this run are ignored.
 *
 * @throws IOException raised by opening the index reader or by the drop itself
 * @throws WGUnavailableException declared for callers; not raised directly here
 */
private void performDropDBRequests() throws IOException, WGUnavailableException {
    // nothing queued - nothing to do
    if (listIsEmpty(_dropDBRequests)) {
        return;
    }
    Set handledRequests = new HashSet();
    _core.getLog().debug("Performing index drop db requests");
    try {
        _reader = IndexReader.open(_indexDirectory, false);
        for (;;) {
            Thread.yield();
            IndexingRequest pending = fetchNextFromList(_dropDBRequests);
            if (pending == null) {
                // queue drained
                break;
            }
            String requestId = pending.toString();
            if (!handledRequests.contains(requestId)) {
                dropDatabaseFromIndex(pending.getDbkey());
                handledRequests.add(requestId);
            }
        }
    }
    finally {
        // always release the reader opened above
        if (_reader != null) {
            try {
                _reader.close();
                _reader = null;
            }
            catch (IOException closeFailure) {
                _core.getLog().error("Unable to close lucene index reader because of I/O exception: '" + closeFailure.getMessage());
            }
        }
    }
}
/**
 * Works off all queued "truncate database" requests: every distinct request
 * taken from <code>_truncateDBRequests</code> is passed to
 * <code>truncateDatabaseFromIndex</code>. Requests whose string form was
 * already handled in this run are ignored.
 *
 * @throws IOException raised by opening the index reader or by the truncation itself
 * @throws WGUnavailableException declared for callers; not raised directly here
 */
private void performTruncateDBRequests() throws IOException, WGUnavailableException {
    // nothing queued - nothing to do
    if (listIsEmpty(_truncateDBRequests)) {
        return;
    }
    Set seenRequests = new HashSet();
    _core.getLog().debug("Performing index truncate db requests");
    try {
        _reader = IndexReader.open(_indexDirectory, false);
        boolean queueDrained = false;
        while (!queueDrained) {
            Thread.yield();
            IndexingRequest next = fetchNextFromList(_truncateDBRequests);
            if (next == null) {
                queueDrained = true;
            }
            else {
                String requestId = next.toString();
                if (!seenRequests.contains(requestId)) {
                    truncateDatabaseFromIndex(next.getDbkey());
                    seenRequests.add(requestId);
                }
            }
        }
    }
    finally {
        // always release the reader opened above
        if (_reader != null) {
            try {
                _reader.close();
                _reader = null;
            }
            catch (IOException closeFailure) {
                _core.getLog().error("Unable to close lucene index reader because of I/O exception: '" + closeFailure.getMessage());
            }
        }
    }
}
/**
 * Processes the database update requests queued in <code>_dbUpdateRequests</code>.
 * Each request that can be served is handed to <code>updateDb</code>, which in turn
 * fills the other request lists. Requests whose database is currently not
 * accessible are put back via <code>addDBUpdateRequest</code> to be tried again later.
 * <p>
 * Only as many requests as were queued when the method started are processed, so
 * re-queued requests are not picked up again within the same call.
 *
 * @return always <code>true</code>
 * @throws WGUnavailableException declared for callers; failures of a single request are caught and logged
 * @throws IOException declared for callers; failures of a single request are caught and logged
 * @throws ParseException declared for callers; failures of a single request are caught and logged
 */
private boolean performDbUpdateRequests() throws WGUnavailableException, IOException, ParseException {
    _core.getLog().debug("Performing database update requests");
    // iterate over the current list size:
    // a request that completes successfully (split into addition and deletion requests) stays removed,
    // otherwise it is added again to the end of the list.
    // Elements should not be removed from _dbUpdateRequests outside this code
    // while the indexing process is running
    int currentListSize = _dbUpdateRequests.size();
    for (int i = 0; i < currentListSize; i++) {
        // reset per iteration so the catch block never sees the request of a previous iteration
        IndexingRequest request = null;
        try {
            Thread.yield();
            // synchronized fetch; yields null instead of throwing when the list is exhausted
            request = fetchNextFromList(_dbUpdateRequests);
            if (request == null) {
                break;
            }
            // check if the database for this request is in the indexing list
            // if not, this is an (old) readded request
            if (!_indexedDbs.containsKey(request.getDbkey())) {
                continue;
            }
            WGDatabase db = (WGDatabase) _core.getContentdbs().get(request.getDbkey());
            if ((db == null) || (!db.isReady())) {
                _core.getLog().info("Indexer cannot access database '" + request.getDbkey() + "'. Indexing request will be tried again later.");
                // put request back in list and continue with next request
                addDBUpdateRequest(request);
                continue;
            }
            // reuse an already open session, otherwise open one
            int access = db.isSessionOpen() ? db.getSessionContext().getAccessLevel() : db.openSession();
            if (access == WGDatabase.ACCESSLEVEL_NOTLOGGEDIN) {
                _core.getLog().warn("Cannot update lucene index for database '" + request.getDbkey() + "' because it cannot be opened by master login. Indexing request will be tried again later.");
                // put request back in list and continue with next request
                addDBUpdateRequest(request);
                continue;
            }
            db.getSessionContext().setTask("WGA Lucene Indexer");
            db.getSessionContext().setBatchProcess(true);
            updateDb(request, db);
        }
        catch (Exception e) {
            if (request == null) {
                // failure happened before a request could be fetched - nothing to put back
                _core.getLog().error("Indexer could not fetch the next database update request.", e);
                continue;
            }
            _core.getLog().info("Indexer cannot access database '" + request.getDbkey() + "'. Indexing request will be tried again later." , e);
            // put request back in list and continue with next request
            addDBUpdateRequest(request);
        }
    }
    return true;
}
/**
 * Removes and returns the head of the given request list while holding the
 * monitor of the enclosing LuceneManager.
 *
 * @param list request list to take the next element from
 * @return the first request of the list or <code>null</code> if the list is empty
 */
private IndexingRequest fetchNextFromList(LinkedList list) {
    synchronized (LuceneManager.this) {
        return list.isEmpty() ? null : (IndexingRequest) list.removeFirst();
    }
}
/**
 * Tells whether the given request list holds no elements. The check is done
 * while holding the monitor of the enclosing LuceneManager.
 *
 * @param list request list to inspect
 * @return <code>true</code> if the list has no elements, <code>false</code> otherwise
 */
private boolean listIsEmpty(LinkedList list) {
    synchronized (LuceneManager.this) {
        return list.size() <= 0;
    }
}
/**
 * Deletes the index entries of one document through the currently open
 * <code>_reader</code> and logs the deletion.
 *
 * @param dbkey key of the database the document belongs to
 * @param documentKey key of the document to remove from the index
 * @throws IOException raised by the index reader
 */
private void deleteContent(String dbkey, String documentKey) throws IOException {
    // documents are addressed in the index by their unique key built from db and document key
    Term uniqueKeyTerm = new Term(INDEXFIELD_UNIQUEKEY, buildUniqueIndexKey(dbkey, documentKey));
    _reader.deleteDocuments(uniqueKeyTerm);
    String message = "Index of " + documentKey + " deleted from db " + dbkey;
    _core.getLog().info(message);
}
/**
 * Removes all documents of a database from the index through the currently
 * open <code>_reader</code> and also removes the database from the index
 * configuration.
 *
 * @param dbkey key of the database to drop
 * @throws IOException raised by the index reader
 */
private void dropDatabaseFromIndex(String dbkey) throws IOException {
    Term allDocsOfDb = new Term(INDEXFIELD_DBKEY, dbkey);
    _reader.deleteDocuments(allDocsOfDb);
    // remove from configfile
    _indexConfig.removeDBConfig(dbkey);
    String message = "Content of db '" + dbkey + "' dropped from index.";
    _core.getLog().info(message);
}
/**
 * Removes all documents of a database from the index through the currently
 * open <code>_reader</code>. Unlike <code>dropDatabaseFromIndex</code> the
 * index configuration is left untouched.
 *
 * @param dbkey key of the database to truncate
 * @throws IOException raised by the index reader
 */
private void truncateDatabaseFromIndex(String dbkey) throws IOException {
    Term allDocsOfDb = new Term(INDEXFIELD_DBKEY, dbkey);
    _reader.deleteDocuments(allDocsOfDb);
    String message = "Content of db '" + dbkey + "' truncated from index.";
    _core.getLog().info(message);
}
/**
 * Builds the lucene document for a content document and adds it to the index
 * via <code>_writer</code>.
 * <p>
 * The document receives, in this order: the internal key fields, the virtual
 * metas (db key, content key, area, content type, parent and path struct keys),
 * the content metas, the content items according to the configured item rules,
 * the content relations and the text extracted from file attachments.
 *
 * @param db database the content belongs to
 * @param content content to index; <code>null</code> is logged as an error and ignored
 * @throws IOException raised by attachment handling or by the index writer
 * @throws WGAPIException on content access problems; this method itself raises a
 *         WGSystemException when a meta has no MetaInfo and a WGConfigurationException
 *         when no item rule is available for the database
 */
private void addToIndex(WGDatabase db, WGContent content) throws IOException, WGAPIException {
    if (content == null) {
        _core.getLog().error("cannot add 'null' content to index");
        return;
    }
    String dbkey = db.getDbReference();
    _core.getLog().info("Indexing " + content.getDocumentKey() + " from db " + dbkey);
    // build index key
    String uniqueIndexKey = buildUniqueIndexKey(dbkey, content.getDocumentKey());
    org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();

    // index keywords internal use
    addKeyword(document, INDEXFIELD_UNIQUEKEY, uniqueIndexKey);
    addKeyword(document, INDEXFIELD_DBKEY, dbkey);
    addKeyword(document, INDEXFIELD_DOCUMENTKEY, content.getDocumentKey());
    addKeyword(document, INDEXFIELD_CONTENTKEY, content.getContentKey().toString());
    addKeyword(document, INDEXFIELD_ISVIRTUALCONTENT, content.isVirtual());

    // meta keywords
    // synonyms for internal fields --> encapsulate internal fields from metas
    addKeyword(document, VIRTUALMETA_DBKEY, dbkey);
    addSortField(document, VIRTUALMETA_DBKEY, dbkey);
    addKeyword(document, VIRTUALMETA_KEY, content.getContentKey().toString());
    addSortField(document, VIRTUALMETA_KEY, content.getContentKey().toString());
    if (content.hasCompleteRelationships()) {
        addKeyword(document, VIRTUALMETA_AREA, content.getStructEntry().getArea().getName());
        addSortField(document, VIRTUALMETA_AREA, content.getStructEntry().getArea().getName());
        addKeyword(document, VIRTUALMETA_CONTENTTYPE, content.getStructEntry().getContentType().getName());
        addSortField(document, VIRTUALMETA_CONTENTTYPE, content.getStructEntry().getContentType().getName());
    }
    if (!content.getStructEntry().isRoot()) {
        // the direct parent is indexed as PARENT, every ancestor up to the root as PATH
        WGStructEntry ancestor = content.getStructEntry().getParentEntry();
        addKeyword(document, VIRTUALMETA_PARENT, ancestor.getStructKey().toString());
        do {
            addKeyword(document, VIRTUALMETA_PATH, ancestor.getStructKey().toString());
            ancestor = ancestor.getParentEntry();
        } while (ancestor != null);
    }

    // index content metas
    Iterator contentMetaNames = content.getMetaNames().iterator();
    while (contentMetaNames.hasNext()) {
        String metaName = (String) contentMetaNames.next();
        MetaInfo metaInfo = (MetaInfo) content.getMetaInfo(metaName);
        if (metaInfo == null) {
            throw new WGSystemException("MetaInfo for meta '" + metaName + "' not found.");
        }
        if (!metaInfo.isLuceneSpecialTreatment() && !metaInfo.getLuceneIndexType().equals(MetaInfo.LUCENE_INDEXTYPE_NOINDEX)) {
            addMeta(document, metaInfo, content.getMetaData(metaName));
        } else {
            if (metaInfo.getName().equals(WGContent.META_IS_HIDDEN_FROM)) {
                // split hidden from
                List isHiddenFrom = content.isHiddenFrom();
                addKeyword(document, "HIDDENINNAV", Boolean.valueOf(isHiddenFrom.contains(WGContent.DISPLAYTYPE_NAVIGATOR)), metaInfo.getLuceneBoost());
                addSortField(document, "HIDDENINNAV", Boolean.valueOf(isHiddenFrom.contains(WGContent.DISPLAYTYPE_NAVIGATOR)));
                addKeyword(document, "HIDDENINSEARCH", Boolean.valueOf(isHiddenFrom.contains(WGContent.DISPLAYTYPE_SEARCH)), metaInfo.getLuceneBoost());
                addSortField(document, "HIDDENINSEARCH", Boolean.valueOf(isHiddenFrom.contains(WGContent.DISPLAYTYPE_SEARCH)));
                addKeyword(document, "HIDDENINSITEMAP", Boolean.valueOf(isHiddenFrom.contains(WGContent.DISPLAYTYPE_SITEMAP)), metaInfo.getLuceneBoost());
                addSortField(document, "HIDDENINSITEMAP", Boolean.valueOf(isHiddenFrom.contains(WGContent.DISPLAYTYPE_SITEMAP)));
            } else if (metaInfo.getName().equals(WGContent.META_VALID_FROM)) {
                // bugfix B00003432 by tb 10.03.2006
                if (content.getValidFrom() != null) {
                    addKeyword(document, WGContent.META_VALID_FROM, content.getValidFrom(), metaInfo.getLuceneBoost());
                    addSortField(document, WGContent.META_VALID_FROM, content.getValidFrom());
                } else {
                    // index not set validFrom as 00000 ...
                    addKeyword(document, WGContent.META_VALID_FROM, EMPTY_VALID_FROM, metaInfo.getLuceneBoost());
                    addSortField(document, WGContent.META_VALID_FROM, EMPTY_VALID_FROM);
                }
            } else if (metaInfo.getName().equals(WGContent.META_VALID_TO)) {
                // bugfix B00003432 by tb 10.03.2006
                if (content.getValidTo() != null) {
                    addKeyword(document, WGContent.META_VALID_TO, content.getValidTo(), metaInfo.getLuceneBoost());
                    addSortField(document, WGContent.META_VALID_TO, content.getValidTo());
                } else {
                    // index not set validTo as 99999...
                    addKeyword(document, WGContent.META_VALID_TO, EMPTY_VALID_TO, metaInfo.getLuceneBoost());
                    addSortField(document, WGContent.META_VALID_TO, EMPTY_VALID_TO);
                }
            }
        }
    }

    // index content items
    Iterator itemNames = content.getItemNames().iterator();
    while (itemNames.hasNext()) {
        // retrieve configured indexing rule
        String itemName = (String) itemNames.next();
        LuceneIndexItemRule rule = retrieveItemRule(db, itemName);
        if (rule == null) {
            throw new WGConfigurationException("No lucene configuration available for database " + db.getDbReference() +". The database may have been removed from lucene index.");
        }
        if (rule.getIndexType().equals(LuceneIndexItemRule.INDEX_TYPE_FULLTEXT)) {
            addToFulltextIndex(content, document, itemName, rule);
        }
        if (rule.getIndexType().equals(LuceneIndexItemRule.INDEX_TYPE_KEYWORD)) {
            addToKeywordIndex(content, document, itemName, rule);
        }
        // add item as sort field if configured
        if (rule.isSortable()) {
            addForSorting(content, document, itemName, rule);
        }
    }

    // index content relations
    Iterator<String> relationNames = content.getRelationNames().iterator();
    while (relationNames.hasNext()) {
        String relationName = relationNames.next();
        WGRelationData data = content.getRelationData(relationName);
        if (data != null) {
            String luceneItemName = RELATION_PREFIX + relationName.toLowerCase();
            addKeyword(document, luceneItemName, data.getTargetStructkey().toString() + "." + data.getTargetLanguage());
            addSortField(document, luceneItemName, data.getTargetStructkey().toString() + "." + data.getTargetLanguage());
        }
    }

    // index file attachments
    List filenames = content.getFileNames();
    if (filenames != null) {
        Iterator it = filenames.iterator();
        while (it.hasNext()) {
            String filename = (String) it.next();
            if (filename.startsWith(".")) {
                // this indicates normally a hidden file so skip it
                continue;
            }
            // retrieve file rule
            LuceneIndexFileRule rule = retrieveFileRule(db, filename);
            if (rule.getFileSizeLimit() == LuceneIndexFileRule.FILESIZELIMIT_INDEX_NONE) {
                // skip file
                continue;
            }
            if (rule.getFileSizeLimit() != LuceneIndexFileRule.FILESIZELIMIT_INDEX_ALL) {
                int filesize = content.getFileSize(filename);
                // check filesize
                if (filesize > (rule.getFileSizeLimit() * 1024) || filesize == -1) {
                    _core.getLog().info("Skipping file " + filename + " of content " + content.getContentKey().toString() + " - filesizelimit " + rule.getFileSizeLimit() + " kb exceeded.");
                    // file not found or filesize exceeded limit - skip file
                    continue;
                }
            }
            // try to retrieve filehandler
            FileHandler handler = retrieveFileHandler(filename);
            if (handler == null) {
                _core.getLog().info("No filehandler found for file " + filename + " of content " + content.getContentKey().toString() + " from db " + db.getDbReference() + ".");
                continue;
            }
            // parse text from file
            InputStream is = content.getFileData(filename);
            if (is != null) {
                String text = null;
                try {
                    handler.parse(is);
                    text = handler.getText();
                } catch (FileHandlerException e) {
                    _core.getLog().error("Unable to extract text from file '" + filename + "' of content '" + content.getContentKey().toString() + "' using filehandler '" + handler.getClass().getName() + "'.", e);
                } finally {
                    // the attachment stream is only needed for text extraction - release it
                    // here so it does not leak; a failing close must not abort indexing
                    try {
                        is.close();
                    } catch (IOException closeFailure) {
                        _core.getLog().warn("Unable to close data stream of file '" + filename + "' after text extraction.", closeFailure);
                    }
                }
                if (text != null) {
                    // index file text
                    _core.getLog().info("Indexing file " +filename + " of content " + content.getContentKey().toString() + " from db " + db.getDbReference() + ".");
                    addUnStored(document, INDEXFIELD_ALLATTACHMENTS, text, rule.getBoost());
                    if (rule.isIncludedInAllContent()) {
                        addUnStored(document, INDEXFIELD_ALLCONTENT, text, rule.getBoost());
                    }
                }
            }
        }
    }

    // get analyzer for document language
    Analyzer analyzer = retrieveAnalyzer(content);
    // analyze document with configured analyzer and add to index
    _writer.addDocument(document, analyzer);
    _core.getLog().debug("Index of " + content.getDocumentKey() + " from db " + dbkey + " added using analyzer '" + analyzer.getClass().getName());
}
/**
 * Adds a keyword field with a boost of 1; see the four-argument variant for
 * the supported value types.
 *
 * @param doc lucene document to add the field to
 * @param name field name
 * @param value field value
 * @throws WGIllegalArgumentException if the value type is not supported
 */
private void addKeyword(org.apache.lucene.document.Document doc, String name, Object value) throws WGIllegalArgumentException {
    final float neutralBoost = 1;
    addKeyword(doc, name, value, neutralBoost);
}
/**
 * Adds a stored, not analyzed field to the document. Strings are taken as they
 * are, dates and numbers are formatted with the keyword formats and booleans
 * are converted via <code>String.valueOf</code>. Nothing is added when name or
 * value is <code>null</code>.
 *
 * @param doc lucene document to add the field to
 * @param name field name
 * @param value field value (String, Date, Number or Boolean)
 * @param boost boost to set on the field
 * @throws WGIllegalArgumentException if the value is of any other type
 */
private void addKeyword(org.apache.lucene.document.Document doc, String name, Object value, float boost) throws WGIllegalArgumentException {
    // without a name or a value there is nothing to index
    if (name == null || value == null) {
        return;
    }
    String keyword;
    if (value instanceof String) {
        keyword = (String) value;
    } else if (value instanceof Date) {
        keyword = DATEFORMAT_KEYWORD.format(value);
    } else if (value instanceof Number) {
        // B000047B2
        keyword = NUMBERFORMAT_KEYWORD.format(value);
    } else if (value instanceof Boolean) {
        keyword = String.valueOf(value);
    } else {
        throw new WGIllegalArgumentException("Unsupported value '" + value.getClass().getName() + "' for keyword '" + name + "'.");
    }
    Field keywordField = new Field(name, keyword, Field.Store.YES, Field.Index.NOT_ANALYZED);
    keywordField.setBoost(boost);
    doc.add(keywordField);
}
/**
 * Adds a stored, not analyzed sort field named <code>SORTITEM_PREFIX + name</code>
 * to the document. Nothing is added when <code>name</code> is <code>null</code>.
 * <p>
 * A <code>null</code> value is always indexed as an empty string, because the
 * sort field must exist. The former type specific null defaults sat inside
 * <code>instanceof</code> branches, which a <code>null</code> value can never
 * enter, so they were unreachable and have been removed.
 *
 * @param doc lucene document to add the field to
 * @param name item or meta name the sort field is derived from
 * @param value field value (String, Date, Number, Boolean or <code>null</code>)
 * @throws WGIllegalArgumentException if the value is of any other type
 */
private void addSortField(org.apache.lucene.document.Document doc, String name, Object value) throws WGIllegalArgumentException {
    if (name == null) {
        return;
    }
    String fieldName = SORTITEM_PREFIX + name;
    String sortValue;
    if (value == null) {
        // sort field must exist
        sortValue = "";
    } else if (value instanceof String) {
        sortValue = (String) value;
    } else if (value instanceof Date) {
        sortValue = DATEFORMAT_SORTFIELD.format(value);
    } else if (value instanceof Number) {
        sortValue = DECIMALFORMAT_SORTFIELD.format(value);
    } else if (value instanceof Boolean) {
        sortValue = String.valueOf(value);
    } else {
        throw new WGIllegalArgumentException("Unsupported value '" + value.getClass().getName() + "' for sortfield '" + fieldName + "'.");
    }
    doc.add(new Field(fieldName, sortValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
}
/**
 * Adds a content meta to the document according to the lucene index type of
 * its MetaInfo.
 * <ul>
 * <li>Keyword metas: a single value is added as keyword and sort field under
 * the meta name and all synonyms, using the meta's boost. A multiple meta adds
 * one keyword per list element under the meta name and all synonyms (default
 * boost, no sort field); a <code>null</code> list adds an empty keyword under
 * the meta name only.</li>
 * <li>Fulltext metas: a single value is added as unstored field and sort field
 * under the meta name and all synonyms and, if configured, to the all-content
 * field. A multiple meta adds every list element as unstored field under the
 * meta name, the all-content field (if configured) and all synonyms; a
 * <code>null</code> list adds nothing.</li>
 * </ul>
 * Metas of any other index type are ignored.
 *
 * @param doc lucene document to add the fields to
 * @param metaInfo description of the meta
 * @param value meta value; a List for multiple metas
 * @throws WGIllegalArgumentException if a value has an unsupported type
 */
private void addMeta(org.apache.lucene.document.Document doc, MetaInfo metaInfo, Object value) throws WGIllegalArgumentException {
    if (metaInfo.getLuceneIndexType().equals(MetaInfo.LUCENE_INDEXTYPE_KEYWORD)) {
        if (metaInfo.isMultiple()) {
            if (value == null) {
                addKeyword(doc, metaInfo.getName(), "");
                return;
            }
            for (Object singleValue : (List) value) {
                addKeyword(doc, metaInfo.getName(), singleValue);
                for (Iterator names = metaInfo.getSynonyms().iterator(); names.hasNext();) {
                    String synonym = (String) names.next();
                    addKeyword(doc, synonym, singleValue);
                }
            }
            return;
        }
        // single valued keyword meta
        addKeyword(doc, metaInfo.getName(), value, metaInfo.getLuceneBoost());
        addSortField(doc, metaInfo.getName(), value);
        for (Iterator names = metaInfo.getSynonyms().iterator(); names.hasNext();) {
            String synonym = (String) names.next();
            addKeyword(doc, synonym, value, metaInfo.getLuceneBoost());
            addSortField(doc, synonym, value);
        }
        return;
    }
    if (!metaInfo.getLuceneIndexType().equals(MetaInfo.LUCENE_INDEXTYPE_FULLTEXT)) {
        // neither keyword nor fulltext - nothing to add
        return;
    }
    if (metaInfo.isMultiple()) {
        if (value == null) {
            return;
        }
        for (Object singleValue : (List) value) {
            addUnStored(doc, metaInfo.getName(), singleValue, metaInfo.getLuceneBoost());
            if (metaInfo.getLuceneAddToAllContent()) {
                addUnStored(doc, INDEXFIELD_ALLCONTENT, singleValue, metaInfo.getLuceneBoost());
            }
            for (Iterator names = metaInfo.getSynonyms().iterator(); names.hasNext();) {
                String synonym = (String) names.next();
                addUnStored(doc, synonym, singleValue, metaInfo.getLuceneBoost());
            }
        }
        return;
    }
    // single valued fulltext meta
    addUnStored(doc, metaInfo.getName(), value, metaInfo.getLuceneBoost());
    addSortField(doc, metaInfo.getName(), value);
    for (Iterator names = metaInfo.getSynonyms().iterator(); names.hasNext();) {
        String synonym = (String) names.next();
        addUnStored(doc, synonym, value, metaInfo.getLuceneBoost());
        addSortField(doc, synonym, value);
    }
    if (metaInfo.getLuceneAddToAllContent()) {
        addUnStored(doc, INDEXFIELD_ALLCONTENT, value, metaInfo.getLuceneBoost());
    }
}
/**
 * Adds an unstored, analyzed field with a boost of 1.0; see the four-argument
 * variant for details.
 *
 * @param doc lucene document to add the field to
 * @param name field name
 * @param value field value
 * @throws WGIllegalArgumentException if the value is not a String
 */
private void addUnStored(org.apache.lucene.document.Document doc, String name, Object value) throws WGIllegalArgumentException {
    final float neutralBoost = 1.0F;
    addUnStored(doc, name, value, neutralBoost);
}
/**
 * Adds an analyzed field that is not stored in the index. Nothing is added
 * when name or value is <code>null</code>.
 *
 * @param doc lucene document to add the field to
 * @param name field name
 * @param value field value; only Strings are supported
 * @param boost boost to set on the field
 * @throws WGIllegalArgumentException if the value is not a String
 */
private void addUnStored(org.apache.lucene.document.Document doc, String name, Object value, float boost) throws WGIllegalArgumentException {
    // without a name or a value there is nothing to index
    if (name == null || value == null) {
        return;
    }
    if (!(value instanceof String)) {
        throw new WGIllegalArgumentException("Unsupported value '" + value.getClass().getName() + "' for unstored field '" + name + "'.");
    }
    Field textField = new Field(name, (String) value, Field.Store.NO, Field.Index.ANALYZED);
    textField.setBoost(boost);
    doc.add(textField);
}
/**
 * Looks up the file handler registered for the extension of the given file
 * name, i.e. the text behind the last '.'. A warning is logged when the name
 * has no '.' or consists of a single '.' only.
 *
 * @param filename name of the attachment
 * @return result of the core's handler lookup for the extension, or
 *         <code>null</code> if no extension could be determined
 */
private FileHandler retrieveFileHandler(String filename) {
    int dotIndex = filename.lastIndexOf(".");
    if (dotIndex == -1) {
        _core.getLog().warn("Could not optain a filehandler for indexing content of file '" + filename + "'. Unable to parse file extension - there is no '.'.");
        return null;
    }
    if (filename.length() <= 1) {
        // only a '.'
        _core.getLog().warn("Could not optain a filehandler for indexing content of file '" + filename + "'. Unable to parse file extension - there is only a '.'.");
        return null;
    }
    return _core.getFileHandlerForExtension(filename.substring(dotIndex + 1));
}
/**
 * Adds the String values of a content item to the fulltext index. Each value
 * is run through the rule's <code>parseItemValue</code>; a non-null result is
 * added as unstored field under the lower-cased item name and under the
 * all-content field, both with the rule's boost. Non-String values are skipped.
 *
 * @param content content holding the item
 * @param document lucene document to add the fields to
 * @param itemName name of the item
 * @param rule indexing rule configured for the item
 * @throws WGAPIException on content access problems
 */
private void addToFulltextIndex(WGContent content, org.apache.lucene.document.Document document, String itemName, LuceneIndexItemRule rule) throws WGAPIException {
    _core.getLog().debug("Adding item " + itemName + " to fulltextIndex.");
    for (Iterator values = content.getItemValueList(itemName).iterator(); values.hasNext();) {
        Object candidate = values.next();
        if (!(candidate instanceof String)) {
            continue;
        }
        String parsedText = rule.parseItemValue((String) candidate);
        if (parsedText == null) {
            continue;
        }
        addUnStored(document, itemName.toLowerCase(), parsedText, rule.getBoost());
        addUnStored(document, INDEXFIELD_ALLCONTENT, parsedText, rule.getBoost());
    }
}
/**
 * Adds the values of a content item as keywords under the lower-cased item
 * name, using the rule's boost. String values of html/xml items are converted
 * to plain text first (the original text is used if that conversion fails).
 * Dates and numbers are passed on as they are, any other non-null value is
 * added in its String form. <code>null</code> values are skipped.
 *
 * @param content content holding the item
 * @param document lucene document to add the fields to
 * @param itemName name of the item
 * @param rule indexing rule configured for the item
 * @throws WGAPIException on content access problems
 */
private void addToKeywordIndex(WGContent content, org.apache.lucene.document.Document document, String itemName, LuceneIndexItemRule rule) throws WGAPIException {
    _core.getLog().debug("Adding item " + itemName + " to keywordIndex.");
    for (Iterator values = content.getItemValueList(itemName).iterator(); values.hasNext();) {
        Object element = values.next();
        if (element == null) {
            continue;
        }
        if (element instanceof String) {
            String keyword = (String) element;
            // if content type html/xml remove tags
            if (rule.getContentType().equals(LuceneIndexItemRule.CONTENT_TYPE_HTML_XML)) {
                try {
                    keyword = WGUtils.toPlainText(keyword, "", false);
                }
                catch (IOException ignored) {
                    // cannot parse, so use original text
                }
            }
            addKeyword(document, itemName.toLowerCase(), keyword, rule.getBoost());
        }
        else if (element instanceof Date || element instanceof Number) {
            addKeyword(document, itemName.toLowerCase(), element, rule.getBoost());
        }
        else {
            addKeyword(document, itemName.toLowerCase(), String.valueOf(element), rule.getBoost());
        }
    }
}
/**
 * Adds the values of a content item as sort fields under the lower-cased item
 * name. Strings, dates and numbers are passed on as they are, any other
 * non-null value is added in its String form. <code>null</code> values are
 * skipped; an item without values gets no sort field (an explicit empty sort
 * field is not necessary since bugfix #00000249).
 *
 * @param content content holding the item
 * @param document lucene document to add the fields to
 * @param itemName name of the item
 * @param rule indexing rule configured for the item
 * @throws WGAPIException on content access problems
 */
private void addForSorting(WGContent content, org.apache.lucene.document.Document document, String itemName, LuceneIndexItemRule rule) throws WGAPIException {
    _core.getLog().debug("Adding item " + itemName + " for sorting purposes.");
    for (Iterator values = content.getItemValueList(itemName).iterator(); values.hasNext();) {
        Object element = values.next();
        if (element == null) {
            continue;
        }
        boolean directlySortable = element instanceof String || element instanceof Date || element instanceof Number;
        if (directlySortable) {
            addSortField(document, itemName.toLowerCase(), element);
        }
        else {
            addSortField(document, itemName.toLowerCase(), String.valueOf(element));
        }
    }
}
/**
 * Looks up the item rule matching the given item name in the lucene
 * configuration registered for the database.
 *
 * @param db database the item belongs to
 * @param itemName name of the item
 * @return the matching rule, or <code>null</code> if no configuration is
 *         registered for the database
 */
private LuceneIndexItemRule retrieveItemRule(WGDatabase db, String itemName) {
    LuceneConfiguration dbConfig = (LuceneConfiguration) _indexedDbs.get(db.getDbReference());
    return (dbConfig == null) ? null : dbConfig.getMatchingItemRule(itemName);
}
/**
 * Looks up the file rule matching the given file name in the lucene
 * configuration registered for the database. Unlike
 * <code>retrieveItemRule</code> there is no check for a missing
 * configuration here, so an unregistered database ends in a
 * NullPointerException.
 *
 * @param db database the attachment belongs to
 * @param filename name of the attachment
 * @return the rule returned by the configuration for this file name
 */
private LuceneIndexFileRule retrieveFileRule(WGDatabase db, String filename) {
    Object registered = _indexedDbs.get(db.getDbReference());
    return ((LuceneConfiguration) registered).getMatchingFileRule(filename);
}
/**
 * Decides how the index of a database has to be brought up to date and
 * triggers the matching update:
 * <ul>
 * <li>a full re-index if the request asks for an index rebuild,</li>
 * <li>a full index if no revision of a previous update is stored,</li>
 * <li>a full index if the stored revision cannot be handled by the database
 * any more (WGWrongRevisionException),</li>
 * <li>an incremental update otherwise.</li>
 * </ul>
 *
 * @param request the database update request
 * @param db the database to update the index for
 * @throws IOException declared for callers
 * @throws ParseException declared for callers
 * @throws WGAPIException on database access problems
 */
private void updateDb(IndexingRequest request, WGDatabase db) throws IOException, ParseException, WGAPIException {
    String dbkey = request.getDbkey();

    if (request.isIndexRebuildNeeded()) {
        // rebuild the complete index for this db
        // --> lucene config has changed
        _core.getLog().info("Rebuilding index of db " + dbkey);
        initialUpdateDb(dbkey, db);
        return;
    }

    // Fetch the revision of last index update from config doc
    Comparable lastUpdated = _indexConfig.getLastUpdated(dbkey);
    if (lastUpdated == null) {
        // New indexing of all documents
        _core.getLog().info("Initial indexing of db " + dbkey);
        initialUpdateDb(dbkey, db);
        return;
    }

    // Test if the stored index revision is comparable to the current revision given by the database
    boolean revisionUsable;
    try {
        db.getRevisionDate(lastUpdated);
        revisionUsable = true;
    }
    catch (WGWrongRevisionException e) {
        revisionUsable = false;
    }

    if (!revisionUsable) {
        // Rebuild index because the revision type has changed and is no more comparable to the stored revision
        _core.getLog().info("Initial indexing of db " + dbkey + " because the type of database revision has changed");
        initialUpdateDb(dbkey, db);
        return;
    }

    // Indexing of updated and deleted documents
    incrementalUpdateDb(db, lastUpdated);
}
/**
 * Queues the index changes for everything that was modified in the database
 * since the last index update. Only update logs of content documents are
 * regarded: a deletion log queues a deletion request, an update log queues a
 * deletion request plus - if the content still exists - an addition request.
 * Nothing is done when the stored revision is newer than the database revision
 * or when the database returns no update logs.
 *
 * @param db the database to update the index for
 * @param lastUpdated revision of the last index update
 * @throws WGAPIException on database access problems
 */
private void incrementalUpdateDb(WGDatabase db, Comparable lastUpdated) throws WGAPIException {
    Comparable dbLastChanged = db.getRevision();
    if (lastUpdated.compareTo(dbLastChanged) > 0) {
        // nothing to do for incremental update
        return;
    }
    _core.getLog().info("Incremental index update of db " + db.getDbReference());
    List logs = db.getUpdatedDocumentsSince(lastUpdated);
    if (logs == null) {
        // no update logs available - nothing to queue (avoids a NullPointerException below)
        return;
    }
    String contentPrefix = WGDocument.TYPENAME_CONTENT + "/";
    Iterator logsIt = logs.iterator();
    while (logsIt.hasNext()) {
        Thread.yield();
        WGUpdateLog log = (WGUpdateLog) logsIt.next();
        // old notes-design does not support logs
        if ((log == null) || (log.getDocumentKey() == null)) {
            _core.getLog().warn("Cannot process incremental index update for 'null' log or 'null' log.getDocumentKey() - ensure you use WGAContentStoreDesign >= 3.1.2");
            continue;
        }
        // only content documents are indexed
        if (!log.getDocumentKey().startsWith(contentPrefix)) {
            continue;
        }
        if (log.getType() == WGUpdateLog.TYPE_DELETE) {
            addDeletionRequest(new IndexingRequest(db.getDbReference(), log.getDocumentKey()));
        }
        else if (log.getType() == WGUpdateLog.TYPE_UPDATE) {
            // an update is a deletion of the old index entry followed by a new addition
            addDeletionRequest(new IndexingRequest(db.getDbReference(), log.getDocumentKey()));
            try {
                WGContent content = (WGContent) db.getDocumentByDocumentKey(log.getDocumentKey());
                // re-add only if the content still exists
                if (content != null) {
                    addAdditionRequest(new IndexingRequest(db.getDbReference(), log.getDocumentKey()));
                }
            }
            catch (WGAPIException e) {
                _core.getLog().error("Unable to process log entry of type 'update'.", e);
            }
        }
    }
}
/**
 * Queues a complete (re-)indexing of a database: first a truncate request for
 * the database, then one addition request per content key returned by the
 * database. If the database returns no content iterator an error is logged
 * and no addition requests are queued.
 *
 * @param dbkey key of the database
 * @param db the database to index
 * @throws WGAPIException on database access problems
 */
private void initialUpdateDb(String dbkey, WGDatabase db) throws WGAPIException {
    // delete all content for db
    addTruncateDBRequest(new IndexingRequest(dbkey, null));

    // index all content
    long startedAt = System.currentTimeMillis();
    _core.getLog().info("Fetching all contentkeys from db '" + dbkey + "'.");
    WGContentIterator allContent = (WGContentIterator) db.getAllContent(true);
    if (allContent == null) {
        _core.getLog().error("Fetching all contenkeys from db '" + dbkey + "' returned 'null'. Database will not be indexed.");
    }
    else {
        for (Iterator keys = allContent.getContentKeys().iterator(); keys.hasNext();) {
            Thread.yield();
            WGContentKey contentKey = (WGContentKey) keys.next();
            String documentKey = WGDocument.TYPENAME_CONTENT + "/" + contentKey.toString();
            addAdditionRequest(new IndexingRequest(dbkey, documentKey));
        }
    }
    _core.getLog().info("Fetching all contentkeys from db '" + dbkey + "' done in " + (System.currentTimeMillis() - startedAt) + "ms.");
}
}
class IndexingRequest {
private String _dbkey;
private String _documentKey;
private boolean _indexRebuildNeeded;
private int _retryCount = 0;
private static final int _retryTimeout = 10;
public IndexingRequest(String dbkey, String documentKey) {
_dbkey = dbkey;
_documentKey = documentKey;
_indexRebuildNeeded = false;
}
public IndexingRequest(String dbkey, String documentKey, boolean indexRebuildNeeded) {
_dbkey = dbkey;
_documentKey = documentKey;
_indexRebuildNeeded = indexRebuildNeeded;
}
/**
* @return Returns the contentKey.
*/
public String getDocumentKey() {
return _documentKey;
}
/**
* @return Returns the dbkey.
*/
public String getDbkey() {
return _dbkey;
}
/**
* @return Returns if indexRebuild is needed. --> lucene config has changed
*/
public boolean isIndexRebuildNeeded() {
return _indexRebuildNeeded;
}
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
public String toString() {
return getDbkey() + "/" + getDocumentKey();
}
public boolean equals(Object obj) {
if (obj instanceof IndexingRequest) {
IndexingRequest req = (IndexingRequest) obj;
String checkDBKey1 = req.getDbkey();
if (checkDBKey1 == null) {
checkDBKey1 = "";
}
String checkDBKey2 = this.getDbkey();
if (checkDBKey2 == null) {
checkDBKey2 = "";
}
String checkDocumentKey1 = req.getDocumentKey();
if (checkDocumentKey1 == null) {
checkDocumentKey1 = "";
}
String checkDocumentKey2 = this.getDocumentKey();
if (checkDocumentKey2 == null) {
checkDocumentKey2 = "";
}
if ( (checkDBKey1.equals(checkDBKey2)) &&
(checkDocumentKey1.equals(checkDocumentKey2)) ) {
return true;
} else {
return false;
}
}
return super.equals(obj);
}
public int getRetryCount() {
return _retryCount;
}
public void raiseRetryCount() {
_retryCount++;
}
public boolean isTimedOut() {
return _retryCount >= IndexingRequest._retryTimeout;
}
}
// Containers for pending indexing work, all created empty.
// NOTE(review): judging by the names these hold IndexingRequest objects grouped by
// kind of operation (drop db, truncate db, db update, addition, deletion) - element
// types, map keys and the synchronization policy are not visible here; confirm
// against the add...Request() methods and the indexer.
private LinkedList _dropDBRequests = new LinkedList();
private LinkedList _truncateDBRequests = new LinkedList();
private LinkedList _dbUpdateRequests = new LinkedList();
private HashMap _additionRequestsMap = new HashMap();
private HashMap _deletionRequestsMap = new HashMap();
/*
 * (non-Javadoc)
 *
 * Content creation event. This manager takes no action on it.
 *
 * @see de.innovationgate.webgate.api.WGContentEventListener#contentCreated(de.innovationgate.webgate.api.WGContentEvent)
 */
public void contentCreated(WGContentEvent contentEvent) {
// intentionally empty
}
/*
 * (non-Javadoc)
 *
 * Content save event. This manager takes no action and always returns true.
 * NOTE(review): the meaning of the return value is defined by the listener
 * interface (presumably "do not veto the save") - confirm there.
 *
 * @see de.innovationgate.webgate.api.WGContentEventListener#contentSaved(de.innovationgate.webgate.api.WGContentEvent)
 */
public boolean contentSaved(WGContentEvent contentEvent) {
return true;
}
/*
 * (non-Javadoc)
 *
 * Queues a deletion request for the deleted document, addressed by the
 * db reference of the event's database and the event's document key.
 *
 * @see de.innovationgate.webgate.api.WGContentEventListener#contentHasBeenDeleted(de.innovationgate.webgate.api.WGContentEvent)
 */
public void contentHasBeenDeleted(WGContentEvent event) {
    IndexingRequest removal = new IndexingRequest(event.getDatabase().getDbReference(), event.getDocumentKey());
    addDeletionRequest(removal);
}
/*
 * (non-Javadoc)
 *
 * Queues the saved document for re-indexing: the same request object is
 * registered first as a deletion and then as an addition.
 *
 * @see de.innovationgate.webgate.api.WGContentEventListener#contentHasBeenSaved(de.innovationgate.webgate.api.WGContentEvent)
 */
public void contentHasBeenSaved(WGContentEvent event) {
    final String dbReference = event.getDatabase().getDbReference();
    final IndexingRequest reindex = new IndexingRequest(dbReference, event.getDocumentKey());

    // deletion is registered before addition
    addDeletionRequest(reindex);
    addAdditionRequest(reindex);
}
/**
 * Cancels the timer, if one is still set, when this object is finalized.
 * The superclass finalizer is always invoked afterwards, even if cancelling
 * the timer fails.
 *
 * @throws Throwable anything thrown while cancelling or by the superclass finalizer
 */
protected void finalize() throws Throwable {
    try {
        if (_timer != null) {
            _timer.cancel();
            _timer = null;
        }
    }
    finally {
        // finalizers are not chained automatically
        super.finalize();
    }
}
/**
 * Runs a lucene fulltext search for the given phrase.
 * <p>
 * The parsed phrase is combined (all as mandatory clauses unless noted) with
 * restrictions taken from the parameters map: an excluded document, the
 * databases selected by the search scope, language selection, visibility
 * ("enhance") and role, released status, and exclusion of virtual content
 * unless the native option for including it is given. A native option
 * {@code sort:<field>[(asc)|(desc)]} selects a sort field.
 * <p>
 * The executed query (as string) and its rewritten form are written back
 * into {@code parameters}. The created result set is also remembered in the
 * result set list read by {@link #closeOpenedResultSets()}.
 *
 * @param db database the search is issued from; used for scope "db", for the
 *        domain lookup and for resolving the "only language" option
 * @param phrase lucene query phrase
 * @param parameters query options; also receives output entries. The search
 *        scope entry must be present (NOTE(review): a missing scope currently
 *        surfaces as a wrapped NullPointerException).
 * @param request current request, passed on to the database lookups and the result set
 * @return {@code null} if the phrase is {@code null} or blank, otherwise the result set
 * @throws WGQueryException if the index is being rebuilt, scope "dblist" is used
 *         without db keys, the phrase cannot be parsed, the sorted search fails
 *         with a NullPointerException (reported as sort field not indexed), the
 *         query has too many boolean clauses, or any other error occurs
 */
public WGResultSet search(WGDatabase db, String phrase, Map parameters, HttpServletRequest request) throws WGQueryException {

    // set max clause count for boolean queries
    BooleanQuery.setMaxClauseCount(_booleanQueryMaxClauseCount);

    if (this.isRebuildingIndex()) {
        throw new WGQueryException(phrase, "Lucene search temporary disabled. Rebuilding lucene index ...");
    }

    if (phrase == null || phrase.trim().equals("")) {
        return null;
    }

    try {
        BooleanQuery wholeQuery = new BooleanQuery();

        int max = WGACore.DEFAULT_QUERY_MAXRESULTS;
        Integer maxResults = (Integer) parameters.get(WGDatabase.QUERYOPTION_MAXRESULTS);
        if (maxResults != null) {
            max = maxResults;
        }

        // handle dboption EXCLUDEDOCUMENT
        WGContent excludeContent = (WGContent) parameters.get(WGDatabase.QUERYOPTION_EXCLUDEDOCUMENT);
        if (excludeContent != null) {
            String uniqueKey = buildUniqueIndexKey(excludeContent.getDatabase().getDbReference(), excludeContent.getDocumentKey());
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_UNIQUEKEY, uniqueKey)), BooleanClause.Occur.MUST_NOT);
        }

        // list of dbs to search in
        String searchScope = (String) parameters.get(LuceneManager.QUERYOPTION_SEARCHSCOPE);
        List searchDBKeys = new ArrayList();
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB)) {
            searchDBKeys.add(db.getDbReference());
        }
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_DOMAIN)) {
            Iterator dbs = _core.openContentDBs(request, (String)db.getAttribute(WGACore.DBATTRIB_DOMAIN)).iterator();
            while (dbs.hasNext()) {
                WGDatabase currentDB = (WGDatabase) dbs.next();
                searchDBKeys.add(currentDB.getDbReference());
            }
        }
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_WGA)) {
            Iterator dbs = _core.openContentDBs(request).iterator();
            while (dbs.hasNext()) {
                WGDatabase currentDB = (WGDatabase) dbs.next();
                searchDBKeys.add(currentDB.getDbReference());
            }
        }
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB_LIST)) {
            String dbListCSV = (String) parameters.get(QUERYOPTION_SEARCHDBKEYS);
            if (dbListCSV == null || dbListCSV.trim().equals("")) {
                throw new WGQueryException(phrase, "Search scope is 'dblist' but no db keys given.");
            } else {
                Iterator dbkeys = WGUtils.deserializeCollection(dbListCSV, ",").iterator();
                while (dbkeys.hasNext()) {
                    String dbkey = (String) dbkeys.next();
                    searchDBKeys.add(dbkey.trim().toLowerCase());
                }
            }
        }

        // Handle language selection
        List<WGLanguage> languagesPriorityList = null;
        boolean filterLanguages = false;
        if (parameters.containsKey(WGDatabase.QUERYOPTION_LANGUAGES)) {
            List<WGLanguage> langs = (List<WGLanguage>) parameters.get(WGDatabase.QUERYOPTION_LANGUAGES);
            if (langs.size() > 1) {
                // several languages: any of them may match, result set later chooses by priority
                BooleanQuery langQuery = new BooleanQuery();
                for (WGLanguage lang : langs) {
                    langQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, lang.getName())), BooleanClause.Occur.SHOULD);
                }
                wholeQuery.add(langQuery, BooleanClause.Occur.MUST);
                languagesPriorityList = langs;
                filterLanguages = true;
            }
            else if (langs.size() == 1){
                wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, langs.get(0).getName())), BooleanClause.Occur.MUST);
                languagesPriorityList = Collections.singletonList(langs.get(0));
            }
        }
        else if (parameters.containsKey(WGDatabase.QUERYOPTION_ONLYLANGUAGE)) {
            String language = (String)parameters.get(WGDatabase.QUERYOPTION_ONLYLANGUAGE);
            wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, language)), BooleanClause.Occur.MUST);
            languagesPriorityList = Collections.singletonList(db.getLanguage(language));
        }

        // no explicit language selection: use all languages of the searched, indexed dbs
        if (languagesPriorityList == null) {
            languagesPriorityList = getLanguagesForSearchDBKeys(searchDBKeys);
        }

        // Handle visibility selection
        if (!parameters.containsKey(WGDatabase.QUERYOPTION_ENHANCE) || parameters.get(WGDatabase.QUERYOPTION_ENHANCE).equals(Boolean.TRUE)) {

            wholeQuery.add(new TermQuery(new Term(WGContent.META_VISIBLE, "true")), BooleanClause.Occur.MUST);

            String role = (String)parameters.get(WGDatabase.QUERYOPTION_ROLE);
            if (role != null) {
                if (!role.equalsIgnoreCase(WGContent.DISPLAYTYPE_NONE)) {
                    wholeQuery.add(new TermQuery(new Term("HIDDENIN" + role.toUpperCase(), "false")), BooleanClause.Occur.MUST);
                }
            }
        }

        if (parameters.containsKey(WGDatabase.QUERYOPTION_ONLYRELEASED)) {
            wholeQuery.add(new TermQuery(new Term(WGContent.META_STATUS, WGContent.STATUS_RELEASE)), BooleanClause.Occur.MUST);
        }

        // build dbQuery (OR combination of all searchDbs indexed by lucene)
        BooleanQuery dbQuery = new BooleanQuery();
        Iterator itSearchDBKeys = searchDBKeys.iterator();
        while (itSearchDBKeys.hasNext()) {
            String currentDBKey = (String) itSearchDBKeys.next();
            if (_indexedDbs.containsKey(currentDBKey)) {
                dbQuery.add(new TermQuery(new Term(INDEXFIELD_DBKEY, currentDBKey)), BooleanClause.Occur.SHOULD);
            }
        }
        wholeQuery.add(dbQuery, BooleanClause.Occur.MUST);

        // Add parsed search phrase.
        // Search in allcontent for each language using the configured analyzer
        // if no analyzer is configured for a language search at least with one
        // default analyzer
        boolean searchWithDefaultAnalyzer = false;

        //if no languages found search at least with DefaultAnalyzer
        if (languagesPriorityList.size() <= 0) {
            searchWithDefaultAnalyzer = true;
        }

        //build phrase query
        BooleanQuery phraseQuery = new BooleanQuery();
        Iterator languageList = languagesPriorityList.iterator();
        while (languageList.hasNext()) {
            WGLanguage languageItem = (WGLanguage) languageList.next();
            Analyzer analyzer = _core.getAnalyzerForLanguageCode(languageItem.getName());
            if (analyzer != null) {
                QueryParser parser = new IndexingRuleBasedQueryParser(INDEXFIELD_ALLCONTENT, analyzer, _indexedDbs, searchDBKeys, _metaKeywordFields);
                Query query = parser.parse(phrase);

                // phrase parsed with this analyzer only counts for documents of this language
                BooleanQuery testPhraseAndLangQuery = new BooleanQuery();
                testPhraseAndLangQuery.add(query, BooleanClause.Occur.MUST);
                testPhraseAndLangQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, languageItem.getName())), BooleanClause.Occur.MUST);
                phraseQuery.add(testPhraseAndLangQuery, BooleanClause.Occur.SHOULD);
            }
            else {
                searchWithDefaultAnalyzer = true;
            }
        }

        if (searchWithDefaultAnalyzer) {
            QueryParser parser = new IndexingRuleBasedQueryParser(INDEXFIELD_ALLCONTENT, _core.getDefaultAnalyzer(), _indexedDbs, searchDBKeys, _metaKeywordFields);
            Query query = parser.parse(phrase);
            phraseQuery.add(query, BooleanClause.Occur.SHOULD);
        }
        wholeQuery.add(phraseQuery, BooleanClause.Occur.MUST);

        // parse native options
        Sort sort = null;
        String sortFieldName = "";
        String nativeOptionsStr = (String) parameters.get(WGDatabase.QUERYOPTION_NATIVEOPTIONS);
        boolean includeVirtualContent = false;
        if (nativeOptionsStr != null) {
            Iterator nativeOptions = WGUtils.deserializeCollection(nativeOptionsStr, ",", true).iterator();
            while (nativeOptions.hasNext()) {
                String option = (String) nativeOptions.next();
                if (option.startsWith("sort:")) {
                    sortFieldName = option.substring(5).trim();
                    boolean reverse = false;
                    if (sortFieldName.toLowerCase().endsWith("(asc)")) {
                        sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 5).trim();
                    }
                    else if (sortFieldName.toLowerCase().endsWith("(desc)")) {
                        sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 6).trim();
                        reverse = true;
                    }

                    if (sortFieldName.length() > 0) {
                        char first = sortFieldName.charAt(0);
                        if (first >= 'A' && first <= 'Z') {
                            // meta sort
                            sortFieldName = sortFieldName.toUpperCase();
                        } else {
                            // item sort
                            sortFieldName = sortFieldName.toLowerCase();
                        }
                    }

                    // sort order currently only german
                    sort = new Sort(new SortField(SORTITEM_PREFIX + sortFieldName, Locale.GERMANY, reverse));
                } else if (option.equalsIgnoreCase(NATIVE_QUERYOPTION_INCLUDEVIRTUALCONTENT)) {
                    includeVirtualContent = true;
                }
            }
        }

        if (!includeVirtualContent) {
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_ISVIRTUALCONTENT, String.valueOf(true))), BooleanClause.Occur.MUST_NOT);
        }

        TopDocs hits;

        //register executed query as output parameter
        parameters.put(WGDatabase.QUERYOPTION_RETURNQUERY, wholeQuery.toString());

        // simplify query and register as taginfo
        try {
            parameters.put(TAGINFO_SIMPLIFIEDQUERY, rewrite(wholeQuery));
        }
        catch (BooleanQuery.TooManyClauses e) {
            // the clause limit can already be hit here, report it like in the search below
            throw createTooManyClausesException(phrase, parameters);
        }

        long timeBefore = System.currentTimeMillis();
        try {
            hits = search(wholeQuery, max, sort);
        }
        catch (NullPointerException e) {
            if (sort == null) {
                // unsorted search: not the known sort problem, let the generic handler report it
                throw e;
            }
            // lucene bug when sorting for non existing fields with Locale
            throw new WGQueryException(wholeQuery.toString(), "Sortfield '" + sortFieldName + "' not indexed.");
        }
        catch (BooleanQuery.TooManyClauses e) {
            throw createTooManyClausesException(phrase, parameters);
        }
        long timeAfter = System.currentTimeMillis();
        long executionTime = timeAfter - timeBefore;

        LuceneResultSet resultSet;
        if (filterLanguages) {
            resultSet = new LuceneLanguageChoosingResultSet(hits, _core, request, parameters, wholeQuery, executionTime, languagesPriorityList);
        }
        else {
            resultSet = new LuceneMultiDBResultSet(hits, _core, request, parameters, wholeQuery, executionTime);
        }

        // put resultset in per thread list
        List rsList = (List) _resultsetList.get();
        if (rsList == null) {
            rsList = new LinkedList();
            _resultsetList.set(rsList);
        }
        rsList.add(resultSet);

        return resultSet;
    }
    catch (org.apache.lucene.queryParser.ParseException e) {
        throw new WGQueryException("Unable to parse lucene query", e.getMessage(), e);
    }
    catch (WGQueryException e) {
        // already a deliberate, user-facing query error: pass it on unchanged
        // instead of logging it as an internal error and wrapping it again
        throw e;
    }
    catch (Exception e) {
        _core.getLog().error("Error executing lucene search: " + e.getClass().getName() + " - " + e.getMessage(), e);
        throw new WGQueryException(phrase, e.getClass().getName() + ": " + e.getMessage(), e);
    }
}

/**
 * Marks the query as too unspecific in the parameters map and builds the
 * exception reporting that the boolean clause limit was exceeded.
 */
private WGQueryException createTooManyClausesException(String phrase, Map parameters) {
    parameters.put(TAGINFO_UNSPECIFICQUERY, Boolean.TRUE);
    return new WGQueryException(phrase, "Too many BooleanClauses in query. " +
            "Please use a more specific query or increase value of " +
            "'booleanQueryMaxClauseCount' via WGAManager. Current value is '" + this.getBooleanQueryMaxClauseCount() + "'.");
}
/**
 * Closes every result set registered in the current result set list and
 * removes it from that list. Does nothing if no list has been registered.
 */
public void closeOpenedResultSets() {
    List openResultSets = (List) _resultsetList.get();
    if (openResultSets == null) {
        return;
    }

    for (Iterator resultSets = openResultSets.iterator(); resultSets.hasNext();) {
        LuceneResultSet openResultSet = (LuceneResultSet) resultSets.next();
        openResultSet.close();
        // drop the closed result set from the list
        resultSets.remove();
    }
}
/**
 * Collects the languages of all given databases that are both known to the
 * core and contained in the indexed databases map. Languages are appended
 * database by database; duplicates are not removed.
 *
 * @param searchDBKeys database keys (strings) to look at
 * @return list with all languages for searchDBKeys indexed by lucene
 * @throws WGAPIException declared for the language lookup on the databases
 */
private List<WGLanguage> getLanguagesForSearchDBKeys(List searchDBKeys) throws WGAPIException {
    LinkedList<WGLanguage> collected = new LinkedList<WGLanguage>();
    for (Object rawKey : searchDBKeys) {
        String dbKey = (String) rawKey;
        WGDatabase db = (WGDatabase) _core.getContentdbs().get(dbKey);
        if (db == null || !_indexedDbs.containsKey(dbKey)) {
            // unknown or not indexed: contributes no languages
            continue;
        }
        collected.addAll(db.getLanguages().values());
    }
    return collected;
}
/**
 * @return the current value of the rebuilding-index flag; while it is true,
 *         the public search method rejects queries with an exception
 */
public boolean isRebuildingIndex() {
return _rebuildingIndex;
}
/**
 * @param dbKey key of the database to check
 * @return true if the indexed databases map contains the given key
 */
public boolean indexIsEnabled(String dbKey) {
return _indexedDbs.containsKey(dbKey);
}
/**
 * @return the configured maximum clause count for boolean queries, which
 *         the public search method hands to lucene before each search
 */
public int getBooleanQueryMaxClauseCount() {
return _booleanQueryMaxClauseCount;
}
/**
 * Stores the maximum clause count for boolean queries. The value is only
 * stored here; it is not pushed to lucene by this setter.
 *
 * @param booleanQueryMaxClauseCount new maximum clause count
 */
public void setBooleanQueryMaxClauseCount(int booleanQueryMaxClauseCount) {
this._booleanQueryMaxClauseCount = booleanQueryMaxClauseCount;
}
/**
 * Builds the unique index key of a document by joining database key and
 * document key with a slash.
 *
 * @param dbkey key of the database
 * @param documentkey key of the document
 * @return {@code dbkey + "/" + documentkey}
 */
private String buildUniqueIndexKey(String dbkey, String documentkey) {
    StringBuilder uniqueKey = new StringBuilder();
    // append(String) renders null as "null", like string concatenation does
    uniqueKey.append(dbkey).append("/").append(documentkey);
    return uniqueKey.toString();
}
/**
 * @return the current value of the indexer-is-running flag
 */
public boolean isIndexerRunning() {
return _indexerIsRunning;
}
/**
 * Database connect event: creates or updates the index of the connected
 * database, passing a set that contains only that database's reference.
 *
 * @param event the connect event carrying the database
 */
public void databaseConnected(WGDatabaseEvent event) {
    // mutable set holding just the key of the database that has connected
    HashSet connectedKeys = new HashSet(Collections.singleton(event.getDatabase().getDbReference()));
    createOrUpdateDBIndex(event.getDatabase(), connectedKeys);
}
/* (non-Javadoc)
 * Database connection error event. This manager takes no action on it.
 *
 * @see de.innovationgate.webgate.api.WGDatabaseConnectListener#databaseConnectionError(de.innovationgate.webgate.api.WGDatabaseEvent)
 */
public void databaseConnectionError(WGDatabaseEvent event) {
// intentionally empty
}
/**
 * @return the configured "max docs per db session" value
 *         (NOTE(review): how the indexer applies this limit is not visible here)
 */
public int getMaxDocsPerDBSession() {
return _maxDocsPerDBSession;
}
/**
 * Stores the "max docs per db session" value.
 *
 * @param maxDocsPerDBSession new value
 */
public void setMaxDocsPerDBSession(int maxDocsPerDBSession) {
_maxDocsPerDBSession = maxDocsPerDBSession;
}
/**
 * Creates a highlighter whose scorer is built from the query terms
 * extracted for the given item/meta field, followed by the terms
 * extracted for the all-content field.
 *
 * @param itemOrMeta name of the field to extract terms for
 * @param query query to extract the terms from
 * @param formatter formatter handed to the highlighter
 * @return the highlighter, or {@code null} if {@code itemOrMeta} or {@code query} is {@code null}
 */
public Highlighter createHighlighter(String itemOrMeta, Query query, Formatter formatter) {
    if (itemOrMeta == null || query == null) {
        return null;
    }

    // use only terms for this item and terms for allcontent for scoring
    List<WeightedTerm> scoringTerms = new ArrayList<WeightedTerm>();

    WeightedTerm[] itemTerms = QueryTermExtractor.getTerms(query, false, itemOrMeta);
    if (itemTerms != null) {
        scoringTerms.addAll(Arrays.asList(itemTerms));
    }

    WeightedTerm[] allContentTerms = QueryTermExtractor.getTerms(query, false, LuceneManager.INDEXFIELD_ALLCONTENT);
    if (allContentTerms != null) {
        scoringTerms.addAll(Arrays.asList(allContentTerms));
    }

    WeightedTerm[] termsArray = scoringTerms.toArray(new WeightedTerm[scoringTerms.size()]);
    return new Highlighter(formatter, new QueryTermScorer(termsArray));
}
/**
 * Tokenizes the given text with the analyzer retrieved for the given content.
 *
 * @param text text to tokenize
 * @param content content used to retrieve the analyzer
 * @return token stream over {@code text}
 * @throws WGAPIException declared for the analyzer retrieval
 */
public TokenStream createTokenStream(String text, WGContent content) throws WGAPIException {
    // @see org.apache.lucene.analysis.Analyzer - null for fieldname should be supported,
    // most analyzers do not use the parameter fieldname
    return retrieveAnalyzer(content).tokenStream(null, new StringReader(text));
}
/**
 * Pending feature: validates the database and collects the document keys
 * currently indexed for it (at most 500 search hits). The collected keys
 * are not processed any further yet.
 *
 * @param db database to inspect; must not be a full featured content store
 *        and must already be contained in the indexed databases map
 * @throws IOException declared for the index access
 * @throws WGIllegalArgumentException if the database is a full featured
 *         content store or is not yet indexed
 * @throws InterruptedException declared for the index access
 * @deprecated pending feature - F00003426
 */
public void performCustomDBIndexDeletions(WGDatabase db) throws IOException, WGIllegalArgumentException, InterruptedException {
    // this method is only supported for none fullcontentstores
    if (db.hasFeature(WGDatabase.FEATURE_FULLCONTENTFEATURES)) {
        throw new WGIllegalArgumentException("Method performCustomDBIndexDeletions() is unsupported for full featured contentstores.");
    }

    // the db must already be indexed
    final String dbReference = db.getDbReference();
    if (!_indexedDbs.containsKey(dbReference)) {
        throw new WGIllegalArgumentException("Cannot perform deletions on database '" + dbReference + "'. Database is not yet indexed. Ensure performCustomDBIndexUpdates() has been called first.");
    }

    // query for everything indexed under this db key
    BooleanQuery dbDocumentsQuery = new BooleanQuery();
    dbDocumentsQuery.add(new TermQuery(new Term(INDEXFIELD_DBKEY, dbReference)), BooleanClause.Occur.MUST);

    // collect currently indexed document keys for this db
    Set indexedDocumentKeys = new HashSet();
    ScoreDoc[] found = search(dbDocumentsQuery, 500, null).scoreDocs;
    for (int i = 0; i < found.length; i++) {
        org.apache.lucene.document.Document indexedDoc = getDocument(found[i].doc);
        indexedDocumentKeys.add(indexedDoc.get(LuceneManager.INDEXFIELD_DOCUMENTKEY));
    }
}
/**
 * @return the directory file held in the {@code _dir} field
 */
public File getIndexDirectory() {
return _dir;
}
/**
 * Content move event: queues the moved document for re-indexing by
 * registering the same request first as a deletion and then as an addition.
 *
 * @param event the move event carrying database and document key
 */
public void contentHasBeenMoved(WGContentEvent event) {
    final String dbReference = event.getDatabase().getDbReference();
    final IndexingRequest reindex = new IndexingRequest(dbReference, event.getDocumentKey());

    // deletion is registered before addition
    addDeletionRequest(reindex);
    addAdditionRequest(reindex);
}
/**
 * Returns the shared index searcher. It is created on first use and is
 * closed and replaced by a new one whenever its index reader reports that
 * it is no longer current.
 * <p>
 * NOTE(review): the searcher is returned before the caller acquires its
 * semaphore permit. It looks possible that another thread replaces (and
 * closes) the searcher in between, leaving the caller with a closed
 * instance - confirm. Whether {@code _indexSearcher} is declared volatile
 * is not visible here; the unsynchronized reads below depend on it.
 *
 * @return the searcher to use
 * @throws CorruptIndexException declared for opening the index
 * @throws IOException declared for opening/closing the index
 * @throws InterruptedException if interrupted while waiting for the semaphore permits
 */
private IndexSearcher getIndexSearcher() throws CorruptIndexException, IOException, InterruptedException {
// first use: create the searcher; the null check is repeated inside the monitor
if (_indexSearcher == null) {
synchronized (this) {
if (_indexSearcher == null) {
// NOTE(review): second argument is presumably the read-only flag - confirm for the lucene version in use
_indexSearcher = new IndexSearcher(_indexDirectory, true);
}
}
}
// index changed since the reader was opened: replace the searcher; check repeated inside the monitor
if (!_indexSearcher.getIndexReader().isCurrent()) {
synchronized (this) {
if (!_indexSearcher.getIndexReader().isCurrent()) {
// wait for current searches to finish and block further searches
_indexSearcherSemaphore.acquire(MAX_CONCURRENT_SEARCHES);
try {
_indexSearcher.close();
// NOTE(review): if this constructor throws, the field still refers to the closed searcher
_indexSearcher = new IndexSearcher(_indexDirectory, true);
} finally {
// hand back all permits so searches can continue
_indexSearcherSemaphore.release(MAX_CONCURRENT_SEARCHES);
}
}
}
}
return _indexSearcher;
}
/**
 * Executes the query on the shared searcher while holding one permit of
 * the searcher semaphore.
 *
 * @param query query to execute
 * @param max maximum number of hits to return
 * @param sort sort order, or {@code null} for an unsorted search
 * @return the hits
 * @throws IOException declared for the index access
 * @throws InterruptedException if interrupted while waiting for a permit
 */
private TopDocs search(Query query, int max, Sort sort) throws IOException, InterruptedException {
    IndexSearcher currentSearcher = getIndexSearcher();
    _indexSearcherSemaphore.acquire();
    try {
        if (sort == null) {
            return currentSearcher.search(query, null, max);
        }
        return currentSearcher.search(query, null, max, sort);
    }
    finally {
        _indexSearcherSemaphore.release();
    }
}
/**
 * Rewrites the query against the index reader of the shared searcher while
 * holding one permit of the searcher semaphore.
 *
 * @param query query to rewrite
 * @return the rewritten query
 * @throws CorruptIndexException declared for the index access
 * @throws IOException declared for the index access
 * @throws InterruptedException if interrupted while waiting for a permit
 */
private Query rewrite(Query query) throws CorruptIndexException, IOException, InterruptedException {
    IndexSearcher currentSearcher = getIndexSearcher();
    _indexSearcherSemaphore.acquire();
    try {
        Query rewritten = query.rewrite(currentSearcher.getIndexReader());
        return rewritten;
    }
    finally {
        _indexSearcherSemaphore.release();
    }
}
/**
 * Loads a lucene document from the shared searcher while holding one
 * permit of the searcher semaphore.
 *
 * @param i lucene document number
 * @return the stored document
 * @throws CorruptIndexException declared for the index access
 * @throws IOException declared for the index access
 * @throws InterruptedException if interrupted while waiting for a permit
 */
public org.apache.lucene.document.Document getDocument(int i) throws CorruptIndexException, IOException, InterruptedException {
    IndexSearcher currentSearcher = getIndexSearcher();
    _indexSearcherSemaphore.acquire();
    try {
        org.apache.lucene.document.Document loaded = currentSearcher.doc(i);
        return loaded;
    }
    finally {
        _indexSearcherSemaphore.release();
    }
}
/**
 * Asks the shared searcher to explain the score of a document for a query
 * while holding one permit of the searcher semaphore.
 *
 * @param query query the score refers to
 * @param doc lucene document number
 * @return the score explanation
 * @throws CorruptIndexException declared for the index access
 * @throws IOException declared for the index access
 * @throws InterruptedException if interrupted while waiting for a permit
 */
public Explanation explain(Query query, int doc) throws CorruptIndexException, IOException, InterruptedException {
    IndexSearcher currentSearcher = getIndexSearcher();
    _indexSearcherSemaphore.acquire();
    try {
        Explanation explanation = currentSearcher.explain(query, doc);
        return explanation;
    }
    finally {
        _indexSearcherSemaphore.release();
    }
}
/**
 * Content status change event. This manager takes no action on it.
 *
 * @param event the status change event (unused)
 */
public void contentStatusChanged(WGContentEvent event) {
// intentionally empty
}
}