/**
* Bobo Browse Engine - High performance faceted/parametric search implementation
* that handles various types of semi-structured data. Written in Java.
*
* Copyright (C) 2005-2006 John Wang
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* To contact the project administrators for the bobo-browse project,
* please go to https://sourceforge.net/projects/bobo-browse/, or
* send mail to owner@browseengine.com.
*/
package com.browseengine.bobo.api;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.ReaderUtil;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.context.support.GenericApplicationContext;
import com.browseengine.bobo.facets.FacetHandler;
import com.browseengine.bobo.facets.RuntimeFacetHandlerFactory;
/**
* bobo browse index reader
*
*/
public class BoboIndexReader extends FilterIndexReader
{
/** Name of the Spring configuration file looked up inside the index directory. */
private static final String SPRING_CONFIG = "bobo.spring";
/** Logger shared by all instances of this class. */
private static Logger logger = Logger.getLogger(BoboIndexReader.class);
/** Facet handlers keyed by handler name; (re)built by initialize(). */
protected Map<String, FacetHandler<?>> _facetHandlerMap;
/** Facet handlers supplied at construction, or resolved by initialize() when none were given. */
protected Collection<FacetHandler<?>> _facetHandlers;
/** Runtime facet handler factories supplied at construction; may be null. */
protected Collection<RuntimeFacetHandlerFactory<?,?>> _runtimeFacetHandlerFactories;
/** Runtime facet handler factories keyed by factory name; built in the constructor. */
protected Map<String,RuntimeFacetHandlerFactory<?,?>> _runtimeFacetHandlerFactoryMap;
/** Work area handed to facet handlers while their data is being loaded. */
protected WorkArea _workArea;
/** The reader originally handed in by the caller; closed together with this reader when set. */
protected IndexReader _srcReader;
/** Wrapped leaf sub-readers; stays null when this reader wraps its inner reader directly. */
protected BoboIndexReader[] _subReaders = null;
/** Starting doc id of each sub-reader, followed by the total maxDoc; null without sub-readers. */
protected int[] _starts = null;
/** Directory from which segment information is read in getVersion() and reopen(). */
private Directory _dir = null;
/** Facet data attached to this reader, keyed by name. */
private final Map<String,Object> _facetDataMap = new HashMap<String,Object>();
/** Per-thread runtime facet data, keyed by name; each thread starts with an empty map. */
private final ThreadLocal<Map<String,Object>> _runtimeFacetDataMap = new ThreadLocal<Map<String,Object>>()
{
protected Map<String,Object> initialValue() { return new HashMap<String,Object>(); }
};
/**
 * Factory method: wraps the given reader using a fresh {@link WorkArea} and no
 * explicitly supplied facet handlers or runtime facet handler factories.
 *
 * @param reader
 *          index reader to wrap
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstance(IndexReader reader) throws IOException
{
  final WorkArea freshWorkArea = new WorkArea();
  return getInstance(reader, null, null, freshWorkArea);
}
/**
 * Factory method: wraps the given reader using the supplied work area and no
 * explicitly supplied facet handlers or runtime facet handler factories.
 *
 * @param reader
 *          index reader to wrap
 * @param workArea
 *          work area passed to facet handlers while loading
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstance(IndexReader reader, WorkArea workArea) throws IOException
{
  BoboIndexReader wrapped = getInstance(reader, null, null, workArea);
  return wrapped;
}
/**
 * Factory method: wraps the given reader with the supplied facet handlers and
 * runtime facet handler factories, using a fresh {@link WorkArea}.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @param facetHandlerFactories
 *          runtime facet handler factories to use
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstance(IndexReader reader,
                                          Collection<FacetHandler<?>> facetHandlers,
                                          Collection<RuntimeFacetHandlerFactory<?,?>> facetHandlerFactories) throws IOException
{
  final WorkArea freshWorkArea = new WorkArea();
  return getInstance(reader, facetHandlers, facetHandlerFactories, freshWorkArea);
}
/**
 * Factory method: wraps the given reader with the supplied facet handlers, an
 * empty list of runtime facet handler factories and a fresh {@link WorkArea}.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstance(IndexReader reader,
                                          Collection<FacetHandler<?>> facetHandlers) throws IOException
{
  final WorkArea freshWorkArea = new WorkArea();
  return getInstance(reader, facetHandlers, Collections.EMPTY_LIST, freshWorkArea);
}
/**
 * Factory method: constructs a bobo reader around the given reader and runs
 * facet initialization on it before handing it back.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @param facetHandlerFactories
 *          runtime facet handler factories to use
 * @param workArea
 *          work area passed to facet handlers while loading
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstance(IndexReader reader,
                                          Collection<FacetHandler<?>> facetHandlers,
                                          Collection<RuntimeFacetHandlerFactory<?,?>> facetHandlerFactories,
                                          WorkArea workArea) throws IOException
{
  final BoboIndexReader instance =
      new BoboIndexReader(reader, facetHandlers, facetHandlerFactories, workArea);
  instance.facetInit();
  return instance;
}
/**
 * Factory method: wraps the given reader directly as the inner reader, using a
 * fresh {@link WorkArea} and no explicitly supplied handlers or factories.
 *
 * @param reader
 *          index reader to wrap
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstanceAsSubReader(IndexReader reader) throws IOException
{
  final WorkArea freshWorkArea = new WorkArea();
  return getInstanceAsSubReader(reader, null, null, freshWorkArea);
}
/**
 * Factory method: wraps the given reader directly as the inner reader with the
 * supplied handlers and factories, using a fresh {@link WorkArea}.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @param facetHandlerFactories
 *          runtime facet handler factories to use
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstanceAsSubReader(IndexReader reader,
                                                     Collection<FacetHandler<?>> facetHandlers,
                                                     Collection<RuntimeFacetHandlerFactory<?,?>> facetHandlerFactories) throws IOException
{
  final WorkArea freshWorkArea = new WorkArea();
  return getInstanceAsSubReader(reader, facetHandlers, facetHandlerFactories, freshWorkArea);
}
/**
 * Factory method: constructs a bobo reader that uses the given reader itself as
 * the inner reader (no sub-reader wrapping) and runs facet initialization on it.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @param facetHandlerFactories
 *          runtime facet handler factories to use
 * @param workArea
 *          work area passed to facet handlers while loading
 * @return the initialized bobo reader
 * @throws IOException
 */
public static BoboIndexReader getInstanceAsSubReader(IndexReader reader,
                                                     Collection<FacetHandler<?>> facetHandlers,
                                                     Collection<RuntimeFacetHandlerFactory<?,?>> facetHandlerFactories,
                                                     WorkArea workArea) throws IOException
{
  final BoboIndexReader instance =
      new BoboIndexReader(reader, facetHandlers, facetHandlerFactories, workArea, false);
  instance.facetInit();
  return instance;
}
/**
 * Returns the version recorded in the segment information of the index directory.
 * <p>
 * This is best-effort: when no directory is known, or the segment information
 * cannot be read, {@code 0} is returned instead of propagating the failure.
 *
 * @return the index version, or {@code 0} if it cannot be determined
 */
@Override
public long getVersion() {
  if (_dir == null) {
    // nothing to read segment information from
    return 0L;
  }
  try {
    SegmentInfos sinfos = new SegmentInfos();
    sinfos.read(_dir);
    return sinfos.getVersion();
  } catch (Exception e) {
    // keep the best-effort contract, but leave a trace of the failure
    logger.warn("unable to read index version: " + e.getMessage(), e);
    return 0L;
  }
}
/**
 * Gives access to the reader this instance delegates to.
 *
 * @return the wrapped inner reader
 */
public IndexReader getInnerReader()
{
  return this.in;
}
/**
 * Reopens this reader against the segments currently recorded in the index directory.
 * <p>
 * The segment list is re-read from the directory held by this reader. When the inner
 * reader is a {@link MultiReader}, sub-readers whose segment is still listed are
 * reopened individually, newly listed segments get freshly created sub-readers, and a
 * new composite reader is returned only if something changed. When the inner reader is
 * a {@link SegmentReader}, a new reader is created only if the recorded delete count of
 * its segment differs from the reader's own. Any other inner reader is reopened through
 * {@link IndexReader#reopen(boolean)}.
 *
 * @return {@code this} if nothing changed, a new reader otherwise, or {@code null} when
 *         this reader wraps a single segment that is no longer listed in the index
 * @throws CorruptIndexException
 * @throws IOException
 */
@Override
public synchronized IndexReader reopen() throws CorruptIndexException,
    IOException {
  SegmentInfos sinfos = new SegmentInfos();
  sinfos.read(_dir);
  int size = sinfos.size();

  if (in instanceof MultiReader){
    // index the current leaf readers by segment name
    List<IndexReader> boboReaderList = new LinkedList<IndexReader>();
    ReaderUtil.gatherSubReaders(boboReaderList, in);
    Map<String,BoboIndexReader> readerMap = new HashMap<String,BoboIndexReader>();
    for (IndexReader reader : boboReaderList){
      BoboIndexReader boboReader = (BoboIndexReader)reader;
      SegmentReader sreader = (SegmentReader)(boboReader.in);
      readerMap.put(sreader.getSegmentName(),boboReader);
    }

    ArrayList<BoboIndexReader> currentReaders = new ArrayList<BoboIndexReader>(size);
    boolean isNewReader = false;
    for (int i=0;i<size;++i){
      SegmentInfo sinfo = (SegmentInfo)sinfos.get(i);
      BoboIndexReader breader = readerMap.remove(sinfo.name);
      if (breader!=null){
        // should use SegmentReader.reopen
        // TODO: see LUCENE-2559
        BoboIndexReader newReader = (BoboIndexReader)breader.reopen(true);
        if (newReader!=breader){
          isNewReader = true;
        }
        if (newReader!=null){
          currentReaders.add(newReader);
        }
      }
      else{
        // segment that was not part of this reader before
        isNewReader = true;
        SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
        breader = BoboIndexReader.getInstanceAsSubReader(newSreader,this._facetHandlers,this._runtimeFacetHandlerFactories);
        breader._dir = _dir;
        currentReaders.add(breader);
      }
    }
    // anything left in the map is a segment that disappeared from the index
    isNewReader = isNewReader || (readerMap.size() != 0);
    if (!isNewReader){
      return this;
    }
    MultiReader newMreader = new MultiReader(currentReaders.toArray(new BoboIndexReader[currentReaders.size()]),false);
    BoboIndexReader newReader = BoboIndexReader.getInstanceAsSubReader(newMreader,this._facetHandlers,this._runtimeFacetHandlerFactories);
    newReader._dir = _dir;
    return newReader;
  }
  else if (in instanceof SegmentReader){
    // should use SegmentReader.reopen
    // TODO: see LUCENE-2559
    SegmentReader sreader = (SegmentReader)in;
    int numDels = sreader.numDeletedDocs();
    SegmentInfo sinfo = null;
    boolean sameSeg = false;
    // locate the SegmentInfo that belongs to the wrapped segment
    for (int i=0;i<size;++i){
      SegmentInfo sinfoTmp = (SegmentInfo)sinfos.get(i);
      if (sinfoTmp.name.equals(sreader.getSegmentName())){
        sameSeg = numDels==sinfoTmp.getDelCount();
        sinfo = sinfoTmp;
        break;
      }
    }
    if (sinfo == null){
      // segment no longer exists
      return null;
    }
    if (sameSeg){
      return this;
    }
    SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
    BoboIndexReader newReader = BoboIndexReader.getInstanceAsSubReader(newSreader,this._facetHandlers,this._runtimeFacetHandlerFactories);
    // carry the directory over so the new reader can itself be reopened
    newReader._dir = _dir;
    return newReader;
  }
  else{
    // should not reach here, a catch-all default case
    IndexReader reader = in.reopen(true);
    if (in!=reader){
      // wrap the freshly reopened inner reader
      return BoboIndexReader.getInstance(reader, _facetHandlers, _runtimeFacetHandlerFactories, _workArea);
    }
    return this;
  }
}
/**
 * Reopens this reader; the flag is ignored and the call is forwarded to
 * {@link #reopen()}.
 *
 * @param openReadOnly
 *          ignored
 * @return the result of {@link #reopen()}
 * @throws CorruptIndexException
 * @throws IOException
 */
@Override
public synchronized IndexReader reopen(boolean openReadOnly)
    throws CorruptIndexException, IOException {
  // bobo readers are always readonly
  IndexReader reopened = reopen();
  return reopened;
}
/**
 * Looks up facet data attached to this reader.
 *
 * @param name
 *          key the data was stored under
 * @return the stored data, or {@code null} if nothing is stored under that key
 */
public Object getFacetData(String name){
  final Object data = _facetDataMap.get(name);
  return data;
}
/**
 * Attaches facet data to this reader.
 *
 * @param name
 *          key to store the data under
 * @param data
 *          data to store
 * @return the data previously stored under that key, or {@code null}
 */
public Object putFacetData(String name,Object data){
  final Object previous = _facetDataMap.put(name, data);
  return previous;
}
/**
 * Looks up runtime facet data in the calling thread's map.
 *
 * @param name
 *          key the data was stored under
 * @return the stored data, or {@code null} if the thread has no map or no entry
 */
public Object getRuntimeFacetData(String name)
{
  final Map<String,Object> perThread = _runtimeFacetDataMap.get();
  return (perThread == null) ? null : perThread.get(name);
}
/**
 * Stores runtime facet data in the calling thread's map, creating the map
 * first if the thread currently has none.
 *
 * @param name
 *          key to store the data under
 * @param data
 *          data to store
 * @return the data previously stored under that key, or {@code null}
 */
public Object putRuntimeFacetData(String name,Object data)
{
  final Map<String,Object> perThread = _runtimeFacetDataMap.get();
  if (perThread != null)
  {
    return perThread.put(name, data);
  }
  final Map<String,Object> created = new HashMap<String,Object>();
  _runtimeFacetDataMap.set(created);
  return created.put(name, data);
}
/**
 * Drops the calling thread's runtime facet data by resetting its map to {@code null}.
 */
public void clearRuntimeFacetData()
{
  final Map<String,Object> none = null;
  _runtimeFacetDataMap.set(none);
}
/**
 * Clears the attached facet data, closes the source reader if one is set, and
 * then lets the superclass close.
 *
 * @throws IOException
 */
@Override
protected void doClose() throws IOException
{
  _facetDataMap.clear();
  if (_srcReader != null)
  {
    _srcReader.close();
  }
  super.doClose();
}
/**
 * Forwards the commit to the source reader by flushing it; does nothing when
 * no source reader is set.
 *
 * @param commitUserData
 *          user data passed along to the flush
 * @throws IOException
 */
@Override
protected void doCommit(Map commitUserData) throws IOException
{
  if (_srcReader == null)
  {
    return;
  }
  _srcReader.flush(commitUserData);
}
/**
 * Forwards the deletion to the source reader; does nothing when no source
 * reader is set.
 *
 * @param n
 *          id of the document to delete
 * @throws CorruptIndexException
 * @throws IOException
 */
@Override
protected void doDelete(int n) throws CorruptIndexException, IOException
{
  if (_srcReader == null)
  {
    return;
  }
  _srcReader.deleteDocument(n);
}
/**
 * Loads the data of one facet handler after first loading every handler it
 * depends on. Unknown names and already loaded handlers are skipped.
 *
 * @param name
 *          name of the facet handler to load
 * @param loaded
 *          names of handlers whose data is already loaded; updated on success
 * @param visited
 *          names of handlers whose loading has been started; used to detect cycles
 * @param workArea
 *          work area passed to the handler while loading
 * @throws IOException
 *           on a dependency cycle or when a dependency could not be loaded
 */
private void loadFacetHandler(String name,
                              Set<String> loaded,
                              Set<String> visited,
                              WorkArea workArea) throws IOException
{
  final FacetHandler<?> handler = _facetHandlerMap.get(name);
  if (handler == null || loaded.contains(name))
  {
    return;
  }

  visited.add(name);
  for (String dependency : handler.getDependsOn())
  {
    // a handler listing itself as a dependency is ignored
    if (name.equals(dependency))
    {
      continue;
    }
    if (!loaded.contains(dependency))
    {
      if (visited.contains(dependency))
      {
        throw new IOException("Facet handler dependency cycle detected, facet handler: "
            + name + " not loaded");
      }
      loadFacetHandler(dependency, loaded, visited, workArea);
    }
    // the recursive call silently skips unknown names, so re-check
    if (!loaded.contains(dependency))
    {
      throw new IOException("unable to load facet handler: " + dependency);
    }
    handler.putDependedFacetHandler(_facetHandlerMap.get(dependency));
  }

  final long begin = System.currentTimeMillis();
  handler.loadFacetData(this, workArea);
  final long elapsed = System.currentTimeMillis() - begin;
  if (logger.isDebugEnabled())
  {
    logger.debug("facetHandler loaded: " + name + ", took: " + elapsed + " ms");
  }
  loaded.add(name);
}
/**
 * Loads the data of every registered facet handler. Handlers that fail to load
 * are logged, recorded in {@code toBeRemoved}, and afterwards every name in
 * {@code toBeRemoved} is dropped from the handler map.
 *
 * @param workArea
 *          work area passed to the handlers while loading
 * @param toBeRemoved
 *          collects the names of handlers that failed to load
 */
private void loadFacetHandlers(WorkArea workArea, Set<String> toBeRemoved)
{
  final Set<String> loaded = new HashSet<String>();
  final Set<String> visited = new HashSet<String>();

  final Iterator<String> names = _facetHandlerMap.keySet().iterator();
  while (names.hasNext())
  {
    final String name = names.next();
    try
    {
      loadFacetHandler(name, loaded, visited, workArea);
    }
    catch (Exception failure)
    {
      toBeRemoved.add(name);
      logger.error("facet load failed: " + name + ": " + failure.getMessage(), failure);
    }
  }

  _facetHandlerMap.keySet().removeAll(toBeRemoved);
}
/**
 * Gathers the leaf sub-readers of the given reader and wraps each one in its
 * own BoboIndexReader. If no sub-readers are gathered, the reader itself is
 * wrapped and returned as the only element.
 *
 * @param reader
 *          reader whose leaves are to be wrapped
 * @param workArea
 *          work area handed to every wrapper
 * @return array of wrapping readers, never empty
 * @throws IOException
 */
private static IndexReader[] createSubReaders(IndexReader reader, WorkArea workArea) throws IOException
{
  final List<IndexReader> leaves = new ArrayList<IndexReader>();
  ReaderUtil.gatherSubReaders(leaves, reader);

  final int leafCount = leaves.size();
  if (leafCount == 0)
  {
    return new BoboIndexReader[]{ new BoboIndexReader(reader, null, null, workArea, false) };
  }

  final BoboIndexReader[] wrapped = new BoboIndexReader[leafCount];
  int slot = 0;
  for (IndexReader leaf : leaves)
  {
    wrapped[slot++] = new BoboIndexReader(leaf, null, null, workArea, false);
  }
  return wrapped;
}
/**
 * Returns the directory of the first sub-reader when sub-readers exist,
 * otherwise whatever the superclass reports.
 *
 * @return the index directory
 */
@Override
public Directory directory()
{
  if (_subReaders == null)
  {
    return super.directory();
  }
  return _subReaders[0].directory();
}
/**
 * Loads the facet handler collection from the Spring configuration file
 * ({@code bobo.spring}) located in the given index directory. The collection
 * is read from the bean named {@code handlers}.
 * <p>
 * If the work area holds a {@link ClassLoader}, the first one found is used as
 * the class loader of the Spring context.
 *
 * @param file
 *          the index directory
 * @param workArea
 *          work area that may carry a class loader
 * @return the facet handlers defined by the {@code handlers} bean
 * @throws IOException
 */
@SuppressWarnings("unchecked")
private static Collection<FacetHandler<?>> loadFromIndex(File file,WorkArea workArea) throws IOException
{
  FileSystemXmlApplicationContext appCtx = new FileSystemXmlApplicationContext();
  for (Object candidate : workArea.map.values())
  {
    if (candidate instanceof ClassLoader)
    {
      appCtx.setClassLoader((ClassLoader) candidate);
      break;
    }
  }

  // Split the path with the File API rather than on a literal "/", so this also
  // works where the platform separator is not "/".
  // NOTE(review): only the last path segment is URL-encoded, presumably because the
  // location is later resolved as a "file:" URL -- confirm.
  File absoluteFile = file.getAbsoluteFile();
  File parent = absoluteFile.getParentFile();
  String encodedName = URLEncoder.encode(absoluteFile.getName(), "UTF-8");
  File indexDir = (parent == null) ? absoluteFile : new File(parent, encodedName);

  File springFile = new File(indexDir, SPRING_CONFIG);
  appCtx.setConfigLocation("file:" + springFile.getAbsolutePath());
  appCtx.refresh();
  return (Collection<FacetHandler<?>>) appCtx.getBean("handlers");
}
/**
 * Installs the facet handlers of this reader and rebuilds the name-to-handler map.
 * <p>
 * When no handlers are given, they are loaded from the Spring configuration
 * file in the index directory if the index lives in an {@link FSDirectory}
 * and that file exists; otherwise an empty handler list is used.
 *
 * @param facetHandlers
 *          handlers to install, or {@code null} to resolve them from the index
 * @throws IOException
 */
protected void initialize(Collection<FacetHandler<?>> facetHandlers) throws IOException
{
  Collection<FacetHandler<?>> handlers = facetHandlers;
  if (handlers == null) // try to load from index
  {
    final Directory indexDirectory = directory();
    if (indexDirectory instanceof FSDirectory)
    {
      final File indexPath = ((FSDirectory) indexDirectory).getFile();
      final File springConfig = new File(indexPath, SPRING_CONFIG);
      if (springConfig.exists())
      {
        handlers = loadFromIndex(indexPath, _workArea);
      }
      else
      {
        handlers = new ArrayList<FacetHandler<?>>();
      }
    }
    else
    {
      handlers = new ArrayList<FacetHandler<?>>();
    }
  }

  _facetHandlers = handlers;
  _facetHandlerMap = new HashMap<String, FacetHandler<?>>();
  for (FacetHandler<?> handler : handlers)
  {
    _facetHandlerMap.put(handler.getName(), handler);
  }
}
/**
 * Creates a reader that wraps the leaf sub-readers of the given reader and
 * remembers the given reader as the source reader.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @param facetHandlerFactories
 *          runtime facet handler factories to use
 * @param workArea
 *          work area passed to facet handlers while loading
 * @throws IOException
 */
protected BoboIndexReader(IndexReader reader,
                          Collection<FacetHandler<?>> facetHandlers,
                          Collection<RuntimeFacetHandlerFactory<?,?>> facetHandlerFactories,
                          WorkArea workArea) throws IOException
{
  this(reader, facetHandlers, facetHandlerFactories, workArea, true);
  this._srcReader = reader;
}
/**
 * Creates a reader around the given reader.
 *
 * @param reader
 *          index reader to wrap
 * @param facetHandlers
 *          facet handlers to use
 * @param facetHandlerFactories
 *          runtime facet handler factories to use
 * @param workArea
 *          work area passed to facet handlers while loading
 * @param useSubReaders
 *          true => a MultiReader over all wrapped leaf sub-readers becomes the
 *          inner reader; false => the given reader is used as the inner reader
 * @throws IOException
 */
protected BoboIndexReader(IndexReader reader,
                          Collection<FacetHandler<?>> facetHandlers,
                          Collection<RuntimeFacetHandlerFactory<?,?>> facetHandlerFactories,
                          WorkArea workArea,
                          boolean useSubReaders) throws IOException
{
  super(useSubReaders ? new MultiReader(createSubReaders(reader, workArea), false) : reader);

  if (useSubReaders)
  {
    _dir = reader.directory();
    final BoboIndexReader[] leaves = (BoboIndexReader[]) in.getSequentialSubReaders();
    if (leaves != null && leaves.length > 0)
    {
      _subReaders = leaves;
      _starts = new int[leaves.length + 1];
      // record where each leaf's doc ids begin within the composite reader
      int docBase = 0;
      for (int i = 0; i < leaves.length; i++)
      {
        final BoboIndexReader leaf = leaves[i];
        leaf._dir = _dir;
        if (facetHandlers != null)
        {
          leaf.setFacetHandlers(facetHandlers);
        }
        _starts[i] = docBase;
        docBase += leaf.maxDoc();
      }
      _starts[leaves.length] = docBase;
    }
  }

  _runtimeFacetHandlerFactories = facetHandlerFactories;
  _runtimeFacetHandlerFactoryMap = new HashMap<String,RuntimeFacetHandlerFactory<?,?>>();
  if (_runtimeFacetHandlerFactories != null)
  {
    for (RuntimeFacetHandlerFactory<?,?> runtimeFactory : _runtimeFacetHandlerFactories)
    {
      _runtimeFacetHandlerFactoryMap.put(runtimeFactory.getName(), runtimeFactory);
    }
  }

  _facetHandlers = facetHandlers;
  _workArea = workArea;
}
/**
 * Runs facet initialization, starting with an empty set of handlers to remove.
 *
 * @throws IOException
 */
protected void facetInit() throws IOException
{
  final Set<String> toBeRemoved = new HashSet<String>();
  facetInit(toBeRemoved);
}
/**
 * Initializes the facet handlers of this reader. Without sub-readers the
 * handler data is loaded here; with sub-readers each of them is initialized in
 * turn and every handler name collected in {@code toBeRemoved} is then dropped
 * from this reader's handler map.
 *
 * @param toBeRemoved
 *          collects the names of handlers that failed to load
 * @throws IOException
 */
protected void facetInit(Set<String> toBeRemoved) throws IOException
{
  initialize(_facetHandlers);

  if (_subReaders == null)
  {
    loadFacetHandlers(_workArea, toBeRemoved);
    return;
  }

  for (int i = 0; i < _subReaders.length; i++)
  {
    _subReaders[i].facetInit(toBeRemoved);
  }
  _facetHandlerMap.keySet().removeAll(toBeRemoved);
}
/**
 * Replaces the facet handler collection of this reader. The name-to-handler
 * map is not touched here.
 *
 * @param facetHandlers
 *          the new facet handlers
 */
protected void setFacetHandlers(Collection<FacetHandler<?>> facetHandlers)
{
  this._facetHandlers = facetHandlers;
}
/**
 * Creates a query that matches all docs in the index.
 *
 * @deprecated use {@link org.apache.lucene.search.MatchAllDocsQuery} instead.
 * @return query that matches all docs in the index
 */
@Deprecated
public Query getFastMatchAllDocsQuery()
{
  return new MatchAllDocsQuery();
}
/**
 * Utility method to dump out all facet fields (name and terms) of this index.
 * <p>
 * For every facet name the field name is written followed by a colon, then one
 * term text per line, then a blank separator.
 *
 * @param outFile
 *          File to dump to.
 * @throws IOException
 *           if the file cannot be opened, the terms cannot be read, or writing fails
 */
public void dumpFields(File outFile) throws IOException
{
  PrintWriter out = new PrintWriter(new FileWriter(outFile));
  try
  {
    for (String fieldName : getFacetNames())
    {
      // terms(Term) is already positioned on the first term at or after the given
      // one, so the current term must be consumed before advancing with next()
      TermEnum te = terms(new Term(fieldName, ""));
      try
      {
        out.write(fieldName + ":\n");
        do
        {
          Term term = te.term();
          if (term == null || !fieldName.equals(term.field()))
          {
            break;
          }
          out.write(term.text() + "\n");
        }
        while (te.next());
      }
      finally
      {
        te.close();
      }
      out.write("\n\n");
    }
    // PrintWriter never throws on write; surface a failed dump explicitly
    if (out.checkError())
    {
      throw new IOException("error writing field dump to " + outFile);
    }
  }
  finally
  {
    out.close();
  }
}
/**
 * Gets all the facet field names, i.e. the key set of the facet handler map.
 *
 * @return Set of facet field names
 */
public Set<String> getFacetNames()
{
  final Set<String> names = _facetHandlerMap.keySet();
  return names;
}
/**
 * Looks up a facet handler by name.
 *
 * @param fieldname
 *          name of the facet handler
 * @return the facet handler, or {@code null} if none is registered under that name
 */
public FacetHandler<?> getFacetHandler(String fieldname)
{
  final FacetHandler<?> handler = _facetHandlerMap.get(fieldname);
  return handler;
}
/**
 * Returns the wrapped leaf sub-readers of this reader.
 *
 * @return the sub-readers, or {@code null} if this reader has none
 */
@Override
public IndexReader[] getSequentialSubReaders() {
  return this._subReaders;
}
/**
 * Gets the map of facet handlers keyed by handler name.
 *
 * @return facet handler map
 */
public Map<String, FacetHandler<?>> getFacetHandlerMap()
{
  return this._facetHandlerMap;
}
/**
 * Gets the runtime facet handler factories keyed by factory name.
 *
 * @return the map of RuntimeFacetHandlerFactories
 */
public Map<String,RuntimeFacetHandlerFactory<?, ?>> getRuntimeFacetHandlerFactoryMap()
{
  return this._runtimeFacetHandlerFactoryMap;
}
/**
 * Replaces the inner reader of this instance.
 *
 * @param in
 *          the new inner reader
 * @throws UnsupportedOperationException
 *           if this reader has sub-readers
 */
public void rewrap(IndexReader in){
  if (_subReaders == null)
  {
    this.in = in;
    return;
  }
  throw new UnsupportedOperationException("this BoboIndexReader has subreaders");
}
/**
 * Returns the stored document for the given doc id, augmented with the values
 * the facet handlers report for that document.
 * <p>
 * With sub-readers the call is routed to the sub-reader owning the doc id.
 * Otherwise, for every facet handler that reports values, the stored values of
 * the same-named field are merged with the reported ones (duplicates
 * collapsed), all existing fields of that name are removed, and the merged
 * values are added back as un-stored, not-analyzed fields.
 *
 * @param docid
 *          document id
 * @return the augmented document
 * @throws IOException
 */
@Override
public Document document(int docid) throws IOException
{
  if (_subReaders != null)
  {
    int idx = readerIndex(docid, _starts, _subReaders.length);
    return _subReaders[idx].document(docid - _starts[idx]);
  }

  Document doc = super.document(docid);
  for (FacetHandler<?> facetHandler : _facetHandlerMap.values())
  {
    String[] facetVals = facetHandler.getFieldValues(this, docid);
    if (facetVals == null)
    {
      continue;
    }
    String fieldName = facetHandler.getName();
    Set<String> merged = new HashSet<String>(Arrays.asList(doc.getValues(fieldName)));
    merged.addAll(Arrays.asList(facetVals));
    // removeField() drops only the first field of that name; every existing
    // instance has to go, otherwise multi-valued fields end up duplicated
    doc.removeFields(fieldName);
    for (String val : merged)
    {
      doc.add(new Field(fieldName,
                        val,
                        Field.Store.NO,
                        Field.Index.NOT_ANALYZED));
    }
  }
  return doc;
}
/**
 * Binary-searches {@code starts} for the sub-reader that contains doc id
 * {@code n}. When several consecutive entries share the matching start value,
 * the last of them is chosen.
 *
 * @param n
 *          doc id to locate
 * @param starts
 *          starting doc id of each sub-reader
 * @param numSubReaders
 *          number of leading entries of {@code starts} to search
 * @return index of the sub-reader containing {@code n}
 */
private static int readerIndex(int n, int[] starts, int numSubReaders)
{
  int low = 0;
  int high = numSubReaders - 1;
  while (low <= high)
  {
    final int middle = (low + high) >>> 1;
    final int base = starts[middle];
    if (n == base)
    {
      // step over entries that share this start value
      int match = middle;
      while (match + 1 < numSubReaders && starts[match + 1] == base)
      {
        match++;
      }
      return match;
    }
    if (n < base)
    {
      high = middle - 1;
    }
    else
    {
      low = middle + 1;
    }
  }
  return high;
}
/**
 * Work area for loading: a small registry that keeps at most one object per
 * runtime class.
 */
public static class WorkArea
{
  HashMap<Class<?>, Object> map = new HashMap<Class<?>, Object>();

  /**
   * Fetches the object registered under exactly the given class.
   *
   * @param cls
   *          class the object was registered under
   * @return the registered object, or {@code null} if there is none
   */
  @SuppressWarnings("unchecked")
  public <T> T get(Class<T> cls)
  {
    return (T) map.get(cls);
  }

  /**
   * Registers an object under its runtime class, replacing any object
   * previously registered under that class.
   *
   * @param obj
   *          object to register
   */
  public void put(Object obj)
  {
    final Class<?> key = obj.getClass();
    map.put(key, obj);
  }

  /**
   * Removes every registered object.
   */
  public void clear()
  {
    map.clear();
  }

  /**
   * @return the string form of the underlying map
   */
  @Override
  public String toString(){
    final String rendered = map.toString();
    return rendered;
  }
}
/**
 * Bare constructor used by {@link #copy(IndexReader)}: only installs the inner
 * reader and leaves every other field at its default.
 *
 * @param in
 *          the inner reader
 */
private BoboIndexReader(IndexReader in)
{
  super(in);
}
/**
 * Creates a new reader around the given inner reader that shares this reader's
 * facet handlers, runtime factories and work area, and starts out with a copy
 * of this reader's facet data entries.
 *
 * @param in
 *          inner reader for the copy; also becomes its source reader
 * @return the new reader
 * @throws UnsupportedOperationException
 *           if this reader has sub-readers
 */
public BoboIndexReader copy(IndexReader in)
{
  if (_subReaders != null)
  {
    throw new UnsupportedOperationException("this BoboIndexReader has subreaders");
  }

  final BoboIndexReader duplicate = new BoboIndexReader(in);
  duplicate._srcReader = in;
  duplicate._workArea = this._workArea;
  duplicate._starts = this._starts;
  // handler and factory collections are shared by reference
  duplicate._facetHandlers = this._facetHandlers;
  duplicate._facetHandlerMap = this._facetHandlerMap;
  duplicate._runtimeFacetHandlerFactories = this._runtimeFacetHandlerFactories;
  duplicate._runtimeFacetHandlerFactoryMap = this._runtimeFacetHandlerFactoryMap;
  // facet data entries are copied into the duplicate's own map
  duplicate._facetDataMap.putAll(this._facetDataMap);
  return duplicate;
}
}