package com.redpiranha.server.input.index;
/*
* Copyright (C) 2001- 2004 Paul Browne, http://www.firstpartners.net,
*
* released under terms of the GPL license
* http://www.opensource.org/licenses/gpl-license.php
*
* This product includes software developed by the
* Apache Software Foundation (http://www.apache.org).
*
*/
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.*;
import java.io.*;
import com.redpiranha.common.util.data.*;
import com.redpiranha.server.input.index.*;
import com.redpiranha.server.input.index.data.*;
import com.redpiranha.server.input.index.exception.*;
import com.redpiranha.server.input.index.lucene.*;
import com.redpiranha.common.*;
import java.io.*;
import com.redpiranha.server.input.index.data.*;
/**
 * Adds documents to, and resets, the Lucene index stored under {@link #INDEX_NAME}.
 *
 * Every operation opens its own {@link IndexWriter}, optimizes the index and
 * closes the writer again before returning.
 *
 * @author Paul Browne , based on sample from Apache Lucene
 */
public class IndexAgent {
    // Handle to logger
    static Logger log = Logger.getLogger(IndexAgent.class);

    /**
     * Index Name that we are using - this is the Directory the Lucene index will be stored under
     */
    public static final String INDEX_NAME = Constants.INDEX_DIR;

    /**
     * Adds one Lucene document per element of the given collection to the
     * existing index, then optimizes the index.
     *
     * The writer is always closed, even when indexing fails part-way through.
     * A <code>null</code> collection is logged at debug level and ignored; the
     * index is not opened in that case.
     *
     * @param filesToIndex - Collection of the files to index; every element is
     * cast to <code>FileDocument</code>, so any other element type fails with a
     * <code>ClassCastException</code>
     *
     * @throws java.io.IOException - if Lucene cannot open, write to, optimize or close the index
     * @throws ClassNotFoundException - declared for callers; presumably raised while building the document
     * @throws IndexException - declared for callers; presumably raised while building the document
     */
    public void indexFiles(Collection filesToIndex)
        throws java.io.IOException, ClassNotFoundException, IndexException {
        // Guard the argument itself: Collection.iterator() never returns null,
        // so a null collection is the only "nothing to index" case to catch here.
        if (filesToIndex == null) {
            log.debug("List of files to index was null");
            return;
        }

        //Local Variables
        FileDocument fileEntity = null;
        DataContainer fileMetaData = null;
        Document thisDocument = null;
        int counter = 1;

        //Handle to the Index - false , uses existing index
        IndexWriter writer =
            new IndexWriter(INDEX_NAME, new RpAnalyzer(), false);
        try {
            Iterator loopList = filesToIndex.iterator();

            // Loop and index the files
            while (loopList.hasNext()) {
                //Get the next file to index
                fileEntity = (FileDocument) loopList.next();

                // NOTE(review): fileMetaData is never populated from fileEntity, so
                // createDocument() is always handed null. The conversion from
                // FileDocument to DataContainer is not visible in this file -
                // TODO confirm the intended lookup and wire it in here.
                if (log.isDebugEnabled()) {
                    log.debug("Indexed File Meta Data:\n" + fileMetaData);
                    log.debug("End Indexed File Meta Data");
                }

                thisDocument = RpDocumentBuilder.createDocument(fileMetaData);
                writer.addDocument(thisDocument);

                //debug code
                if (log.isDebugEnabled()) {
                    log.debug("INDEXED DOCUMENT NUMBER:" + counter);
                    logIndexedFields(thisDocument);
                }
                counter++;
            }

            // Only optimize once every document has been added successfully
            writer.optimize();
        } finally {
            //Close off the writer, whether or not indexing succeeded
            writer.close();
        }
    }

    /**
     * Logs, at debug level, the name and the indexed / stored / tokenized
     * flags of every field in the given document.
     *
     * @param indexedDocument - the document that was just added to the index
     */
    private void logIndexedFields(Document indexedDocument) {
        Enumeration resultKeywords = indexedDocument.fields();
        while (resultKeywords.hasMoreElements()) {
            Field thisField = (Field) (resultKeywords.nextElement());
            log.debug(
                "indexed: contains field name:"
                    + thisField.name()
                    + " isIndexed:"
                    + thisField.isIndexed()
                    + " isStored:"
                    + thisField.isStored()
                    + " isTokenized:"
                    + thisField.isTokenized());
        }
    }

    /**
     * Placeholder for looking up the Meta Data of a collection of file IDs.
     *
     * The lookup itself is not implemented: the argument is ignored and the
     * method always returns <code>null</code>.
     *
     * @param fileIds - collection of File Id's that we want the meta information for (currently unused)
     *
     * @return always <code>null</code>
     */
    private Enumeration getDocumentMetaData(Collection fileIds)
    // throws LocatorException, FinderException
    {
        //Local Variables
        // ItemEntityHome itemHome = LocalServiceLocator.getInstance().getLocalItemEntity();
        Enumeration itemEnumeration = null;
        //itemHome.findByMultipleFileId(fileIds);
        return itemEnumeration;
    }

    /**
     * Clears any previous index by creating a new, empty index under
     * {@link #INDEX_NAME}.
     *
     * No documents are added here; the new index is only optimized and closed.
     * The writer is closed even if optimizing fails.
     *
     * @throws java.io.IOException - if Lucene cannot create, optimize or close the index
     * @throws ClassNotFoundException - declared for callers; not raised by the code in this method
     * @throws IndexException - declared for callers; not raised by the code in this method
     */
    public void resetIndex()
        throws java.io.IOException, ClassNotFoundException, IndexException {
        //New Write clears index
        log.debug("creating new index in:" + INDEX_NAME);

        //true creates new index
        IndexWriter writer =
            new IndexWriter(INDEX_NAME, new RpAnalyzer(), true);
        try {
            writer.optimize();
        } finally {
            //Close off the writer
            writer.close();
        }
    }
}