package dumplucene;
import java.io.*;
import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.*;
import java.io.File;
import java.io.IOException;
import java.util.List;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
/**
* Dumps the contents of a Lucene index: the document fields can be written
* to a CSV file (for import into a database or any other use the user needs),
* and high-frequency index terms can be printed to standard output.
* @author rahul & sudipto
*
*/
public class DumpLucene {

    /** Index location used when {@link #main(String[])} is started without arguments. */
    private static final String DEFAULT_INDEX_DIR =
            "/home/dspace/solr/apache-solr-3.3.0/example/solr/data/index/";

    /** {@link #dump()} reports a term only when its document frequency exceeds this value. */
    private static final int MIN_REPORTED_DOC_FREQ = 1000;

    /** Document most recently loaded by {@link #processCSV()}. */
    Document thisDoc;
    /** Index path passed to the two-argument constructor; static, hence shared by all instances. */
    static String indexDir = "";
    /** CSV output path passed to the two-argument constructor; static, hence shared by all instances. */
    static String file = "";
    /** Complete CSV text produced by the most recent {@link #processCSV()} call. */
    String str = "";
    /**
     * Former scratch buffers of {@link #processCSV()}. They are no longer written to and are
     * only retained so that package-level code referencing them keeps compiling.
     */
    String tmpStr = "";
    String tmpStrNew = "";
    /** Directory opened by the two-argument constructor; {@code null} for the one-argument constructor. */
    Directory dr;
    /** Path of the index read by {@link #dump()}. */
    private String dir;

    /**
     * Opens the index at {@code indexName} and immediately exports it to the CSV file
     * {@code fileName} by calling {@link #processCSV()}.
     *
     * @param indexName path of the Lucene index directory
     * @param fileName  path of the CSV file to write
     * @throws Exception if the index cannot be opened or read, or the file cannot be written
     */
    public DumpLucene(String indexName, String fileName) throws Exception {
        indexDir = indexName;
        file = fileName;
        // Also remember the path per instance so that dump() works on objects built this way.
        dir = indexName;
        dr = FSDirectory.open(new File(indexName));
        processCSV();
    }

    /**
     * Creates a dumper for the index at {@code dir} without reading anything yet;
     * call {@link #dump()} to print the frequent terms.
     *
     * @param dir path of the Lucene index directory
     */
    public DumpLucene(String dir) {
        this.dir = dir;
    }

    /**
     * Exports the index opened by the two-argument constructor as CSV.
     * <p>
     * The first line lists every field name known to the index; each following line holds,
     * for one live (non-deleted) document, the first value of each of those fields. Cells are
     * wrapped in double quotes, embedded double quotes are doubled, and a field that is
     * missing from a document yields an empty, unquoted cell. The resulting text is kept in
     * {@link #str} and written to {@link #file}.
     *
     * @throws Exception if the index cannot be read or the CSV file cannot be written
     */
    public void processCSV() throws Exception {
        // Build the output locally so that repeated calls do not accumulate stale content.
        StringBuilder csv = new StringBuilder();

        IndexReader ind = IndexReader.open(dr);
        try {
            Collection<?> fieldNames = ind.getFieldNames(IndexReader.FieldOption.ALL);

            // Header line: one cell per field name.
            StringBuilder line = new StringBuilder();
            int column = 0;
            for (Object fieldName : fieldNames) {
                String name = fieldName.toString();
                appendCell(line, column++, "".equals(name) ? null : name);
            }
            csv.append(line).append('\n');

            // Document ids run from 0 to maxDoc()-1 and may contain gaps left by deletions,
            // so numDocs() is not a valid upper bound for the id loop.
            int maxDoc = ind.maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                if (ind.isDeleted(docId)) {
                    continue;
                }
                try {
                    thisDoc = ind.document(docId);
                } catch (IOException e) {
                    // Best effort, as before: an unreadable document is skipped, but now reported.
                    System.err.println("Skipping document " + docId + ": " + e);
                    continue;
                }

                line.setLength(0);
                column = 0;
                for (Object fieldName : fieldNames) {
                    String name = fieldName.toString();
                    String value = null;
                    if (!"".equals(name)) {
                        String[] values = thisDoc.getValues(name);
                        if (values != null && values.length > 0) {
                            value = values[0];
                        }
                    }
                    appendCell(line, column++, value);
                }
                csv.append(line).append('\n');
            }
        } finally {
            ind.close();
        }

        str = csv.toString();

        // FileWriter encodes with the platform default charset, exactly as the previous version did.
        Writer writer = new BufferedWriter(new FileWriter(file));
        try {
            writer.write(str);
        } finally {
            writer.close();
        }
    }

    /**
     * Appends one CSV cell to {@code line}, preceded by a comma unless it is the first column.
     *
     * @param line   buffer holding the line built so far
     * @param column zero-based index of the cell within the line
     * @param value  cell content, or {@code null} for an empty, unquoted cell
     */
    private static void appendCell(StringBuilder line, int column, String value) {
        if (column > 0) {
            line.append(',');
        }
        if (value != null) {
            // Double embedded quotes so that the quoted cell stays parseable.
            line.append('"').append(value.replace("\"", "\"\"")).append('"');
        }
    }

    /**
     * Command-line entry point.
     * <ul>
     * <li>no arguments: prints the frequent terms of the built-in default index;</li>
     * <li>one argument: prints the frequent terms of the index at {@code args[0]};</li>
     * <li>two arguments: exports the index at {@code args[0]} to the CSV file {@code args[1]}.</li>
     * </ul>
     *
     * @param args optional index directory and optional CSV output file
     * @throws Exception if the index cannot be read or the output cannot be written
     */
    public static void main(String args[]) throws Exception {
        String indexPath = args.length > 0 ? args[0] : DEFAULT_INDEX_DIR;
        if (args.length > 1) {
            // The two-argument constructor performs the CSV export itself.
            new DumpLucene(indexPath, args[1]);
        } else {
            DumpLucene ld = new DumpLucene(indexPath);
            ld.dump();
        }
    }

    /**
     * Prints every term whose document frequency is greater than
     * {@value #MIN_REPORTED_DOC_FREQ} to standard output, one per line, in the form
     * {@code <term text> -- <document frequency>}.
     * <p>
     * The method does not produce XML itself; the XML-related entries of the throws clause
     * are kept so that existing callers continue to compile. A complete XML dump can be
     * produced by passing each document of the index to {@code dumpDocument}.
     *
     * @throws XMLStreamException        never thrown by the current implementation
     * @throws FactoryConfigurationError never thrown by the current implementation
     * @throws CorruptIndexException     if the index is corrupt
     * @throws IOException               if the index cannot be opened or read
     */
    public void dump() throws XMLStreamException, FactoryConfigurationError,
            CorruptIndexException, IOException {
        Directory directory = FSDirectory.open(new File(dir));
        try {
            IndexReader reader = IndexReader.open(directory, true); // true = read-only
            try {
                TermEnum terms = reader.terms();
                try {
                    while (terms.next()) {
                        int docFreq = terms.docFreq();
                        if (docFreq > MIN_REPORTED_DOC_FREQ) {
                            System.out.println(terms.term().text() + " -- " + docFreq);
                        }
                    }
                } finally {
                    terms.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Writes one document as a {@code <document>} element that contains a
     * {@code <field name="..." value="..."/>} child for every field of the document.
     * Not called by {@link #dump()} at present.
     *
     * @param document the document to serialize
     * @param out      the XML writer receiving the elements
     * @throws XMLStreamException if the writer fails
     */
    @SuppressWarnings("unchecked")
    private void dumpDocument(Document document, XMLStreamWriter out)
            throws XMLStreamException {
        out.writeStartElement("document");
        for (Fieldable field : document.getFields()) {
            // stringValue() may be null (e.g. for binary fields); write an empty attribute then.
            String value = field.stringValue();
            out.writeStartElement("field");
            out.writeAttribute("name", field.name());
            out.writeAttribute("value", value == null ? "" : value);
            out.writeEndElement();
        }
        out.writeEndElement();
    }
}//End of Class