Package dumplucene

Source Code of dumplucene.DumpLucene

package dumplucene;

import java.io.*;
import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.*;

import java.io.File;
import java.io.IOException;
import java.util.List;

import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

/**
* This class is used to write Lucene Indexed Data to the CSV file
* which can be used by any database or as for requirement of the user
* @author rahul & sudipto
*
*/
public class DumpLucene {

    Document thisDoc;
    static String indexDir = "";
    static String file = "";
    String str = "";
    String tmpStr = "";
    String tmpStrNew = "";
    Directory dr;
    private String dir;

    /**
     * Constructor of the class to initialize indexName & fileName
     * And call method to process on Lucene Index
     * @param indexName
     * @param fileName
     * @throws Exception
     */
    public DumpLucene(String indexName, String fileName) throws Exception {
        indexDir = indexName;
        file = fileName;
        dr = FSDirectory.open(new File(indexDir));

        processCSV();
    }

    /**
     * method to process data present in the Lucene Index
     * And reterive data from the index write in the file given
     * By the user
     * @throws Exception
     */
    public void processCSV() throws Exception {
        IndexReader ind = IndexReader.open(dr);
        int totalDocs = ind.numDocs();
        Collection coll = ind.getFieldNames(IndexReader.FieldOption.ALL);
        Iterator its = coll.iterator();
        /**
         * Reterive document field  one by one
         * from Lucene Based Index
         */
        while (its.hasNext()) {
            tmpStrNew = its.next().toString();
            if (!"".equals(tmpStrNew)) {
                try {
                    tmpStr = tmpStr + "\"" + tmpStrNew + "\",";
                } catch (Exception e) {
                    tmpStr = tmpStr + ",";
                }
            } else {
                tmpStr = tmpStr + ",";
            }
        }//End of while loop
        /**
         * Store fields in a string
         */
        str = str + tmpStr.substring(0, tmpStr.length() - 1) + "\n";
        /**
         * Reterive Document value from lucene index
         */
        for (int m = 0; m < totalDocs; m++) {
            tmpStr = "";
            try {
                thisDoc = ind.document(m);
            } catch (Exception e) {
                continue;
            }

            its = coll.iterator();
            while (its.hasNext()) {
                tmpStrNew = its.next().toString();
                if (!"".equals(tmpStrNew)) {
                    try {
                        tmpStr = tmpStr + "\"" + thisDoc.getValues(tmpStrNew)[0] + "\",";
                    } catch (Exception e) {
                        tmpStr = tmpStr + ",";
                    }
                } else {
                    tmpStr = tmpStr + ",";
                }
            }//End of while loop
            str = str + tmpStr.substring(0, tmpStr.length() - 1) + "\n";
        }//End of for loop
        /**
         * Write values in a file given by user
         */
        char buffer[] = new char[str.length()];
        str.getChars(0, str.length(), buffer, 0);
        try {
            FileWriter f0 = new FileWriter(file);
            for (int i = 0; i < buffer.length; i += 1) {
                f0.write(buffer[i]);
            }
            f0.close();
        } catch (Exception e) {
            System.out.println(e.getMessage());
            // TODO: handle exception
        }

    }//End of method

    public static void main(String args[]) throws Exception {
        /**
         * Path of lucene-index
         */
        //String indexDir = "C:\\Users\\Sony\\Documents\\NetBeansProjects\\trunk\\dumpLucene\\testindex";
        //String indexDir = "C:\\Users\\Sony\\Documents\\NetBeansProjects\\apache-solr-3.3.0\\example\\solr\\data\\index";
        String indexDir = "/home/dspace/solr/apache-solr-3.3.0/example/solr/data/index/";
        /**
         * Path of CSV file in which data of lucene index to be written
         */
        String fileDir = "index.csv";
        /**
         * constructor call to process lucene index
         */
        //DumpLucene lCSV = new DumpLucene(indexDir, fileDir);
        DumpLucene ld = new DumpLucene(indexDir);
        ld.dump();
       
    }

    public DumpLucene(String dir) {
        this.dir = dir;
    }

    public void dump() throws XMLStreamException, FactoryConfigurationError,
            CorruptIndexException, IOException {
        XMLStreamWriter out = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out);

        IndexReader reader = IndexReader.open(FSDirectory.open(new File(dir)), true);
        TermEnum terms = reader.terms();
       
        while (terms.next()) { // &&  (terms.docFreq() > 2)) {
            if (terms.docFreq() > 1000) {
              System.out.print( terms.term().text());
              System.out.print(" -- ");
              System.out.println(terms.docFreq());
            }
        }
       
       
       // out.writeStartDocument();
       // out.writeStartElement("documents");
       // for (int i = 0; i < reader.numDocs(); i++) {
       //     dumpDocument(reader.document(i), out);
       // }
       // out.writeEndElement();
       // out.writeEndDocument();

        //out.flush();
        //reader.close();
    }

    @SuppressWarnings("unchecked")
    private void dumpDocument(Document document, XMLStreamWriter out)
            throws XMLStreamException {
        out.writeStartElement("document");
        for (Fieldable field :  document.getFields()) {
            out.writeStartElement("field");
            out.writeAttribute("name", field.name());
            out.writeAttribute("value", field.stringValue());
            out.writeEndElement();
        }
        out.writeEndElement();
    }
}//End of Class
TOP

Related Classes of dumplucene.DumpLucene

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.