Package org.sf.mustru.utils

Source Code of org.sf.mustru.utils.IndexTools

package org.sf.mustru.utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilePermission;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.eclipse.core.internal.runtime.HashMapOfString;
import org.sf.mustru.docs.IndexableDoc;

import com.sleepycat.je.Cursor;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;

/**
* A collection of tools to view the BDB database created for the index
*
*/
public class IndexTools extends Thread
{
static Logger logger = Logger.getLogger(IndexTools.class.getName());
private StringBuffer runOutput = new StringBuffer();
private String filename = "";
private boolean runRepair = false;
private boolean runSummary = false;
private boolean running = false;
 
public IndexTools(String filename) { this.filename = filename;  }

/**
  * Run one of the utilities - repair or summary
  */
public void run()
{
   running = true;
   runOutput = (runRepair) ? repairIndex(): summaryIndex();
   dumpToFile();
   running = false;
}

/**
  * Verify that the contents of the BDB database match the file system
  * @return StringBuffer containing log statements of the repair
  */
public StringBuffer repairIndex()
{
  StringBuffer out = new StringBuffer();

  //*-- scan the database by file name and check if the file name exists, otherwise
  //*-- delete the entry and keep track of the deletions
  DbTools dbt = Constants.getDbt();
  dbt.openDB(Constants.EXT_FILES_DB, false, false);
  Cursor cursor = null; int count = 0; int errors = 0;
  ArrayList<String> delFiles = new ArrayList<String>();
  IndexableDoc idoc = new IndexableDoc();
  try
  {
   cursor = dbt.getCurrentDB().openCursor(null, null);
   DatabaseEntry key = new DatabaseEntry();
   DatabaseEntry data = new DatabaseEntry();
   LOOP: while (cursor.getNext(key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS)
   {
    //*-- check if the file name exists in the filesystem
    if (!running) break LOOP;
    String filename = new String( key.getData(), "UTF-8");
    File file = new File(filename);
    if (!file.exists())
    { out.append("ERROR: File " + filename + " does not exist" + Constants.NEWLINE);
    idoc = (IndexableDoc) idoc.getBdbBinding().entryToObject(data);

    //*-- remove the entry from the database
    if (!dbt.delete(filename))
     logger.error("Could not delete " + filename + " from the database");
    else
    { logger.info("Deleted " + filename + " from the database"); delFiles.add(filename); }
    errors++;
    } //*-- end of outer if
    ++count;
   } //*-- end of while

   //*-- clean up the Lucene index
   FSDirectory fsd = FSDirectory.getDirectory(new File(Constants.getINDEXDIR()), false);
   IndexReader ir = IndexReader.open(fsd);
   for (int i = 0; i < delFiles.size(); i++)
    ir.deleteDocuments(new Term("key", (String) delFiles.get(i)) );
   ir.close();
  }
  catch (DatabaseException dbe)
  { logger.error("Could not open cursor to browse File index" + dbe.getMessage() ); }
  catch (UnsupportedEncodingException ue)
  { logger.error("Could not decode key" + ue.getMessage() ); }
  catch (IOException ie)
  { logger.error("Could not read index directory" + ie.getMessage() ); }
  finally
  { try
  { if (cursor != null) cursor.close();
  dbt.closeDB();
  }
  catch (DatabaseException de) { logger.error("Ignore error"); }
  }

  out.append("Date: " + new Date().toString() + Constants.NEWLINE);
  out.append("Completed checking " + count + " files, found " + errors + " errors");
  return(out)
}

/*
  * Return a stringbuffer containing the types and numbers of files in the index
  */
public StringBuffer summaryIndex()
{
  //*-- initialize the hash
  Properties props = new Properties();
  try { props.load(new FileInputStream( Constants.DOCTYPES_FILE)); }
  catch ( IOException e) { logger.error("Could not open " + Constants.DOCTYPES_FILE + " " + e.getMessage()); }

  //*-- load the types of media from the docTypes properties file
  HashMapOfString stats = new HashMapOfString();
  String[] docTypes = new String[props.size()]; int j = 0;
  for (Enumeration keys = props.propertyNames(); keys.hasMoreElements(); )
  { String key = (String) keys.nextElement(); stats.put(key, "0" );
  docTypes[j++] = key; }

  //*-- scan the database by file name and keep track of the types of files in the index
  DbTools dbt = Constants.getDbt();  
  dbt.openDB(Constants.EXT_FILES_DB, true, false);
  Cursor cursor = null; IndexableDoc idoc = new IndexableDoc();
  try
  {
   cursor = dbt.getCurrentDB().openCursor(null, null);
   DatabaseEntry key = new DatabaseEntry();
   DatabaseEntry data = new DatabaseEntry(); int i = 0;
   LOOP: while (cursor.getNext(key, data, LockMode.DEFAULT) == OperationStatus.SUCCESS)
   {
    if (!running) break LOOP;
    idoc = (IndexableDoc) idoc.getBdbBinding().entryToObject(data);
    String docType = idoc.getFileType();
    if (docType == null) docType = "unknown";
    String num =  stats.get(docType);
    if ( num == null) { docType = "unknown"; num =  stats.get(docType); }
    int val =  Integer.parseInt(num) + 1;
   
    i++; if ( (i % 1000) == 0) System.out.println("finished " + i + " documents");
   
    stats.put(docType, String.valueOf(val) );
   } //*-- end of while

  } //*-- end of try
  catch (DatabaseException dbe)
  { logger.error("Could not open cursor to browse File index" + dbe.getMessage() ); }
  finally
  { try { if (cursor != null) cursor.close();  dbt.closeDB(); }
  catch (DatabaseException de) { logger.error("Ignore error"); }
  }

  StringBuffer out = new StringBuffer(); int numFiles = 0;
  int io;
  for (int i = 0; i < docTypes.length; i++)
  { io =   Integer.parseInt( stats.get( docTypes[i] ) );
  out.append("No. of " + docTypes[i] + " files: " + io + Constants.NEWLINE);
  numFiles += io;
  }
  out.append("Total no. of files: " + numFiles + Constants.NEWLINE)

  return(out)
}

//*-- dump the results from the buffer to a file
private synchronized void dumpToFile()
{
  PrintWriter pw = null; FileOutputStream fos = null;
  try
  {
   fos = new FileOutputStream(new File(filename));
   new FilePermission(filename, "read,execute");
   pw = new PrintWriter(fos);
   pw.println(runOutput);   
  }
  catch (IOException ie)
  { logger.error("Could not create file" + ie.getMessage()); }
  finally
  { try { if (pw != null) pw.close();
  if (fos != null)  { fos.flush(); fos.close(); }
  }
  catch (IOException ie) { logger.error("Ignore error"); }
  }
}

public boolean isRunRepair()
{ return runRepair; }

public void setRunRepair(boolean runRepair)
{ this.runRepair = runRepair; }

public boolean isRunSummary()
{ return runSummary; }

public void setRunSummary(boolean runSummary)
{ this.runSummary = runSummary; }

public StringBuffer getRunOutput()
{ return runOutput; }

public void setRunOutput(StringBuffer runOutput)
{ this.runOutput = runOutput; }

public  boolean isRunning()
{ return running; }

public void setRunning(boolean running)
{ this.running = running; }

}
TOP

Related Classes of org.sf.mustru.utils.IndexTools

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.