Package org.sf.mustru.filters

Source Code of org.sf.mustru.filters.PsHandler

package org.sf.mustru.filters;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FilePermission;
import java.io.IOException;
import java.io.PrintWriter;

import org.sf.mustru.docs.IndexableDoc;
//import org.sf.mustru.docs.TextDoc;
import org.sf.mustru.utils.*;

import org.apache.log4j.Logger;

/**
* Class to extract text from postscript files using ps2txt
*
*/
public class PsHandler implements HandlerInterface
{
  static Logger logger = Logger.getLogger(PsHandler.class.getName());
 
  /**
   * empty constructor
   */
  public PsHandler() { super(); }
 
  /**
   *  Run an independent process to run ps2txt from the command line
   *  @param ifile The name of the file to be indexed
   */
  public void getDocument(String ifile, IndexableDoc doc)
  {
    String bodyText = runps2txt(ifile);
    if (bodyText != null) { bodyText = StringTools.filterChars(bodyText); }
    doc.setContentsnew StringBuffer(bodyText) );
    doc.setFileType("text"); doc.setFileName(ifile);
    return;
  }
 
  /**
   * Run ps2txt to try and extract text from the postscript document
   * @param ifile postscript file
   * @return String Extracted text
   */
  private static synchronized String runps2txt(String ifile)
  {
   String outfile = Constants.PS2TXTDIR + File.separator + "run_ps2txt";
   createBatchFile(ifile, outfile);
   String[] cmdline = {outfile};
   String bodyText = ExecProgram.runProgram(cmdline);
   if (bodyText.length() > 0)
   { logger.info("ps2txt did extract text from " + ifile)}
   else
   { logger.warn("ps2txt did not extract text from " + ifile); }
   return(bodyText);
  }
  /**
   * @param ifile Name of postscript file to be indexed
   * @param outfile Name of the output batch file to be executed
   */
  private static void createBatchFile(String ifile, String outfile)
  {
   String permissions = "read,execute";
   PrintWriter pw = null; FileOutputStream fos = null;
   try
   {
    //*-- create the Linux script
    if (Constants.OSNAME.endsWith("x"))
    {
     String tfile = outfile + ".sh";
     fos = new FileOutputStream(new File(tfile));
     new FilePermission(tfile, permissions);
     pw = new PrintWriter(fos);
     pw.println("#!/bin/sh");
     pw.println("#*-- Generated Linux script to run ps2txt");
     pw.println("export HOME=\"" + Constants.PS2TXTDIR + "\"");
     pw.println("cd $HOME");
     pw.println("ps2txt -dvi \"" + ifile + "\"");
    }
    //*-- create the windows script
    else
    {
     String tfile = outfile + ".bat";
     fos = new FileOutputStream(new File(tfile));
     new FilePermission(tfile, permissions);
     pw = new PrintWriter(fos);
     pw.println("@ECHO OFF");
     pw.println("REM *-- Generated Windows script to run ps2txt");
     pw.println("set HOME=" + Constants.PS2TXTDIR);
     pw.println("set PATH=" + Constants.CYGWINDIR + ";%PATH%");
     ifile = ifile.replace('/', '\\');
     pw.println("%HOME%\\ps2txt.exe -dvi \"" + ifile + "\"");  
    }
   }
   catch (IOException ie)
   { logger.error("Could not create run ps2txt batch file" + ie.getMessage()); }
   finally
   { try { if (pw != null) pw.close();
   if (fos != null) { fos.flush(); fos.close(); } }
   catch (IOException ie) { logger.error("Ignore error"); } }
  }
}
TOP

Related Classes of org.sf.mustru.filters.PsHandler

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.