Package edu.harvard.hul.ois.fits

Source Code of edu.harvard.hul.ois.fits.Fits

/*
* Copyright 2009 Harvard University Library
*
* This file is part of FITS (File Information Tool Set).
*
* FITS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* FITS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with FITS.  If not, see <http://www.gnu.org/licenses/>.
*/
package edu.harvard.hul.ois.fits;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.XMLConfiguration;
import org.jdom.Document;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

import edu.harvard.hul.ois.fits.consolidation.ToolOutputConsolidator;
import edu.harvard.hul.ois.fits.exceptions.FitsConfigurationException;
import edu.harvard.hul.ois.fits.exceptions.FitsException;

import edu.harvard.hul.ois.fits.mapping.FitsXmlMapper;
import edu.harvard.hul.ois.fits.tools.Tool;
import edu.harvard.hul.ois.fits.tools.ToolBelt;
import edu.harvard.hul.ois.fits.tools.ToolOutput;
import edu.harvard.hul.ois.ots.schemas.XmlContent.XmlContent;

public class Fits {
 
  /**
   * Base directory of the FITS installation. Assigned by the Fits(String)
   * constructor from the FITS_HOME environment variable, else the constructor
   * argument, else "" (current directory); a non-empty value always ends with
   * File.separator.
   */
  public static String FITS_HOME;
  /** FITS_HOME + "xml" + File.separator; fits.xml and prettyprint.xslt are read from here. */
  public static String FITS_XML;
  /** FITS_HOME + "tools" + File.separator. */
  public static String FITS_TOOLS;
  /** Configuration loaded by the constructor from FITS_XML + "fits.xml". */
  public static XMLConfiguration config;
  /** XML mapper created by the constructor. */
  public static FitsXmlMapper mapper;
  /** Value of the "output.validate-tool-output" configuration key. */
  public static boolean validateToolOutput;
  /** Value of the "output.external-output-schema" configuration key. */
  public static String externalOutputSchema;
  /** Value of the "output.internal-output-schema" configuration key. */
  public static String internalOutputSchema;
  /** Namespace URI constant; presumably the namespace of FITS output XML (not referenced in this class) - TODO confirm. */
  public static final String XML_NAMESPACE = "http://hul.harvard.edu/ois/xml/ns/fits/fits_output";
 
  /** Instantiated reflectively from the class named by "output.dataConsolidator[@class]"; used by examine to merge tool results. */
  private ToolOutputConsolidator consolidator;
  /** Shared StAX output factory used by outputStandardSchemaXml. */
  private static XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();
  /** Created by the constructor from fits.xml; its tools are run by examine. */
  private ToolBelt toolbelt;
 
  /** Set by main from the -r option; when true, doDirectory descends into sub-directories. */
  private static boolean traverseDirs;
 
  public Fits() throws FitsException {
    this(null);
  }
 
  public Fits(String fits_home) throws FitsConfigurationException {
    //Set BB_HOME dir with environment variable
    FITS_HOME = System.getenv("FITS_HOME");
    if(FITS_HOME == null) {
      //if env variable not set check for fits_home passed into constructor
      if(fits_home != null) {
        FITS_HOME = fits_home;
      }
      else {
        //if fits_home is still not set use the current directory
        FITS_HOME = "";
      }
    }
   
    //If fits home is not an empty string and doesn't send with a file separator character, add one
    if(FITS_HOME.length() > 0 && !FITS_HOME.endsWith(File.separator)) {
        FITS_HOME = FITS_HOME+File.separator;
    }
   
    FITS_XML = FITS_HOME+"xml"+File.separator;
    FITS_TOOLS = FITS_HOME+"tools"+File.separator;
   
    try {
      config = new XMLConfiguration(FITS_XML+"fits.xml");
    } catch (ConfigurationException e) {
      throw new FitsConfigurationException("Error reading "+FITS_XML+"fits.xml",e);
    }
    try {
      mapper = new FitsXmlMapper();
    } catch (Exception  e) {
      throw new FitsConfigurationException("Error creating FITS XML Mapper",e);
    }
    validateToolOutput = config.getBoolean("output.validate-tool-output");
    externalOutputSchema   = config.getString("output.external-output-schema");
    internalOutputSchema   = config.getString("output.internal-output-schema");
   
    String consolidatorClass = config.getString("output.dataConsolidator[@class]");
    try {
      Class<?> c = Class.forName(consolidatorClass);
      consolidator = (ToolOutputConsolidator)c.newInstance();
    }
    catch(Exception e) {
      throw new FitsConfigurationException("Error initializing "+consolidatorClass,e);
    }
   
    toolbelt = new ToolBelt(FITS_XML+"fits.xml");
   
  }
 
  public static void main(String[] args) throws FitsException, IOException, ParseException, XMLStreamException {
    Fits fits = new Fits();
   
    Options options = new Options();
    options.addOption("i",true, "input file or directory");
    options.addOption("r",false,"process directories recursively when -i is a directory ");
    options.addOption("o",true, "output file");
    options.addOption("h",false,"print this message");
    options.addOption("v",false,"print version information");
    OptionGroup outputOptions = new OptionGroup();
    Option stdxml = new Option("x",false,"convert FITS output to a standard metadata schema");
    Option combinedStd = new Option("xc",false,"output using a standard metadata schema and include FITS xml");
    outputOptions.addOption(stdxml);
    outputOptions.addOption(combinedStd);
    options.addOptionGroup(outputOptions);

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
   
    if(cmd.hasOption("h")) {
      fits.printHelp(options);
      System.exit(0);
    }
    if(cmd.hasOption("v")) {
      System.out.println(FitsOutput.VERSION);
      System.exit(0);
    }
    if(cmd.hasOption("r")) {
      traverseDirs = true;
    }
    else {
      traverseDirs = false;
    }
   
    if(cmd.hasOption("i")) {
      String input = cmd.getOptionValue("i")
      File inputFile = new File(input);
     
      if(inputFile.isDirectory()) {
        String outputDir = cmd.getOptionValue("o");
        if(outputDir == null || !(new File(outputDir).isDirectory())) {
          throw new FitsException("When FITS is run in directory processing mode the output location must be a diretory");
        }
        fits.doDirectory(inputFile,new File(outputDir),cmd.hasOption("x"),cmd.hasOption("xc"));
      }
      else {
        FitsOutput result = fits.doSingleFile(inputFile);
        if(result != null) {
          fits.outputResults(result,cmd.getOptionValue("o"),cmd.hasOption("x"),cmd.hasOption("xc"),false);
        }
      }
    }
    else {
      System.err.println("Invalid CLI options");
      fits.printHelp(options);
      System.exit(-1);
    }
     

   
    System.exit(0);
  }
 
  /**
   * Recursively processes all files in the directory.
   * @param intputFile
   * @param useStandardSchemas
   * @throws IOException
   * @throws XMLStreamException
   * @throws FitsException
   */
  private void doDirectory(File inputDir, File outputDir, boolean useStandardSchemas, boolean standardCombinedFormat) throws FitsException, XMLStreamException, IOException {
    if(inputDir.listFiles() == null) {
      return;
    }
   
    for(File f : inputDir.listFiles()) {
     
      if(f == null || !f.exists() || !f.canRead()) {
        continue;
      }
     
      System.out.println("processing " + f.getPath());
      if(f.isDirectory() && traverseDirs) {
        doDirectory(f, outputDir, useStandardSchemas,standardCombinedFormat);
      }
      else if(f.isFile()) {
        FitsOutput result = doSingleFile(f);
        String outputFile = outputDir.getPath() + File.separator + f.getName() + ".fits.xml";
        File output = new File(outputFile);
        if(output.exists()) {
          int cnt = 1;
          while(true) {
            outputFile = outputDir.getPath() + File.separator + f.getName() + "-" + cnt + ".fits.xml";
            output = new File(outputFile);
            if(!output.exists()) {
              break;
            }
            cnt++;
          }
        }
        outputResults(result,outputFile,useStandardSchemas,standardCombinedFormat,true);
      }
      else if(!f.canRead()) {
        System.out.println("warning: cannot read " + f.getPath());
      }
    }
  }
 
 
  /**
   * processes a single file and outputs to the provided output location. Outputs to
   * standard out if outputLocation is null
   * @param inputFile
   * @param outputLocation
   * @param useStandardSchemas - use standard schemas if available for output type
   * @throws FitsException
   * @throws XMLStreamException
   * @throws IOException
   */
  private FitsOutput doSingleFile(File inputFile) throws FitsException, XMLStreamException, IOException {
    if(!inputFile.canRead()) {
      System.out.println("warning: cannot read " + inputFile.getPath());
      return null;
    }
   
    FitsOutput result = this.examine(inputFile)
    if(result.getCaughtExceptions().size() > 0) {
      for(Exception e: result.getCaughtExceptions()) {
        System.err.println("Warning: " + e.getMessage());
      }
    }
    return result;
  }
 
  private void outputResults(FitsOutput result, String outputLocation, boolean standardSchema, boolean standardCombinedFormat, boolean dirMode) throws XMLStreamException, IOException, FitsException {
    OutputStream out = null;
    try
        //figure out the output location
      if(outputLocation != null) {
        out = new FileOutputStream(outputLocation);
      }
      else if(!dirMode) {   
        out = System.out;
      }
      else {
        throw new FitsException("The output location must be provided when running FITS in directory mode");
      }
     
      //if -x is set, then convert to standard metadata schema and output to -o
      if(standardSchema) {
        outputStandardSchemaXml(result,out);
      }
      //if we are using -xc output FITS xml and standard format
      else if(standardCombinedFormat) {
        outputStandardCombinedFormat(result,out);
      }
      //else output FITS XML to -o
      else {
        Document doc = result.getFitsXml();
        XMLOutputter serializer = new XMLOutputter(Format.getPrettyFormat());
        serializer.output(doc, out);
      }
   
    }
    finally {
      if(out != null) {
        out.close();
      }
    }
  }
 
  public static void outputStandardCombinedFormat(FitsOutput result, OutputStream out) throws XMLStreamException, IOException, FitsException {
    //add the normal fits xml output
    result.addStandardCombinedFormat();
   
    //output the merged JDOM Document
    XMLOutputter serializer = new XMLOutputter(Format.getPrettyFormat());
    serializer.output(result.getFitsXml(), out);

  }
 
  public static void outputStandardSchemaXml(FitsOutput fitsOutput, OutputStream out) throws XMLStreamException, IOException {
    XmlContent xml = fitsOutput.getStandardXmlContent();
   
    //create an xml output factory
      Transformer transformer = null;
     
      //initialize transformer for pretty print xslt
      TransformerFactory tFactory = TransformerFactory.newInstance ();
      String prettyPrintXslt = FITS_XML+"prettyprint.xslt";
      try {
      Templates template = tFactory.newTemplates(new StreamSource(prettyPrintXslt));     
      transformer = template.newTransformer();
      }
      catch(Exception e) {
        transformer = null;
      }
     
    if(xml != null && transformer != null) {

      xml.setRoot(true)
      ByteArrayOutputStream xmlOutStream = new ByteArrayOutputStream();
      OutputStream xsltOutStream = new ByteArrayOutputStream();
     
      try {
        //send standard xml to the output stream
        XMLStreamWriter sw = xmlOutputFactory.createXMLStreamWriter(xmlOutStream);
        xml.output(sw);
       
        //convert output stream to byte array and read back in as inputstream
        Source source = new StreamSource(new ByteArrayInputStream(xmlOutStream.toByteArray()));
        Result rstream = new StreamResult(xsltOutStream);
       
        //apply the xslt
        transformer.transform(source,rstream);
       
        //send to the providedOutpuStream
        out.write(xsltOutStream.toString().getBytes("UTF-8"));
        out.flush();
       
      } catch (Exception e) {
        System.err.println("error converting output to a standard schema format: " + e.getMessage());
      }
      finally {
        xmlOutStream.close();
        xsltOutStream.close();
      }     
     
    }
    else {
      System.err.println("Error: output cannot be converted to a standard schema format for this file");
    }
  }
 
  private void printHelp(Options opts) {
    HelpFormatter formatter = new HelpFormatter();
    formatter.printHelp("fits", opts );
  }
 
  /* ORIGINAL EXAMINE METHOD WITHOUT THREADS
  
  public FitsOutput examineOriginal(File input) throws FitsException { 
    if(!input.exists()) {
      throw new FitsConfigurationException(input+" does not exist or is not readable");
    }
       
    List<ToolOutput> toolResults = new ArrayList<ToolOutput>();
   
    //run file through each tool, catching exceptions thrown by tools
    List<Exception> caughtExceptions = new ArrayList<Exception>();
    String path = input.getPath().toLowerCase();
    String ext = path.substring(path.lastIndexOf(".")+1);
    for(Tool t : toolbelt.getTools()) {     
      if(t.isEnabled()) {     
        if(!t.hasExcludedExtension(ext)) {
          try {
            ToolOutput tOutput = t.extractInfo(input);
            toolResults.add(tOutput);
          }
          catch(Exception e) {
            caughtExceptions.add(e);
          }
        }
      }
    }

   
    // consolidate the results into a single DOM
    FitsOutput result = consolidator.processResults(toolResults);
    result.setCaughtExceptions(caughtExceptions);
   
    for(Tool t: toolbelt.getTools()) {
      t.resetOutput();
    }
   
    return result; 
  }
  */
 
  public FitsOutput examine(File input) throws FitsException
    if(!input.exists()) {
      throw new FitsConfigurationException(input+" does not exist or is not readable");
    }
       
    List<ToolOutput> toolResults = new ArrayList<ToolOutput>();
   
    //run file through each tool, catching exceptions thrown by tools
    List<Exception> caughtExceptions = new ArrayList<Exception>();
    String path = input.getPath().toLowerCase();
    String ext = path.substring(path.lastIndexOf(".")+1);
   
    ArrayList<Thread> threads = new ArrayList<Thread>();
    for(Tool t : toolbelt.getTools()) {     
      if(t.isEnabled()) {
        //Only run the tool against this file if:
        //  The tool uses an 'include-ext' extentions list and it contains the extension
        //  or if the tool does not use the 'include-ext' extension list and the extension is not in the 'exclude-ext' list
        if((t.hasIncludedExtensions() && t.hasIncludedExtension(ext)) || (!t.hasIncludedExtensions() && !t.hasExcludedExtension(ext))) {
          //spin up new threads
          t.setInputFile(input);
          Thread thread = new Thread(t);
          threads.add(thread);
          thread.start();
        }
      }
    }
   
    //wait for them all to finish
    for(Thread thread : threads) {
      try {
        thread.join();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }
   
    //get all output from the tools
    for(Tool t: toolbelt.getTools()) {
      toolResults.add(t.getOutput());
    }
   
    // consolidate the results into a single DOM
    FitsOutput result = consolidator.processResults(toolResults);
    result.setCaughtExceptions(caughtExceptions);
   
    for(Tool t: toolbelt.getTools()) {
      t.resetOutput();
    }
   
    return result; 
  }
 
  public ToolBelt getToolbelt() {
    return toolbelt;
  }

}
TOP

Related Classes of edu.harvard.hul.ois.fits.Fits

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.