/*
* Copyright (C) Chaperon. All rights reserved.
* -------------------------------------------------------------------------
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE file.
*/
package net.sourceforge.chaperon.ant;
import net.sourceforge.chaperon.build.*;
import net.sourceforge.chaperon.model.grammar.*;
import net.sourceforge.chaperon.model.lexicon.*;
import net.sourceforge.chaperon.process.*;
import org.apache.tools.ant.*;
import org.apache.tools.ant.taskdefs.MatchingTask;
import org.apache.tools.ant.types.Mapper;
import org.apache.tools.ant.types.XMLCatalog;
import org.apache.tools.ant.util.FileNameMapper;
import org.apache.tools.ant.util.IdentityMapper;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import java.util.Properties;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
/**
 * An Ant task which runs text (or XML) files through a Chaperon lexer and, when a
 * grammar is given, a Chaperon parser, and serializes the resulting SAX events as
 * XML documents.
 *
 * <p>The lexical and parser automata are built once per execution from the lexicon
 * and grammar files. When a cache directory is configured, the built automata are
 * stored there as serialized objects and reused while they are newer than their
 * source file.</p>
 *
 * @author <a href="mailto:stephan@apache.org">Stephan Michels </a>
 * @version CVS $Id: ParserTask.java,v 1.2 2004/01/08 11:30:52 benedikta Exp $
 */
public class ParserTask extends MatchingTask
{
  /** Namespace of the wrapper element which is emitted around plain text input. */
  private static final String TEXT_NAMESPACE = "http://chaperon.sourceforge.net/schema/text/1.0";

  /** Local (and qualified) name of the wrapper element for plain text input. */
  private static final String TEXT_ELEMENT = "text";

  /** Suffix appended to the source file name to form the cache file name. */
  private static final String CACHE_SUFFIX = ".obj";

  private File srcDir = null;
  private File destDir = null;
  private File baseDir = null;
  private File cacheDir = null;
  private Mapper mapper = null;

  /** for resolving entities such as dtds */
  private XMLCatalog xmlCatalog = new XMLCatalog();
  private File lexiconFile = null;
  private File grammarFile = null;
  private String parserFactory = null;
  private SAXParserFactory parserFactoryImpl = null;
  private String transformerFactory = null;
  private SAXTransformerFactory transformerFactoryImpl = null;
  private String encoding = "ISO-8859-1";
  private boolean indent = false;
  private boolean flatten = false;
  private String inputtype = "text";
  private int msgLevel = Project.MSG_ERR;
  private AntLog log;
  private ParserAutomaton parserautomaton = null;
  private ParserProcessor parser = null;
  private LexicalAutomaton lexicalautomaton = null;
  private LexicalProcessor lexer = null;

  /**
   * Constructs the task
   */
  public ParserTask() {}

  /**
   * Executes the task: validates the configuration, builds (or loads from the cache)
   * the automata and processes every file selected in the source directory.
   *
   * @throws BuildException If the configuration is incomplete or a file could not
   *         be processed.
   */
  public void execute() throws BuildException
  {
    if (baseDir==null)
    {
      baseDir = getProject().resolveFile(".");
    }

    if (lexiconFile==null)
    {
      throw new BuildException("No lexicon file is specified", getLocation());
    }

    if (!lexiconFile.exists())
    {
      throw new BuildException("Lexicon file doesn't exists:"+lexiconFile.getAbsolutePath(),
                               getLocation());
    }

    // Fail early with a message naming the grammar instead of a wrapped I/O error.
    if ((grammarFile!=null) && (!grammarFile.exists()))
    {
      throw new BuildException("Grammar file doesn't exist: "+grammarFile.getAbsolutePath(),
                               getLocation());
    }

    if (destDir==null)
    {
      throw new BuildException("No destdir specified!", getLocation());
    }

    // The directory scanner cannot work without a directory to scan.
    if (srcDir==null)
    {
      throw new BuildException("No srcdir specified!", getLocation());
    }

    log = new AntLog(getProject(), msgLevel);

    buildAutomata(lexiconFile, grammarFile);

    DirectoryScanner scanner = getDirectoryScanner(srcDir);

    FileNameMapper mapperImpl;
    if (mapper==null)
    {
      mapperImpl = new IdentityMapper();
    }
    else
    {
      mapperImpl = mapper.getImplementation();
    }

    String[] list = scanner.getIncludedFiles();
    for (int i = 0; i<list.length; i++)
    {
      // A mapper may yield no target at all (null) or several targets for one source.
      String[] dest = mapperImpl.mapFileName(list[i]);
      if (dest==null)
      {
        continue;
      }

      for (int j = 0; j<dest.length; j++)
      {
        log("Transforming "+list[i]+" to "+dest[j], Project.MSG_DEBUG);
        process(new File(srcDir, list[i]), new File(destDir, dest[j]));
      }
    }
  }

  /**
   * Set the base directory. If unset, it defaults to the project's base directory
   * when the task is executed.
   *
   * @param dir Base directory
   */
  public void setBasedir(File dir)
  {
    baseDir = dir;
  }

  /**
   * Set the source directory, which is scanned for the files to process.
   *
   * @param dir Source directory
   */
  public void setSrcdir(File dir)
  {
    srcDir = dir;
  }

  /**
   * Set the destination directory into which the result files are written.
   *
   * @param dir Destination directory
   */
  public void setDestdir(File dir)
  {
    destDir = dir;
  }

  /**
   * Set the directory in which the built automata are cached as serialized objects.
   * The directory must already exist when the task is executed.
   *
   * @param dir Directory for caching objects
   */
  public void setCachedir(File dir)
  {
    cacheDir = dir;
  }

  /**
   * Creates the mapper, which maps source file names to destination file names.
   *
   * @return New mapper.
   *
   * @throws BuildException If a mapper was already defined.
   */
  public Mapper createMapper() throws BuildException
  {
    if (mapper!=null)
    {
      throw new BuildException("Cannot define more than one mapper", getLocation());
    }

    mapper = new Mapper(getProject());
    return mapper;
  }

  /**
   * Set the lexicon, which should be used.
   *
   * @param lexiconFile Lexicon file.
   */
  public void setLexicon(File lexiconFile)
  {
    this.lexiconFile = lexiconFile;
  }

  /**
   * Set the grammar, which should be used. Without a grammar only the lexer is
   * applied to the input.
   *
   * @param grammarFile Grammar file.
   */
  public void setGrammar(File grammarFile)
  {
    this.grammarFile = grammarFile;
  }

  /**
   * Sets the message level. Recognized values (case-insensitive) are
   * <code>debug</code>, <code>verbose</code>, <code>info</code>, <code>warn</code>
   * and <code>error</code>; any other value leaves the current level unchanged.
   *
   * @param msgLevel Message level.
   */
  public void setMsglevel(String msgLevel)
  {
    if (msgLevel.equalsIgnoreCase("debug"))
    {
      this.msgLevel = Project.MSG_DEBUG;
    }
    else if (msgLevel.equalsIgnoreCase("verbose"))
    {
      this.msgLevel = Project.MSG_VERBOSE;
    }
    else if (msgLevel.equalsIgnoreCase("info"))
    {
      this.msgLevel = Project.MSG_INFO;
    }
    else if (msgLevel.equalsIgnoreCase("warn"))
    {
      this.msgLevel = Project.MSG_WARN;
    }
    else if (msgLevel.equalsIgnoreCase("error"))
    {
      this.msgLevel = Project.MSG_ERR;
    }
  }

  /**
   * Sets the encoding which is used to serialize the output documents. Note that
   * plain text input is read with the platform default charset, not with this
   * encoding.
   *
   * @param encoding Encoding of the output document
   */
  public void setEncoding(String encoding)
  {
    this.encoding = encoding;
  }

  /**
   * Set if the output document should be indented
   *
   * @param indent If the output should be indented
   */
  public void setIndent(boolean indent)
  {
    this.indent = indent;
  }

  /**
   * Reduces the depth of the produced hierarchy by flattening nested elements with
   * the same name.
   *
   * @param flatten If the hierarchy should be reduced.
   */
  public void setFlatten(boolean flatten)
  {
    this.flatten = flatten;
  }

  /**
   * If the input document is a XML or a text document. The value <code>xml</code>
   * (case-insensitive) selects XML input, every other value selects text input.
   *
   * @param inputtype Type of the input document.
   */
  public void setInputtype(String inputtype)
  {
    this.inputtype = inputtype;
  }

  /**
   * Class name of the SAX parser factory. If unset, the JAXP default is used.
   *
   * @param parserFactory Name of the parser factory.
   */
  public void setParser(String parserFactory)
  {
    this.parserFactory = parserFactory;
  }

  /**
   * Class name of the SAX transformer factory. If unset, the JAXP default is used.
   *
   * @param transformerFactory Name of the transformer factory.
   */
  public void setTransformer(String transformerFactory)
  {
    this.transformerFactory = transformerFactory;
  }

  /**
   * Add the catalog to our internal catalog
   *
   * @param xmlCatalog the XMLCatalog instance to use to look up DTDs
   */
  public void addConfiguredXMLCatalog(XMLCatalog xmlCatalog)
  {
    this.xmlCatalog.addConfiguredXMLCatalog(xmlCatalog);
  }

  /**
   * Initialize internal instance of XMLCatalog
   *
   * @throws BuildException If the initialization of the super class fails.
   */
  public void init() throws BuildException
  {
    super.init();
    xmlCatalog.setProject(getProject());
  }

  /**
   * Processes the given input file and stores the result in the given output file.
   * The input is skipped when the output file is at least as new as the input file.
   * If processing fails, the (possibly incomplete) output file is deleted.
   *
   * @param inFile The file, which should be parsed
   * @param outFile The output file
   *
   * @throws BuildException If the input file is missing or could not be processed.
   */
  private void process(File inFile, File outFile) throws BuildException
  {
    try
    {
      if (!inFile.exists())
      {
        throw new BuildException("File "+inFile+" doesn't exists", getLocation());
      }

      // Up to date: nothing to do.
      if (inFile.lastModified()<=outFile.lastModified())
      {
        return;
      }

      ensureDirectoryFor(outFile);
      log("Parsing file "+inFile+" to "+outFile, Project.MSG_INFO);

      Properties format = new Properties();
      format.put(OutputKeys.ENCODING, encoding);
      if (indent)
      {
        format.put(OutputKeys.INDENT, "yes");
      }
      format.put(OutputKeys.METHOD, "xml");

      SAXTransformerFactory factory = getTransformerFactory();
      TransformerHandler serializer = factory.newTransformerHandler();
      serializer.getTransformer().setOutputProperties(format);

      // The stream is opened and closed here instead of leaving that to the
      // transformer, so that the file is never left open, in particular before the
      // output is deleted after a failure.
      OutputStream out = new BufferedOutputStream(new FileOutputStream(outFile));
      boolean completed = false;
      try
      {
        serializer.setResult(new StreamResult(out));

        if (this.parserautomaton!=null)
        {
          this.parser = new ParserProcessor();
          this.parser.setLog(log);
          this.parser.setFlatten(this.flatten);
          this.parser.setParserAutomaton(this.parserautomaton);
          this.parser.setContentHandler(serializer);
        }

        this.lexer = new LexicalProcessor();
        this.lexer.setLog(log);
        this.lexer.setLexicalAutomaton(this.lexicalautomaton);

        // Without a grammar the lexer output is serialized directly.
        if (this.parserautomaton!=null)
        {
          this.lexer.setContentHandler(this.parser);
        }
        else
        {
          this.lexer.setContentHandler(serializer);
        }

        if (!inputtype.equalsIgnoreCase("xml"))
        {
          pushTextFile(inFile);
        }
        else
        {
          pushXMLFile(inFile);
        }

        completed = true;
      }
      finally
      {
        if (completed)
        {
          // A failing close means lost output, so it has to be reported.
          out.close();
        }
        else
        {
          closeQuietly(out);
        }
      }
    }
    catch (Exception ex)
    {
      if (outFile!=null)
      {
        outFile.delete();
      }

      if (ex instanceof BuildException)
      {
        throw (BuildException)ex;
      }

      throw new BuildException("Failed to process "+inFile+" : "+ex.getMessage(), ex);
    }
  }

  /**
   * Build the automata for the lexicon and grammar. The parser automaton is only
   * built if a grammar file is given.
   *
   * @param lexiconFile Lexicon file.
   * @param grammarFile Grammar file, may be <code>null</code>.
   *
   * @throws BuildException If the cache directory doesn't exist, or an automaton
   *         could not be loaded or built.
   */
  private void buildAutomata(File lexiconFile, File grammarFile)
    throws BuildException
  {
    if ((cacheDir!=null) && (!cacheDir.exists()))
    {
      throw new BuildException("Cache directory "+cacheDir+" doesn't exist");
    }

    try
    {
      this.lexicalautomaton = loadLexicalAutomaton(lexiconFile);

      if (grammarFile!=null)
      {
        this.parserautomaton = loadParserAutomaton(grammarFile);
      }
    }
    catch (BuildException be)
    {
      throw be;
    }
    catch (Exception ex)
    {
      throw new BuildException(ex);
    }
  }

  /**
   * Returns the lexical automaton for the lexicon, either read from an up-to-date
   * cache file or freshly built (and then written to the cache, if enabled).
   */
  private LexicalAutomaton loadLexicalAutomaton(File lexiconFile) throws Exception
  {
    File cacheFile = cacheFileFor(lexiconFile);

    if (isCacheUpToDate(cacheFile, lexiconFile))
    {
      log("Reading lexicon from cache "+cacheFile, Project.MSG_DEBUG);
      return (LexicalAutomaton)readCachedObject(cacheFile);
    }

    log("Building lexicon from "+lexiconFile, Project.MSG_INFO);

    LexiconFactory lexiconfactory = new LexiconFactory();
    parseDefinition(lexiconFile, lexiconfactory);

    Lexicon lexicon = lexiconfactory.getLexicon();
    LexicalAutomaton automaton = (new LexicalAutomatonBuilder(lexicon, log)).getLexicalAutomaton();

    if (cacheFile!=null)
    {
      writeCachedObject(cacheFile, automaton);
    }

    return automaton;
  }

  /**
   * Returns the parser automaton for the grammar, either read from an up-to-date
   * cache file or freshly built (and then written to the cache, if enabled).
   */
  private ParserAutomaton loadParserAutomaton(File grammarFile) throws Exception
  {
    File cacheFile = cacheFileFor(grammarFile);

    if (isCacheUpToDate(cacheFile, grammarFile))
    {
      log("Reading grammar from cache "+cacheFile, Project.MSG_DEBUG);
      return (ParserAutomaton)readCachedObject(cacheFile);
    }

    log("Building grammar from "+grammarFile, Project.MSG_INFO);

    GrammarFactory grammarfactory = new GrammarFactory();
    parseDefinition(grammarFile, grammarfactory);

    Grammar grammar = grammarfactory.getGrammar();
    ParserAutomaton automaton = (new ParserAutomatonBuilder(grammar, log)).getParserAutomaton();

    if (cacheFile!=null)
    {
      writeCachedObject(cacheFile, automaton);
    }

    return automaton;
  }

  /**
   * Parses a lexicon or grammar definition file and feeds its SAX events to the
   * given handler. A parse error is reported with the name of the parsed file.
   */
  private void parseDefinition(File definitionFile, ContentHandler handler)
    throws Exception
  {
    XMLReader reader = newXMLReader();
    reader.setContentHandler(handler);

    try
    {
      reader.parse(definitionFile.toString());
    }
    catch (SAXParseException se)
    {
      throw new BuildException("Couldn't parse file "+definitionFile, se);
    }
  }

  /**
   * Creates a namespace aware XML reader, which resolves entities through the
   * XML catalog of this task.
   */
  private XMLReader newXMLReader() throws Exception
  {
    SAXParserFactory factory = getParserFactory();
    factory.setNamespaceAware(true);

    XMLReader reader = factory.newSAXParser().getXMLReader();
    reader.setEntityResolver(xmlCatalog);
    return reader;
  }

  /**
   * Returns the cache file belonging to the given source file, or <code>null</code>
   * if caching is disabled.
   */
  private File cacheFileFor(File sourceFile)
  {
    if (cacheDir==null)
    {
      return null;
    }

    return new File(cacheDir, sourceFile.getName()+CACHE_SUFFIX);
  }

  /**
   * Tests whether the cache file exists and is newer than the file it was built from.
   */
  private boolean isCacheUpToDate(File cacheFile, File sourceFile)
  {
    return (cacheFile!=null) && cacheFile.exists() &&
           (cacheFile.lastModified()>sourceFile.lastModified());
  }

  /**
   * Reads a serialized automaton from the cache file.
   *
   * NOTE(review): this uses Java native deserialization, so the cache directory
   * must only contain files written by trusted builds.
   */
  private Object readCachedObject(File cacheFile)
    throws IOException, ClassNotFoundException
  {
    InputStream fileIn = new FileInputStream(cacheFile);
    try
    {
      ObjectInputStream in = new ObjectInputStream(fileIn);
      return in.readObject();
    }
    finally
    {
      closeQuietly(fileIn);
    }
  }

  /**
   * Writes a serialized automaton to the cache file. An incomplete cache file is
   * removed, since it would be newer than its source and hence be read (and fail)
   * on every following build.
   */
  private void writeCachedObject(File cacheFile, Object automaton)
    throws IOException
  {
    OutputStream fileOut = new FileOutputStream(cacheFile);
    boolean written = false;
    try
    {
      ObjectOutputStream out = new ObjectOutputStream(fileOut);
      out.writeObject(automaton);
      out.flush();
      out.close();
      written = true;
    }
    finally
    {
      if (!written)
      {
        closeQuietly(fileOut);
        cacheFile.delete();
      }
    }
  }

  /**
   * Closes the resource and ignores a failure to do so. Intended for cleanup paths
   * where another exception is already being propagated, or for input streams.
   */
  private static void closeQuietly(Closeable resource)
  {
    if (resource==null)
    {
      return;
    }

    try
    {
      resource.close();
    }
    catch (IOException ignored)
    {
      // Nothing sensible can be done here; the primary error is more relevant.
    }
  }

  /**
   * Pushes a plain text file line by line into the lexer, wrapped into a
   * <code>text</code> element. Every line except the last one is terminated with
   * the platform line separator, whatever terminator the file itself uses. The file
   * is read with the platform default charset.
   *
   * @param inFile The text file.
   *
   * @throws Exception If the file could not be read or the lexer/parser rejects it.
   */
  private void pushTextFile(File inFile) throws Exception
  {
    LineNumberReader reader = null;
    try
    {
      LocatorImpl locator = new LocatorImpl();

      // toURI() escapes characters which are illegal in URLs, File.toURL() does not.
      locator.setSystemId(inFile.toURI().toURL().toString());
      locator.setLineNumber(1);
      locator.setColumnNumber(1);

      this.lexer.setDocumentLocator(locator);
      this.lexer.startDocument();
      this.lexer.startElement(TEXT_NAMESPACE, TEXT_ELEMENT, TEXT_ELEMENT, new AttributesImpl());

      reader = new LineNumberReader(new InputStreamReader(new FileInputStream(inFile)));

      String separator = System.getProperty("line.separator");

      // One line of look-ahead is needed to know whether the current line is the
      // last one, which must not get a trailing separator.
      String current = reader.readLine();
      while (current!=null)
      {
        String next = reader.readLine();
        String chunk = (next!=null) ? (current+separator) : current;

        // NOTE(review): because of the look-ahead the reader is already one line
        // further for all but the last line; confirm which position the lexer
        // expects from the locator.
        locator.setLineNumber(reader.getLineNumber());
        locator.setColumnNumber(1);

        this.lexer.characters(chunk.toCharArray(), 0, chunk.length());

        current = next;
      }

      this.lexer.endElement(TEXT_NAMESPACE, TEXT_ELEMENT, TEXT_ELEMENT);
      this.lexer.endDocument();
    }
    catch (SAXParseException se)
    {
      throw new BuildException("Exception occurs during parsing file "+inFile+" at line "+
                               se.getLineNumber()+" column "+se.getColumnNumber(), se);
    }
    finally
    {
      closeQuietly(reader);
    }
  }

  /**
   * Parses an XML file and pushes its SAX events into the lexer.
   *
   * @param inFile The XML file.
   *
   * @throws Exception If the file could not be read or parsed.
   */
  private void pushXMLFile(File inFile) throws Exception
  {
    XMLReader reader = newXMLReader();
    reader.setContentHandler(this.lexer);

    try
    {
      reader.parse(inFile.toString());
    }
    catch (SAXParseException se)
    {
      throw new BuildException("Exception occurs during parsing file "+inFile+" at line "+
                               se.getLineNumber()+" column "+se.getColumnNumber(), se);
    }
  }

  /**
   * Ensures that the directory of the given file exists, creating it if necessary.
   *
   * @param targetFile The file whose parent directory is needed
   *
   * @throws BuildException If the directory could not be created.
   */
  private void ensureDirectoryFor(File targetFile) throws BuildException
  {
    File directory = targetFile.getParentFile();

    // A file without a parent path lives in the current directory.
    if (directory==null)
    {
      return;
    }

    if ((!directory.exists()) && (!directory.mkdirs()))
    {
      throw new BuildException("Unable to create directory: "+directory.getAbsolutePath());
    }
  }

  /**
   * Returns the SAX parser factory, creating it on first use either from the
   * configured class name or through the JAXP lookup.
   *
   * @return The parser factory.
   *
   * @throws BuildException If the factory could not be instantiated.
   */
  private SAXParserFactory getParserFactory() throws BuildException
  {
    if (parserFactoryImpl==null)
    {
      try
      {
        if (parserFactory==null)
        {
          parserFactoryImpl = SAXParserFactory.newInstance();
        }
        else
        {
          parserFactoryImpl = (SAXParserFactory)Class.forName(parserFactory).newInstance();
        }
      }
      catch (Exception e)
      {
        throw new BuildException("Could not load parser factory", e);
      }
    }

    return parserFactoryImpl;
  }

  /**
   * Returns the SAX transformer factory, creating it on first use either from the
   * configured class name or through the JAXP lookup.
   *
   * @return The transformer factory.
   *
   * @throws BuildException If the factory could not be instantiated.
   */
  private SAXTransformerFactory getTransformerFactory()
    throws BuildException
  {
    if (transformerFactoryImpl==null)
    {
      try
      {
        if (transformerFactory==null)
        {
          transformerFactoryImpl = (SAXTransformerFactory)SAXTransformerFactory.newInstance();
        }
        else
        {
          transformerFactoryImpl =
            (SAXTransformerFactory)Class.forName(transformerFactory).newInstance();
        }
      }
      catch (Exception e)
      {
        throw new BuildException("Could not load transformer factory", e);
      }
    }

    return transformerFactoryImpl;
  }
}