package au.net.causal.maven.plugins.html2pdf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import org.apache.maven.model.FileSet;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
import org.apache.maven.plugins.annotations.Mojo;
import org.apache.maven.plugins.annotations.Parameter;
import org.ccil.cowan.tagsoup.jaxp.SAXFactoryImpl;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.IOUtil;
import org.codehaus.plexus.util.StringUtils;
import org.w3c.dom.Document;
import org.xhtmlrenderer.extend.UserAgentCallback;
import org.xhtmlrenderer.pdf.ITextRenderer;
import org.xhtmlrenderer.resource.ImageResource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.lowagie.text.DocumentException;
/**
* Converts HTML and XHTML documentation into PDF format.
*
* @author prunge
*/
@Mojo(name="html2pdf")
public class Html2PdfMojo extends AbstractMojo
{
/**
* The PDF file to generate. Its parent directory is created if it does not already exist.
*/
@Parameter(property="html2pdf.outputFile", required=true)
private File outputFile;
/**
* The filesets to use for processing HTML files. Use either this or {@link #htmlFiles}; configuring both
* (or neither) fails the build. Filesets are processed in the order given, and the files matched by each
* fileset are sorted according to {@link #caseSensitiveFileNameSort} and {@link #sortFileSetsWithPath}.
*/
@Parameter
private List<FileSet> htmlFileSets;
/**
* A list of HTML files to convert to PDF form. Use either this or {@link #htmlFileSets}; configuring both
* (or neither) fails the build. Files are converted in the order listed; no sorting is applied.
*/
@Parameter
private List<File> htmlFiles;
/**
* Input type to determine if file needs to be converted into XHTML first. Defaults to automatic mode.
* In automatic mode each file is first parsed as XML and is only converted from HTML when that parse fails.
* In XHTML mode a parse failure fails the build. Any other value always runs the HTML to XHTML conversion.
*/
@Parameter(property="html2pdf.inputType", defaultValue="AUTO")
private InputType inputType = InputType.AUTO;
/**
* Whether the build fails when images referenced by the HTML could not be loaded. Missing images are always
* logged as warnings; when this is true the build additionally fails after all documents have been rendered.
* Defaults to true.
*/
@Parameter(property="html2pdf.failOnMissingImages", defaultValue="true")
private boolean failOnMissingImages;
/**
* PDF metadata: document title. If not specified, the title is read from the first HTML file.
*/
@Parameter
private String title;
/**
* PDF metadata: author.
*/
@Parameter
private String author;
/**
* PDF metadata: subject.
*/
@Parameter
private String subject;
/**
* PDF metadata: keywords.
*/
@Parameter
private String keywords;
/**
* Whether file names in file sets are sorted case sensitively. Defaults to true.
* Only affects {@link #htmlFileSets}; files given through {@link #htmlFiles} are not sorted.
*/
@Parameter(defaultValue="true")
private boolean caseSensitiveFileNameSort;
/**
* If true, file names in file sets are sorted including their path. If false, only the file name is used for sorting. Defaults to true.
* Only affects {@link #htmlFileSets}; files given through {@link #htmlFiles} are not sorted.
*/
@Parameter(defaultValue="true")
private boolean sortFileSetsWithPath;
/**
 * Expands the given filesets into a single flat list of files.
 * Filesets are expanded one after another in list order; the files of each fileset are
 * appended in the order produced by {@link #buildFileList(FileSet, List)}.
 *
 * @param fileSets the filesets to expand.
 *
 * @return a new list holding every matched file.
 *
 * @throws IOException if scanning a fileset fails.
 */
private List<File> toFileList(List<FileSet> fileSets)
throws IOException
{
    List<File> allFiles = new ArrayList<File>();

    for (FileSet currentSet : fileSets)
        buildFileList(currentSet, allFiles);

    getLog().debug("File list: " + allFiles);

    return allFiles;
}
/**
 * Scans a single fileset, sorts the matching files with the configured comparator and appends
 * them to the target list.
 *
 * @param fileSet the fileset whose directory, includes and excludes drive the scan.
 * @param fileList the list that receives the sorted matches.
 *
 * @throws IOException if the scan fails.
 */
private void buildFileList(FileSet fileSet, List<? super File> fileList)
throws IOException
{
    //Includes and excludes are handed to the scanner as comma separated pattern strings
    List<File> matches = FileUtils.getFiles(
            new File(fileSet.getDirectory()),
            listToString(fileSet.getIncludes()),
            listToString(fileSet.getExcludes()));
    getLog().debug("curFileList before sort: " + matches);

    //Order depends on the caseSensitiveFileNameSort / sortFileSetsWithPath settings
    Comparator<File> ordering = createFileComparator();
    Collections.sort(matches, ordering);
    getLog().debug("curFileList after sort: " + matches);

    fileList.addAll(matches);
}
/**
 * Builds the comparator used to order the files matched by a fileset.
 * The choice is driven by two plugin settings: whether comparison is case sensitive and whether
 * the whole path or only the file name is compared.
 *
 * @return a new comparator matching the current configuration.
 */
protected Comparator<File> createFileComparator()
{
    if (caseSensitiveFileNameSort && sortFileSetsWithPath)
        return new FilePathCaseSensitiveComparator();

    if (caseSensitiveFileNameSort)
        return new FileNameOnlyCaseSensitiveComparator();

    //Case insensitive variants from here on
    if (sortFileSetsWithPath)
        return new FilePathCaseInsensitiveComparator();

    return new FileNameOnlyCaseInsensitiveComparator();
}
/**
 * Joins the given strings into one comma separated string.
 *
 * @param strings the strings to join.
 *
 * @return the joined string.
 */
private static String listToString(List<String> strings)
{
    final String separator = ",";
    String joined = StringUtils.join(strings.iterator(), separator);
    return joined;
}
/**
 * Reads the input HTML file as a DOM document with XHTML input, performing conversion if necessary.
 * <p>
 * For the {@code AUTO} and {@code XHTML} input types the file is first parsed as namespace-aware XML.
 * If that parse fails, {@code XHTML} mode reports the error while {@code AUTO} mode falls back to
 * converting the file from HTML. Every other input type goes straight to the HTML conversion, which
 * feeds the TagSoup SAX parser into an identity transformer that builds the DOM.
 *
 * @param htmlFile the HTML file to read.
 *
 * @return a DOM document containing XHTML data.
 *
 * @throws MojoExecutionException if the file cannot be read or parsed, or if the XML/transformer
 *             infrastructure cannot be set up.
 */
protected Document readInputFileAsDocument(File htmlFile)
throws MojoExecutionException
{
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setNamespaceAware(true);
    try
    {
        DocumentBuilder db = dbf.newDocumentBuilder();
        db.setEntityResolver(new NullEntityResolver());

        Document xhtmlDocument = null;

        //Conversion is the default; it is only skipped once the file has been parsed successfully as XML
        boolean needsTidy = true;

        if (inputType == InputType.AUTO || inputType == InputType.XHTML)
        {
            try
            {
                //Do this so we don't get Xerces's default action which prints to the console
                db.setErrorHandler(new DefaultHandler());
                xhtmlDocument = db.parse(htmlFile);
                needsTidy = false;
            }
            catch (SAXException e)
            {
                if (inputType == InputType.XHTML)
                    throw new MojoExecutionException("Error parsing XHTML in " + htmlFile.getPath() + ": " + e, e);

                //AUTO mode: failed to parse, must be HTML - needsTidy is still true
            }
            finally
            {
                //Reset error handler to default
                db.setErrorHandler(null);
            }
        }

        if (needsTidy)
        {
            SAXFactoryImpl tagSoupSaxFactory = new SAXFactoryImpl();
            tagSoupSaxFactory.setNamespaceAware(true);
            SAXParser tagSoupParser = tagSoupSaxFactory.newSAXParser();

            TransformerFactory tf = TransformerFactory.newInstance();
            if (!(tf instanceof SAXTransformerFactory))
                throw new MojoExecutionException("Require a SAX transformer factory for HTML to XHTML conversion but could not get one from " + tf.getClass().getCanonicalName());

            //The transformer handler copies the SAX events from TagSoup into a fresh DOM document
            TransformerHandler th = ((SAXTransformerFactory)tf).newTransformerHandler();
            xhtmlDocument = dbf.newDocumentBuilder().newDocument();
            th.setResult(new DOMResult(xhtmlDocument));
            try
            {
                tagSoupParser.parse(htmlFile, new DefaultContentHandler(th));
            }
            catch (SAXException e)
            {
                throw new MojoExecutionException("Failed to parse HTML in " + htmlFile.getPath() + ": " + e.getMessage(), e);
            }
        }

        return xhtmlDocument;
    }
    catch (ParserConfigurationException e)
    {
        throw new MojoExecutionException(e.getMessage(), e);
    }
    catch (TransformerConfigurationException e)
    {
        throw new MojoExecutionException(e.getMessage(), e);
    }
    catch (IOException e)
    {
        throw new MojoExecutionException("I/O error reading file " + htmlFile.getPath() + ": " + e.getMessage(), e);
    }
}
/**
 * Renders the configured HTML files, in order, into a single PDF written to {@link #outputFile}.
 * <p>
 * Exactly one of {@link #htmlFileSets} and {@link #htmlFiles} must be configured. The input file list
 * is resolved before the output file is opened, so a run that finds no input does not leave an empty
 * PDF behind. Images that could not be loaded are logged as warnings once rendering has finished.
 *
 * @throws MojoExecutionException if the configuration is invalid, the output directory cannot be
 *             created, no input files are found, or reading, converting or writing fails.
 * @throws MojoFailureException if images were missing and {@link #failOnMissingImages} is set.
 */
@Override
public void execute() throws MojoExecutionException, MojoFailureException
{
    //Use the absolute form so a bare file name such as "out.pdf" still resolves to a parent directory
    File outputDirectory = outputFile.getAbsoluteFile().getParentFile();
    if (outputDirectory != null && !outputDirectory.exists())
    {
        //Re-check isDirectory() in case something else created the directory in the meantime
        if (!outputDirectory.mkdirs() && !outputDirectory.isDirectory())
            throw new MojoExecutionException("Could not create output directory " + outputDirectory.getAbsolutePath());
    }

    if ((htmlFileSets == null || htmlFileSets.isEmpty()) && (htmlFiles == null || htmlFiles.isEmpty()))
        throw new MojoExecutionException("htmlFileSets/htmlFiles not specified. Ensure htmlFileSets or htmlFiles is configured for the html2pdf plugin.");
    if (htmlFileSets != null && !htmlFileSets.isEmpty() && htmlFiles != null && !htmlFiles.isEmpty())
        throw new MojoExecutionException("Both htmlFileSets and htmlFiles were specified. Ensure htmlFileSets or htmlFiles (not both) is configured for the html2pdf plugin.");

    getLog().info("Generating PDF: " + outputFile.getAbsolutePath());

    try
    {
        //Resolve the input files before opening the output so that a failure here leaves no empty PDF
        if (htmlFiles == null || htmlFiles.isEmpty())
            htmlFiles = toFileList(htmlFileSets);
        if (htmlFiles.isEmpty())
            throw new MojoExecutionException("No HTML files found");

        OutputStream os = new FileOutputStream(outputFile);
        try
        {
            ITextRenderer renderer = new ITextRenderer();

            //Wrap the renderer's user agent so that images that fail to load are recorded
            MissingResourceLoggingUserAgent uac = new MissingResourceLoggingUserAgent(renderer.getSharedContext().getUserAgentCallback());
            renderer.getSharedContext().setUserAgentCallback(uac);

            if (title != null)
                renderer.getOutputDevice().setMetadata("title", title);
            if (author != null)
                renderer.getOutputDevice().setMetadata("author", author);
            if (subject != null)
                renderer.getOutputDevice().setMetadata("subject", subject);
            if (keywords != null)
                renderer.getOutputDevice().setMetadata("keywords", keywords);

            //The first document starts the PDF, every following document is appended to it
            boolean initial = true;
            for (File htmlFile : htmlFiles)
            {
                getLog().info("Converting HTML " + htmlFile.getAbsolutePath());
                Document doc = readInputFileAsDocument(htmlFile);
                renderer.setDocument(doc, htmlFile.toURI().toString());
                renderer.layout();
                if (initial)
                {
                    renderer.createPDF(os, false);
                    initial = false;
                }
                else
                    renderer.writeNextDocument();
            }
            renderer.finishPDF();

            for (RelativeUri missingImageUri : uac.getMissingImageUris())
            {
                getLog().warn("Missing image: " + missingImageUri.getUri() + " (from " + missingImageUri.getBase() + ")");
            }
            if (failOnMissingImages && !uac.getMissingImageUris().isEmpty())
                throw new MojoFailureException("Missing image files detected.");
        }
        finally
        {
            IOUtil.close(os);
        }
    }
    catch (DocumentException e)
    {
        throw new MojoExecutionException("Error generating PDF: " + e, e);
    }
    catch (IOException e)
    {
        throw new MojoExecutionException(e.getMessage(), e);
    }
}
/**
 * User agent wrapper that remembers every image URI for which no usable image could be obtained.
 * Each miss is recorded together with the base URL in effect at the time of the request.
 */
private static class MissingResourceLoggingUserAgent extends FilteredUserAgent
{
    //Sorted set; presumably RelativeUri is Comparable - verify against its declaration
    private final Set<RelativeUri> missingImageUris = new TreeSet<RelativeUri>();

    /**
     * @param uac the user agent that is handed to the superclass.
     */
    public MissingResourceLoggingUserAgent(UserAgentCallback uac)
    {
        super(uac);
    }

    /**
     * Looks up the image through the superclass and records the URI when nothing usable came back.
     *
     * @param uri the image URI being requested.
     *
     * @return whatever the superclass returned, unchanged.
     */
    @Override
    public ImageResource getImageResource(String uri)
    {
        ImageResource resource = super.getImageResource(uri);

        boolean usable = resource != null && resource.getImage() != null;
        if (!usable)
            missingImageUris.add(new RelativeUri(uri, getBaseURL()));

        return resource;
    }

    /**
     * @return a read-only view of the image URIs recorded as missing so far.
     */
    public Set<? extends RelativeUri> getMissingImageUris()
    {
        return Collections.unmodifiableSet(missingImageUris);
    }
}
/**
 * Orders files by their file name only (directories are ignored), comparing case sensitively.
 */
private static class FileNameOnlyCaseSensitiveComparator implements Comparator<File>
{
    @Override
    public int compare(File o1, File o2)
    {
        String firstName = o1.getName();
        String secondName = o2.getName();
        return firstName.compareTo(secondName);
    }
}
/**
 * Orders files by their file name only (directories are ignored), ignoring case differences.
 */
private static class FileNameOnlyCaseInsensitiveComparator implements Comparator<File>
{
    @Override
    public int compare(File o1, File o2)
    {
        String firstName = o1.getName();
        String secondName = o2.getName();
        return firstName.compareToIgnoreCase(secondName);
    }
}
/**
 * Orders files by their full path string, comparing case sensitively.
 */
private static class FilePathCaseSensitiveComparator implements Comparator<File>
{
    @Override
    public int compare(File o1, File o2)
    {
        String firstPath = o1.getPath();
        String secondPath = o2.getPath();
        return firstPath.compareTo(secondPath);
    }
}
/**
 * Orders files by their full path string, ignoring case differences.
 */
private static class FilePathCaseInsensitiveComparator implements Comparator<File>
{
    @Override
    public int compare(File o1, File o2)
    {
        String firstPath = o1.getPath();
        String secondPath = o2.getPath();
        return firstPath.compareToIgnoreCase(secondPath);
    }
}
}