Source Code of html.BulkConvert

/*
 *    uDig - User Friendly Desktop Internet GIS client
 *    http://udig.refractions.net
 *    (C) 2012, Refractions Research Inc.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * (http://www.eclipse.org/legal/epl-v10.html), and the Refractions BSD
 * License v1.0 (http://udig.refractions.net/files/bsd3-v10.html).
 */
package html;


import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import javax.swing.JFileChooser;


public class BulkConvert {
    /**
     * Used to shortlist index.html file
     */
    private static final class IndexFileFilter extends javax.swing.filechooser.FileFilter {
        public String getDescription() {
            return "index.html";
        }


        @Override
        public boolean accept(File f) {
            return f.getName().equals("index.html");
        }
    }


    /**
     * Used to list html files for conversion.
     */
    private final class HtmlFileFilter implements FileFilter {
        public boolean accept(File file) {
            return file.getName().endsWith(".html");
        }
    }


    public static void main(String args[]) {
        if (args.length == 1 && "?".equals(args[0])) {
            System.out.println(" Usage: java html.BulkConvert [index.html] [rst directory]");
            System.out.println();
            System.out.println("Where:");
            System.out.println("  index.html Where you have unzipped the confluence html export");
            System.out
                    .println("  rst directory location where you would like the html files saved");
            System.out.println();
            System.out
                    .println("If not provided the appication will prompt you for the above information");


            System.exit(0);
        }
        File indexFile = args.length > 0 ? new File(args[0]) : null;
        File rstDir = args.length > 1 ? new File(args[1]) : null;


        if (indexFile != null && indexFile.isDirectory()) {
            indexFile = new File(indexFile, "index.html");
        }


        if (indexFile == null || !indexFile.exists()) {
            File cd = new File(".");


            JFileChooser dialog = new JFileChooser(cd);
            dialog.setFileFilter(new IndexFileFilter());
            dialog.setDialogTitle("Locate Confluence wiki html export");
            int open = dialog.showDialog(null, "Convert");


            if (open == JFileChooser.CANCEL_OPTION) {
                System.out.println("Conversion canceled");
                System.exit(-1);
            }
            indexFile = dialog.getSelectedFile();
        }
        if (indexFile == null || !indexFile.exists() || indexFile.isDirectory()) {
            System.out.println("File index.html to use for conversion not provided: '" + indexFile
                    + "'");
            System.exit(-1);
        }
        File htmlDirectory = null;
        try {
            htmlDirectory = indexFile.getParentFile().getCanonicalFile();
        } catch (IOException eek) {
            System.out.println("Coudl not sort parent of " + indexFile + ":" + eek);
            System.exit(-1);
        }


        if (rstDir == null) {
            JFileChooser dialog = new JFileChooser(htmlDirectory.getParentFile());


            dialog.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
            dialog.setDialogTitle("Target Directory for textile files");


            int open = dialog.showDialog(null, "Export");


            if (open == JFileChooser.CANCEL_OPTION) {
                System.out.println("Canceled canceled");
                System.exit(-1);
            }
            rstDir = dialog.getSelectedFile();
        }
        if (rstDir == null || !rstDir.isDirectory() || !rstDir.exists()) {
            System.out.println("Taget directory for textile files not provided: '" + rstDir + "'");
            System.exit(-1);
        }


        BulkConvert bulk = new BulkConvert(htmlDirectory, rstDir);
        bulk.convert();
    }


    private void convert() {


        File[] htmlFileList = htmlDirectory.listFiles(new HtmlFileFilter());
        for (File htmlFile : htmlFileList) {
            // File dir = htmlFile.getParentFile();


            String htmlName = htmlFile.getName();
            if( htmlName.equals("index.html")){
                File tocFile = new File( rstDirectory.getParentFile(), "toc.xml");
                boolean deleted = tocFile.delete();
                if (!deleted) {
                    System.out.println("\tCould not remove '" + tocFile
                            + "' do you have it open in an editor?");
                    System.out.println("\tSkipping ...");
                    continue;
                }
                bufferedStreamsCopy( htmlFile, tocFile );
                continue; // just a straight copy
            }
            String name = htmlName.substring(0, htmlName.lastIndexOf('.'));
            name = fixPageReference(name);
            
            String rstName = name + ".rst";
            if( htmlName.equals("Home.html") ){
                rstName = "index.rst";
            }
            
            File rstFile = new File(rstDirectory, rstName);
            System.out.println(htmlFile + " to " + rstFile.getName());
            if (rstFile.exists()) {
                boolean deleted = rstFile.delete();
                if (!deleted) {
                    System.out.println("\tCould not remove '" + rstFile
                            + "' do you have it open in an editor?");
                    System.out.println("\tSkipping ...");
                    continue;
                }
            }
            boolean success = pandoc(htmlFile, rstFile);
            if (!success) {
                break;
            }
            fixRstAndMoveImages(rstFile);
        }
    }


    private File htmlDirectory;


    private File rstDirectory;


    private BulkConvert(File htmlDirectory, File rstDir) {
        this.htmlDirectory = htmlDirectory;
        this.rstDirectory = rstDir;
    }


    String baseName(File file) {
        String name = file.getName();
        int split = name.lastIndexOf('.');
        if (split == -1) {
            return name;
        }
        String page = name.substring(0, split);
        page = fixPageReference(page);
        page = page.toLowerCase();


        return page;
    }


    String baseTitle(File file) {
        String name = file.getName();
        int split = name.lastIndexOf('.');
        if (split == -1) {
            return name;
        }
        String page = name.substring(0, split);
        
        String title = toTitleCase( page.replace('_',' ') );
        return title;
    }


    String toTitleCase(String heading) {
        StringBuilder title = new StringBuilder();
        boolean next = true;


        for (char c : heading.toCharArray()) {
            if (Character.isSpaceChar(c)) {
                next = true;
            } else if (next) {
                c = Character.toTitleCase(c);
                next = false;
            }
            title.append(c);
        }
        return title.toString();
    }
    /**
     * Process the generated rstFile and perform a few fixes.
     * 
     * @param rstFile
     */
    private boolean fixRstAndMoveImages(File rstFile) {
        File imageDir = new File(rstFile.getParent(), "images");
        String page = baseName(rstFile);
        String title = baseTitle(rstFile);


        // Fix image location
        // BEFORE: .. |image0| image:: download/attachments/3536/view.gif
        //  AFTER: .. |image0| image:: /image/active_part/view.gif
        //
        // Use http://www.regexplanet.com/advanced/java/index.html to work out
        //
        // pattern: .. \|image(\d)\| image:: (download/attachments/.+?)/(.+)
        // replace: .. |image$1| image:: /page/$3
        Pattern imagePattern = Pattern
                .compile(".. \\|image(\\d+)\\| image:: (download/attachments/.+?)/(.+)");
        String imageReplace = ".. |image$1| image:: /images/" + page + "/$3";


        // BEFORE: .. |image10| image:: download/attachments/4523/delete_feature_mode.gif
        //  AFTER: .. |image10| image:: /image/edit_tools/delete_feature_mode.gif
        
        Pattern figurePattern = Pattern.compile(".. figure:: (download/attachments/.+?)/(.+)");
        String figureReplace = ".. figure:: /images/" + page + "/$2";


        // Fix Related cross references
        // .. figure:: http://udig.refractions.net/image/EN/ngrelc.gif
        Map<Pattern, String> related = new HashMap<Pattern, String>();
        related.put(Pattern.compile(".. figure:: http://udig.refractions.net/image/EN/ngrelt.gif"),
                "**Related tasks**");
        related.put(Pattern.compile(".. figure:: http://udig.refractions.net/image/EN/ngrelr.gif"),
                "**Related reference**");
        related.put(Pattern.compile(".. figure:: http://udig.refractions.net/image/EN/ngrelc.gif"),
                "**Related concepts**");


        Pattern linkPattern = Pattern.compile("(\\s*)-\\s*`(.*) <(.*)>`_");
        String linkReplace = "$1* :doc:`$2`";
        
        Pattern strightLinkPattern = Pattern.compile("(\\s*)`(.*) <(.*)>`_");
        String straightLinkReplace = "$1:doc:`$2`";


        Pattern heading = Pattern.compile("(=|-|~|\\^|\\')+");
        final String LEVEL = "=-~^'";


        StringBuilder contents = new StringBuilder();
        BufferedReader reader = null;
        String line;


        int pageLevel = -1; // no heading encountered yet
        boolean includesTitle = false;
        boolean lineFeedNeeded = false;


        String previousLine = null;
        try {
            reader = new BufferedReader(new FileReader(rstFile));


            while ((line = reader.readLine()) != null) {
                if( line.contains("delete_feature_mode.gif")){
                    // breakpoint for debugging
                    System.out.println("Check delete_feature_mode.gif ");
                }
                // check headings
                if (heading.matcher(line).matches()) {
                    int level = LEVEL.indexOf(line.charAt(1));
                    if (pageLevel == -1) {
                        pageLevel = level; // you are the first heading
                        if (previousLine.equalsIgnoreCase( title ) ) {
                            // title already emitted!
                            includesTitle = true;
                        }
                    }
                }
                // Fix Related References
                for (Entry<Pattern, String> entry : related.entrySet()) {
                    Matcher matcher = entry.getKey().matcher(line);
                    if (matcher.matches()) {
                        line = matcher.replaceAll(entry.getValue());
                        reader.readLine(); // skip :align: center
                        reader.readLine(); // skip :alt:


                        lineFeedNeeded = true;
                    }
                }


                // Fix document links
                Matcher linkMatcher = linkPattern.matcher(line);
                if (linkMatcher.matches()) {
                    String indent = linkMatcher.group(1);
                    String pageRef = linkMatcher.group(2);
                    String htmlRef = linkMatcher.group(3);
                    // BEFORE: - `some page <some%20page.html>`_
                    // AFTER: * :doc:`some page`
                    // "$1* :doc:`$2`"
                    // line = linkMatcher.replaceAll(linkReplace);
                    String fixed_page_ref = fixPageReference(pageRef);
                    if( htmlRef.startsWith("http")){
                        // external link `
                        // $1* `$2 <$3>`_
                        line = indent+"* `"+pageRef+" <"+htmlRef+">`_";
                    }
                    else {
                        line = indent+"* :doc:`"+fixed_page_ref+"`";
                        lineFeedNeeded = true;
                    }
                }
                Matcher straightLinkMatcher = strightLinkPattern.matcher(line);
                if (straightLinkMatcher.matches()) {
                    String indent = straightLinkMatcher.group(1);
                    String pageRef = straightLinkMatcher.group(2);
                    String htmlRef = straightLinkMatcher.group(3);
                    // BEFORE: `some page <some%20page.html>`_
                    // AFTER: :doc:`some page`
                    // line = straightLinkMatcher.replaceAll(straightLinkReplace);
                    // lineFeedNeeded = true;
                    String fixed_page_ref = fixPageReference(pageRef);
                    if( htmlRef.startsWith("http")){
                        // external link `
                        // $1`$2 <$3>`_
                        line = indent+"`"+pageRef+" <"+htmlRef+">`_";
                    }
                    else {
                        line = indent+":doc:`"+fixed_page_ref+"`";
                        lineFeedNeeded = true;
                    }
                }
                // check for image references
                //
                Matcher imageMatcher = imagePattern.matcher(line);
                if (imageMatcher.matches()) {
                    String image = imageMatcher.group(1);
                    String path = imageMatcher.group(2);
                    String file = imageMatcher.group(3);


                    File attachementImage = new File(new File(htmlDirectory, path), file);
                    File pageDir = new File(imageDir, page);
                    File pageImage = new File(pageDir, file);


                    duplicateImage(attachementImage, pageImage);
                    boolean moved = pageImage.exists();
                    if (moved) {
                        line = imageMatcher.replaceAll(imageReplace);
                    }
                }
                // check for figure references
                //
                Matcher figureMatcher = figurePattern.matcher(line);
                if (figureMatcher.matches()) {
                    String path = figureMatcher.group(1);
                    String file = figureMatcher.group(2);


                    File attachementImage =new File( new File(htmlDirectory,path), file);
                    File pageDir = new File(imageDir, page);
                    File pageImage = new File(pageDir, file);


                    duplicateImage(attachementImage, pageImage);
                    boolean moved = pageImage.exists();
                    if (moved) {
                        line = figureMatcher.replaceAll(figureReplace);
                    }
                }
                contents.append(line);
                contents.append("\n");
                if (lineFeedNeeded) {
                    contents.append("\n");
                    lineFeedNeeded = false;
                }
                previousLine = line; // remember for next time in case this is a heading!
            }
        } catch (FileNotFoundException e) {
            System.out.println("Unable to read '" + rstFile + "':" + e);
            return false;
        } catch (IOException e) {
            System.out.println("Trouble reading '" + rstFile + "':" + e);
            return false;
        } finally {
            close(reader);
        }
        // prepend title if needed
        if (!includesTitle && title != null && !title.isEmpty()) {
            char chars[] = new char[title.length()];
            Arrays.fill(chars, '#');
            String underline = new String(chars);
            contents.insert(0, "\n");
            contents.insert(0, "\n");
            contents.insert(0, underline);
            contents.insert(0, "\n");
            contents.insert(0, title);
        }


        String text = contents.toString();
        boolean deleted = rstFile.delete();
        if (deleted) {
            // write out modified copy
            try {
                OutputStream modifiedCopy = new BufferedOutputStream(new FileOutputStream(rstFile));


                InputStream textSteram = new ByteArrayInputStream(text.getBytes(Charset
                        .defaultCharset()));
                bufferedStreamsCopy(textSteram, modifiedCopy);
            } catch (IOException eek) {
                System.out.println("Trouble writing modified '" + rstFile + "':" + eek);
                return false;
            }
        }
        return true;
    }
    /** Frank has asked that pages be all lowercase and not contain any spaces */
    private String fixPageReference(String pageRef) {
        pageRef = pageRef.replace(' ','_');
        pageRef = pageRef.toLowerCase();
        
        return pageRef;
    }


    private void duplicateImage(File attachementImage, File pageImage) {
        String fileName = attachementImage.getName();
        
        File liveImage = searchPluginImages(fileName);
        if( liveImage != null ){
            System.out.println("   Override attachment image from : "+ liveImage );
            attachementImage = liveImage;
        }
        else if (!attachementImage.exists()) {
            File otherAttachment = searchAllAttachments(fileName);
            if( otherAttachment != null ){
                System.out.println("   Found attachment on another page " + otherAttachment );
                attachementImage = otherAttachment; // okay we found it on another page
            }
            else {
                System.out.println("   WARNING: Unable to locate " + attachementImage + " broken link!");                
            }
        }


        File pageDir = pageImage.getParentFile();
        if (!pageDir.exists()) {
            pageDir.mkdirs();
        }
        if (!pageImage.exists()) {
            System.out.println("   Copy image to  " + pageImage);
            bufferedStreamsCopy(attachementImage, pageImage);
        } else {
            boolean deleted = pageImage.delete();
            if (deleted) {
                System.out.println("   Copy image to  " + pageImage);
                bufferedStreamsCopy(attachementImage, pageImage);
            } else {
                System.out.println("   WARNING: Unable to repalce " + pageImage);
            }
        }
    }


    /** Search for the provided filename */
    private File searchAllAttachments(String fileName) {
        // search downloads
        File attachments = new File( new File(htmlDirectory, "download"),"attachments");
        if (attachments.exists() && attachments.isDirectory()) {
            File file = searchDirectory(attachments, fileName);
            if (file != null) {
                return file; // found it!
            }
        }
        return null; // not found!
    }


    private File searchPluginImages(String fileName) {
        // search for live icons first!
        File checkout = rstDirectory.getParentFile().getParentFile().getParentFile();
        File plugins = new File(checkout, "plugins");
        if( plugins.exists() && plugins.isDirectory() ){
            for (File plugin : plugins.listFiles()) {
                File icons = new File(plugin, "icons");
                if (icons.exists() && icons.isDirectory()) {
                    File file = searchDirectory(icons, fileName);
                    if (file != null) {
                        return file; // found it!
                    }
                }
            }
        }
        return null; // not found
    }


    private File searchDirectory(File dir, String fileName) {
        for( File file : dir.listFiles() ){
            if( file.isDirectory() ){
                File found = searchDirectory(file, fileName);
                if( found != null ){
                    return found; // found!
                }
            }
            if( fileName.equals( file.getName() ) ){
                return file;
            }
        }
        return null; // not found!
    }


    private boolean pandoc(File htmlFile, File rstFile) {


        String rstFilePath = rstFile.getAbsolutePath().toString();
        String htmlFilePath = htmlFile.getAbsolutePath().toString();


        System.out.println("/usr/local/bin/pandoc --columns=100 -o \"" + rstFilePath + "\" \""
                + htmlFilePath + "\"");
        String run[] = new String[] { "/usr/local/bin/pandoc", "--columns=100", "-o", rstFilePath,
                htmlFilePath };
        try {
            Process process = Runtime.getRuntime().exec(run, null, htmlFile.getParentFile());
            int exit = process.waitFor();
            System.out.println("\tGenerated " + rstFile.getName() + " with exist code " + exit);
            return exit == 0;
        } catch (InterruptedException e) {
            System.out.println("\\tFailed on " + rstFile.getName() + " with: " + e);
            return false;
        } catch (IOException e) {
            System.out.println("\\tFailed on " + rstFile.getName() + " with: " + e);
            e.printStackTrace();
            return false;
        }
    }


    private boolean html2res(File htmlFile, File rstFile) {
        String run[] = new String[] { "/usr/local/bin/html2rest", htmlFile.getName() };


        BufferedInputStream inputStream = null;
        BufferedOutputStream outputStream = null;
        try {
            Process process = Runtime.getRuntime().exec(run, null, htmlFile.getParentFile());
            // inputStream = new BufferedInputStream(new FileInputStream(htmlFile));
            // outputStream = new BufferedOutputStream( new FileOutputStream(rstFile));


            inputStream = new BufferedInputStream(process.getInputStream());
            outputStream = new BufferedOutputStream(new FileOutputStream(rstFile));


            bufferedStreamsCopy(inputStream, outputStream);


            process.waitFor();
            int exit = process.exitValue();
            System.out.println("\tGenerated " + rstFile.getName() + " with exist code " + exit);
            return exit == 0;
        } catch (IOException e) {
            System.out.println("\\tFailed on " + rstFile.getName() + " with: " + e);
            e.printStackTrace();
            return false;
        } catch (InterruptedException e) {
            System.out.println("\\tFailed on " + rstFile.getName() + " with: " + e);
            return false;
        }
    }


    private void bufferedStreamsCopy(File origional, File copy) {
        InputStream in = null;
        OutputStream out = null;
        try {
            in = new BufferedInputStream(new FileInputStream(origional));
            out = new BufferedOutputStream(new FileOutputStream(copy));


            bufferedStreamsCopy(in, out);
        } catch (Exception eek) {
            System.out.print("Unable to copy from " + origional.getName() + "to " + copy);
            eek.printStackTrace();
        }
    }


    // From http://java.dzone.com/articles/file-copy-java-�-benchmark
    private void bufferedStreamsCopy(InputStream fin, OutputStream fout) throws IOException {
        try {
            int data;
            while ((data = fin.read()) != -1) {
                fout.write(data);
            }
            fout.flush();
        } finally {
            close(fin);
            close(fout);
        }
    }


    private void close(Closeable closable) {
        if (closable != null) {
            try {
                closable.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
Source Code of html.BulkConvert

Related Classes of html.BulkConvert