Package

Source Code of CrossReferencer

import java.util.*;
import java.io.*;
import java.net.*;
import com.quiotix.html.parser.*;
import com.quiotix.html.parser.HtmlDocument.Attribute;
import com.quiotix.html.parser.HtmlDocument.AttributeList;

class CrossReferencer extends HtmlVisitor {

    InputStream in;
    PrintWriter out;
    ParagraphNumberer numberer;

    String skipToEndTag = null;

    public CrossReferencer(File inputFile, File outputFile,
                           ParagraphNumberer numberer) {
        try {
            this.in = new FileInputStream(inputFile);
            this.out = new PrintWriter(new FileWriter(outputFile));
            this.numberer = numberer;
            HtmlParser parser = new HtmlParser(in);
            parser.streamAccept(this);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void print(Object o) {
        if (skipToEndTag == null)
            out.print(o.toString());
    }

    public void visit(HtmlDocument.Text t) {
        String text = t.toString();
        int pos = text.indexOf(TABLE_OF_CONTENTS_TAG);
        if (pos == -1)
            print(t);
        else {
            print(text.substring(0, pos));
            if (numberer != null)
                numberer.printTOC(out);
            print(text.substring(pos+TABLE_OF_CONTENTS_TAG.length()));
        }
    }
    public static final String TABLE_OF_CONTENTS_TAG = "TABLE_OF_CONTENTS";

    public void visit(HtmlDocument.Tag t) {
        if (t.tagName.equalsIgnoreCase("SPAN"))
            visitSpan(t);

        print(t);
    }

    public void visit(HtmlDocument.EndTag t) {
        print(t);
        if (skipToEndTag != null && t.tagName.equalsIgnoreCase(skipToEndTag))
            skipToEndTag = null;
    }

    public void visit(HtmlDocument.Comment c)    { print(c);   }
    public void visit(HtmlDocument.Newline n)    { out.println(); }
    public void visit(HtmlDocument.Annotation a) { print(a);   }
    public void finish() {
        try {
            out.flush();
            out.close();
            in.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    public void visitSpan(HtmlDocument.Tag t) {
        if (numberer == null) return;
        String crossRef = getCrossReference(t, null);
        if (crossRef == null) return;
        String parNum = numberer.getNumberForRef(crossRef, true);
        if (parNum == null) {
            System.err.println("Couldn't find reference " + crossRef);
            return;
        }

        out.print(t);
        out.print(parNum);
        out.print("</span>");
        skipToEndTag = "span";
    }

    private static final String CROSS_REF_STARTER = "mso-field-code:\"REF ";
    public static String getCrossReference(HtmlDocument.Tag t,
                                           String insertPrefix) {
        Attribute style = HTMLSpider.getAttribute(t, "STYLE");
        if (style == null) return null;
        String s = style.value;
        // only handle cross references to paragraph numbers for now
        if (s.indexOf("\\r") == -1) return null;
        int beginPos = s.indexOf(CROSS_REF_STARTER);
        if (beginPos == -1) return null;
        beginPos += CROSS_REF_STARTER.length();

        if (insertPrefix != null)
            style.value = s =
                s.substring(0, beginPos) +insertPrefix+ s.substring(beginPos);

        int endPos = endOfWord(s, beginPos);
        return s.substring(beginPos, endPos);
    }
    private static final String WORD_CHARS =
        "abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    private static int endOfWord(String s, int wordStart) {
        for (int i = wordStart;   i < s.length();   i++)
            if (WORD_CHARS.indexOf(s.charAt(i)) == -1)
                return i;
        return s.length();
    }
}
TOP

Related Classes of CrossReferencer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.