Package

Source Code of AnchorMapper

import java.util.*;
import java.io.*;
import java.net.*;
import com.quiotix.html.parser.*;
import com.quiotix.html.parser.HtmlDocument.Attribute;
import com.quiotix.html.parser.HtmlDocument.AttributeList;

class AnchorMapper extends HTMLSpider {

    public Map anchorMap;
    static int docNum;
    String docName;


    public static Map runMapper(URL u) {
        docNum = 0;
        AnchorMapper mapper = new AnchorMapper();
        mapper.openURL(u);
        return mapper.anchorMap;
    }


    public AnchorMapper() {
        super();
        anchorMap = new HashMap();
        docName = "doc" + docNum++;
    }

    protected AnchorMapper(AnchorMapper that) {
        super(that);
        this.anchorMap = that.anchorMap;
        docName = "doc" + docNum++;
    }

    protected HTMLSpider getRecursiveInstance(HtmlDocument.Tag t) {
        return new AnchorMapper(this);
    }

    public void openURL(URL u) {
        anchorMap.put(normalizeURL(u), docName + "_top");
        super.openURL(u);
    }

    public void visit(HtmlDocument.Tag t) {
        if (t.tagName.equalsIgnoreCase("A")) {
            Attribute name = getAttribute(t, "NAME");
            if (name != null) {
                String anchorName = deQuote(name.value);
                URL anchorUrl = resolveHash(name);
                if (anchorUrl != null)
                    anchorMap.put(normalizeURL(anchorUrl),
                                  docName + "_" + anchorName);
            }
        }
        super.visit(t);
    }


}
TOP

Related Classes of AnchorMapper

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.