Package org.archive.wayback.util.htmllex

Source Code of org.archive.wayback.util.htmllex.ParseEventDelegator

/*
*  This file is part of the Wayback archival access software
*   (http://archive-access.sourceforge.net/projects/wayback/).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/
package org.archive.wayback.util.htmllex;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.archive.wayback.util.htmllex.handlers.CSSTextHandler;
import org.archive.wayback.util.htmllex.handlers.CloseTagHandler;
import org.archive.wayback.util.htmllex.handlers.ContentTextHandler;
import org.archive.wayback.util.htmllex.handlers.JSTextHandler;
import org.archive.wayback.util.htmllex.handlers.OpenTagHandler;
import org.archive.wayback.util.htmllex.handlers.ParseCompleteHandler;
import org.archive.wayback.util.htmllex.handlers.ParseStartHandler;
import org.archive.wayback.util.htmllex.handlers.RemarkTextHandler;
import org.htmlparser.Node;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.nodes.TextNode;

/**
*
* This class provides an abstraction between high-level SAX events, and
* application specific low-level SAX event handlers.
*
* Any object which wishes to receive any low-level SAX events is placed in the
* parserVisitors List, and at initialization of this class, each element in
* that list is given an opportunity to register to receive whatever low-level
* SAX events it is interested in.
*
* This class also manages casting of Node objects into more event-specific
* casts, and uses the ParseContext to route specific nodes to the registered
* handlers of each low-level event types.
*
* This class attempts to be efficient about targeting specific TagNodes:
*   When registering to receive events, handlers can register for a specific
*   tag name, or for the global-tag ("*") name.
*
* As TagNodes are handled, all tag-specific handlers are called, followed by
* all global-tag handlers.
*
* @author brad
*/
public class ParseEventDelegator implements ParseEventHandler {

  public static final String WILDCARD_TAG_NAME = "*";

  private Map<String,List<CloseTagHandler>> closeTagHandlers = null;
  private Map<String,List<OpenTagHandler>> openTagHandlers = null;
  private List<CSSTextHandler> cssTextHandlers = null;
  private List<JSTextHandler> jsTextHandler = null;
  private List<RemarkTextHandler> remarkTextHandler = null;
  private List<ContentTextHandler> contentTextHandler = null;
  private List<ParseCompleteHandler> parseCompleteHandlers = null;
  private List<ParseStartHandler> parseStartHandlers = null;

  private List<ParseEventDelegatorVisitor> parserVisitors = null;


  public void init() {
    if(parserVisitors != null) {
      for(ParseEventDelegatorVisitor visitor : parserVisitors) {
        visitor.visit(this);
      }
    }
  }

  public void handleNode(ParseContext context, Node node)
    throws IOException {

    if(NodeUtils.isRemarkNode(node)) {
      RemarkNode remarkNode = (RemarkNode) node;
      handleRemarkTextNode(context,remarkNode);
     
    } else if(NodeUtils.isTextNode(node)) {
      TextNode textNode = (TextNode) node;
      if(context.isInCSS()) {
        handleCSSTextNode(context,textNode);
       
      } else if(context.isInScriptText()) {
        handleJSTextNode(context,textNode);
      } else {
        handleContentTextNode(context,textNode);
      }
    } else if(NodeUtils.isTagNode(node)) {
      TagNode tagNode = (TagNode) node;
      if(tagNode.isEndTag()) {
        handleCloseTagNode(context,tagNode);
      } else {
        // assume start, possibly empty:
        handleOpenTagNode(context,tagNode);
      }
    } else {
      throw new IllegalArgumentException("Unknown node type..");
    }
  }

  // CLOSE TAG:
  public void addCloseTagHandler(CloseTagHandler v) {
    addCloseTagHandler(v, WILDCARD_TAG_NAME);
  }
  public void addCloseTagHandler(CloseTagHandler v, String name) {
    if(closeTagHandlers == null) {
      closeTagHandlers = new HashMap<String,List<CloseTagHandler>>();
    }
    if(!closeTagHandlers.containsKey(name)) {
      closeTagHandlers.put(name, new ArrayList<CloseTagHandler>());
    }
    closeTagHandlers.get(name).add(v);
  }
  public void handleCloseTagNode(ParseContext context, TagNode node) throws IOException {
    String name = node.getTagName();
    if(closeTagHandlers != null) {
      for(String n : new String[]{name,WILDCARD_TAG_NAME}) {
        if(closeTagHandlers.containsKey(n)) {
          for(CloseTagHandler v : closeTagHandlers.get(n)) {
            v.handleCloseTagNode(context,node);
          }
        }
      }
    }
  }

  // OPEN TAG:
  public void addOpenTagHandler(OpenTagHandler v) {
    addOpenTagHandler(v, WILDCARD_TAG_NAME);
  }
  public void addOpenTagHandler(OpenTagHandler v, String name) {
    if(openTagHandlers == null) {
      openTagHandlers = new HashMap<String,List<OpenTagHandler>>();
    }
    if(!openTagHandlers.containsKey(name)) {
      openTagHandlers.put(name, new ArrayList<OpenTagHandler>());
    }
    openTagHandlers.get(name).add(v);
  }

  public void handleOpenTagNode(ParseContext context, TagNode node) throws IOException {
    String name = node.getTagName();
    if(openTagHandlers != null) {
      for(String n : new String[]{name,WILDCARD_TAG_NAME}) {
        if(openTagHandlers.containsKey(n)) {
          for(OpenTagHandler v : openTagHandlers.get(n)) {
            v.handleOpenTagNode(context,node);
          }
        }
      }
    }
  }
  public void addCSSTextHandler(CSSTextHandler v) {
    if(cssTextHandlers == null) {
      cssTextHandlers = new ArrayList<CSSTextHandler>();
    }
    cssTextHandlers.add(v);
  }
  public void handleCSSTextNode(ParseContext context, TextNode node) throws IOException {
    if(cssTextHandlers != null) {
      for(CSSTextHandler v : cssTextHandlers) {
        v.handleCSSTextNode(context,node);
      }
    }
  }
  public void addJSTextHandler(JSTextHandler v) {
    if(jsTextHandler == null) {
      jsTextHandler = new ArrayList<JSTextHandler>();
    }
    jsTextHandler.add(v);
  }
  public void handleJSTextNode(ParseContext context, TextNode node) throws IOException {
    if(jsTextHandler != null) {
      for(JSTextHandler v : jsTextHandler) {
        v.handleJSTextNode(context,node);
      }
    }
  }

  public void addRemarkTextHandler(RemarkTextHandler v) {
    if(remarkTextHandler == null) {
      remarkTextHandler = new ArrayList<RemarkTextHandler>();
    }
    remarkTextHandler.add(v);
  }
  public void handleRemarkTextNode(ParseContext context, RemarkNode node) throws IOException {
    if(remarkTextHandler != null) {
      for(RemarkTextHandler v : remarkTextHandler) {
        v.handleRemarkTextNode(context,node);
      }
    }
  }

  public void addContentTextHandler(ContentTextHandler v) {
    if(contentTextHandler == null) {
      contentTextHandler = new ArrayList<ContentTextHandler>();
    }
    contentTextHandler.add(v);
  }
  public void handleContentTextNode(ParseContext context, TextNode node) throws IOException {
    if(contentTextHandler != null) {
      for(ContentTextHandler v : contentTextHandler) {
        v.handleContentTextNode(context,node);
      }
    }
  }

  public void addParseCompleteHandler(ParseCompleteHandler v) {
    if(parseCompleteHandlers == null) {
      parseCompleteHandlers = new ArrayList<ParseCompleteHandler>();
    }
    parseCompleteHandlers.add(v);
  }
  public void handleParseComplete(ParseContext context) throws IOException {
    if(parseCompleteHandlers != null) {
      for(ParseCompleteHandler v : parseCompleteHandlers) {
        v.handleParseComplete(context);
      }
    }
  }

  public void addParseStartHandler(ParseStartHandler v) {
    if(parseStartHandlers == null) {
      parseStartHandlers = new ArrayList<ParseStartHandler>();
    }
    parseStartHandlers.add(v);
  }
  public void handleParseStart(ParseContext context) throws IOException {
    if(parseStartHandlers != null) {
      for(ParseStartHandler v : parseStartHandlers) {
        v.handleParseStart(context);
      }
    }
  }

  /**
   * @return the parserVisitors
   */
  public List<ParseEventDelegatorVisitor> getParserVisitors() {
    return parserVisitors;
  }

  /**
   * @param parserVisitors the parserVisitors to set
   */
  public void setParserVisitors(List<ParseEventDelegatorVisitor> parserVisitors) {
    this.parserVisitors = parserVisitors;
  }
}
TOP

Related Classes of org.archive.wayback.util.htmllex.ParseEventDelegator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.