// Source Code of org.jnetpcap.protocol.application.HtmlParser

/*
 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Sly Technologies, Inc.
 *
 * This file is part of jNetPcap.
 *
 * jNetPcap is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as 
 * published by the Free Software Foundation, either version 3 of 
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.jnetpcap.protocol.application;


import java.util.ArrayList;
import java.util.List;


import org.jnetpcap.protocol.application.Html.HtmlTag;
import org.jnetpcap.protocol.application.Html.Tag;
import org.jnetpcap.util.JThreadLocal;


// TODO: Auto-generated Javadoc
/**
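 * Simple HTML tag parser: splits a page string into its tags and the text
 * found between them, and can filter the result down to link-like tags.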
 * 
 * @author Mark Bednarczyk
 * @author Sly Technologies, Inc.
 */
public class HtmlParser {


  /** The e. */
  private int e = 0;


  /** The s. */
  private int s = 0;


  /** The str. */
  private String str = null;


  /** The Constant listLocal. */
  @SuppressWarnings("rawtypes")
  private static final JThreadLocal<ArrayList> listLocal =
      new JThreadLocal<ArrayList>(ArrayList.class);


  /**
   * Decode all tags.
   * 
   * @param page
   *          the page
   * @return the html tag[]
   */
  @SuppressWarnings("unchecked")
  public HtmlTag[] decodeAllTags(String page) {
    this.e = 0;
    this.s = e;


    final List<HtmlTag> list = listLocal.get();
    list.clear();


    int textStart = 0;
    while (true) {
      final HtmlTag tag = nextTag(page, '<', '>');
      if (tag == null) {
        break;
      }


      if (textStart != this.s) {
        String text = page.substring(textStart, this.s);
        if (text.length() != 0) {
          list.add(new HtmlTag(Tag.TEXT, HtmlTag.Type.ATOMIC, text, page,
              textStart, this.s));
        }
      }


      textStart = this.e + 1;


      list.add(tag);
    }


    return list.toArray(new HtmlTag[list.size()]);
  }


  /**
   * Decode links.
   * 
   * @param tags
   *          the tags
   * @return the html tag[]
   */
  @SuppressWarnings("unchecked")
  public HtmlTag[] decodeLinks(HtmlTag[] tags) {
    List<HtmlTag> links = listLocal.get();
    links.clear();


    for (HtmlTag t : tags) {
      switch (t.getTag()) {
        case A:
        case LINK:
        case IMG:
        case SCRIPT:
        case FORM:
          if (t.type == HtmlTag.Type.OPEN) {
            links.add(t);
          }
      }
    }


    return links.toArray(new HtmlTag[links.size()]);
  }


  /**
   * Extract bounded.
   * 
   * @param str
   *          the str
   * @param start
   *          the start
   * @param end
   *          the end
   * @return the string
   */
  private String extractBounded(String str, char start, char end) {
    if (this.str != str) {
      this.s = 0;
      this.e = 0;
      this.str = str;
    }


    s = str.indexOf('<', e);
    e = str.indexOf('>', s);


    return (s == -1 || e == -1) ? null : str.substring(s + 1, e).trim()
        .replace("\r\n", "");
  }


  /**
   * Next tag.
   * 
   * @param str
   *          the str
   * @param start
   *          the start
   * @param end
   *          the end
   * @return the html tag
   */
  private HtmlTag nextTag(String str, char start, char end) {


    String tagString = extractBounded(str, start, end);
    if (tagString == null) {
      return null;
    }


    Tag tag;
    HtmlTag.Type type = HtmlTag.Type.OPEN;
    if (tagString.charAt(0) == '/') {
      tagString = tagString.substring(1);
      type = HtmlTag.Type.CLOSE;
    }


    tag = Tag.parseStringPrefix(tagString);


    if (tag == null) {
      return null;
    }


    HtmlTag ht = new HtmlTag(tag, type, tagString, this.str, this.s, this.e);


    return ht;
  }


  /**
   * Format.
   * 
   * @param str
   *          the str
   * @return the string
   */
  public String format(String str) {


    str = str.replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t");
    return str;
  }


}
// Source Code of org.jnetpcap.protocol.application.HtmlParser
//
// Related Classes of org.jnetpcap.protocol.application.HtmlParser