Package org.htmlparser.scanners

Source Code of org.htmlparser.scanners.FormScanner

// $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/scanners/FormScanner.java,v 1.2 2004/02/10 13:41:09 woolfel Exp $
/*
* ====================================================================
* Copyright 2002-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

// The developers of JMeter and Apache are greatful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.

package org.htmlparser.scanners;

//////////////////
// Java Imports //
//////////////////
import org.htmlparser.Parser;
import org.htmlparser.tags.FormTag;
import org.htmlparser.tags.Tag;
import org.htmlparser.tags.data.CompositeTagData;
import org.htmlparser.tags.data.TagData;
import org.htmlparser.util.LinkProcessor;
import org.htmlparser.util.ParserException;

/**
* Scans for the Image Tag. This is a subclass of TagScanner, and is called using a
* variant of the template method. If the evaluate() method returns true, that means the
* given string contains an image tag. Extraction is done by the scan method thereafter
* by the user of this class.
*/
public class FormScanner extends CompositeTagScanner
{
    private static final String[] MATCH_ID = { "FORM" };
    public static final String PREVIOUS_DIRTY_LINK_MESSAGE =
        "Encountered a form tag after an open link tag.\nThere should have been an end tag for the link before the form tag began.\nCorrecting this..";
    private boolean linkScannerAlreadyOpen = false;
    private static final String[] formTagEnders = { "HTML", "BODY" };
    /**
    * HTMLFormScanner constructor comment.
    */
    public FormScanner(Parser parser)
    {
        this("", parser);
    }
    /**
     * Overriding the constructor to accept the filter
     */
    public FormScanner(String filter, Parser parser)
    {
        super(filter, MATCH_ID, formTagEnders, false);
        parser.addScanner(new InputTagScanner("-i"));
        parser.addScanner(new TextareaTagScanner("-t"));
        parser.addScanner(new SelectTagScanner("-select"));
        parser.addScanner(new OptionTagScanner("-option"));
    }

    /**
     * Extract the location of the image, given the string to be parsed, and the url
     * of the html page in which this tag exists.
     * @param s String to be parsed
     * @param url URL of web page being parsed
     */
    public String extractFormLocn(Tag tag, String url) throws ParserException
    {
        try
        {
            String formURL = tag.getAttribute("ACTION");
            if (formURL == null)
                return "";
            else
                return (new LinkProcessor()).extract(formURL, url);
        }
        catch (Exception e)
        {
            String msg;
            if (tag != null)
                msg = tag.getText();
            else
                msg = "";
            throw new ParserException(
                "HTMLFormScanner.extractFormLocn() : Error in extracting form location, tag = "
                    + msg
                    + ", url = "
                    + url,
                e);
        }
    }

    public String extractFormName(Tag tag)
    {
        return tag.getAttribute("NAME");
    }

    public String extractFormMethod(Tag tag)
    {
        String method = tag.getAttribute("METHOD");
        if (method == null)
            method = FormTag.GET;
        return method.toUpperCase();

    }

    /**
     * Scan the tag and extract the information related to the <IMG> tag. The url of the
     * initiating scan has to be provided in case relative links are found. The initial
     * url is then prepended to it to give an absolute link.
     * The NodeReader is provided in order to do a lookahead operation. We assume that
     * the identification has already been performed using the evaluate() method.
     * @param tag HTML Tag to be scanned for identification
     * @param url The initiating url of the scan (Where the html page lies)
     * @param reader The reader object responsible for reading the html page
     * @param currentLine The current line (automatically provided by Tag)
     */
    //  public Tag scan(Tag tag,String url,NodeReader reader,String currentLine) throws ParserException
    //  {
    //    if (linkScannerAlreadyOpen) {
    //      String newLine = insertEndTagBeforeNode(tag, currentLine);
    //      reader.changeLine(newLine);
    //      return new EndTag(
    //        new TagData(
    //          tag.elementBegin(),
    //          tag.elementBegin()+3,
    //          "A",
    //          currentLine
    //        )
    //      );
    //    }
    //    return super.scan(tag,url,reader,currentLine);
    //  }

    /**
     * @see org.htmlparser.scanners.TagScanner#getID()
     */
    public String[] getID()
    {
        return MATCH_ID;
    }

    public boolean evaluate(String s, TagScanner previousOpenScanner)
    {
        if (previousOpenScanner instanceof LinkScanner)
        {
            linkScannerAlreadyOpen = true;
            StringBuffer msg = new StringBuffer();
            msg.append("<");
            msg.append(s);
            msg.append(">");
            msg.append(PREVIOUS_DIRTY_LINK_MESSAGE);
            feedback.warning(msg.toString());
            // This is dirty HTML. Assume the current tag is
            // not a new link tag - but an end tag. This is actually a really wild bug -
            // Internet Explorer actually parses such tags.
            // So - we shall then proceed to fool the scanner into sending an endtag of type </A>
            // For this - set the dirty flag to true and return
        }
        else
            linkScannerAlreadyOpen = false;
        return super.evaluate(s, previousOpenScanner);
    }

    public Tag createTag(TagData tagData, CompositeTagData compositeTagData)
        throws ParserException
    {
        String formUrl =
            extractFormLocn(
                compositeTagData.getStartTag(),
                tagData.getUrlBeingParsed());
        if (formUrl != null && formUrl.length() > 0)
            compositeTagData.getStartTag().setAttribute("ACTION", formUrl);
        return new FormTag(tagData, compositeTagData);
    }

}
TOP

Related Classes of org.htmlparser.scanners.FormScanner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.