Source Code of de.susebox.jtopas.TestEmbeddedTokenizer

/*
 * TestEmbeddedTokenizer.java: JUnit test for the StandardTokenizer
 *
 * Copyright (C) 2001 Heiko Blau
 *
 * This file belongs to the Susebox Java core test suite.
 * The Susebox Java core test suite is free software; you can redistribute it 
 * and/or modify it under the terms of the GNU Lesser General Public License as 
 * published by the Free Software Foundation; either version 2.1 of the License, 
 * or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
 * FITNESS FOR A PARTICULAR PURPOSE. 
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along 
 * with the Susebox Java core test suite. If not, write to the
 *
 *   Free Software Foundation, Inc.
 *   59 Temple Place, Suite 330, 
 *   Boston, MA 02111-1307 
 *   USA
 *
 * or check the Internet: http://www.fsf.org
 *
 * The Susebox Java core test suite uses the test framework JUnit by Kent Beck 
 * and Erich Gamma. You should have received a copy of their JUnit licence 
 * agreement along with the Susebox Java test suite.
 *
 * We do NOT provide the JUnit archive junit.jar necessary to compile and run
 * our tests, since we assume that you either have it already or would like
 * to get the current release yourself.
 * Please visit either:
 *   http://sourceforge.net/projects/junit
 * or
 *   http://junit.org
 * to obtain JUnit.
 *
 * Contact:
 *   email: heiko@susebox.de 
 */


package de.susebox.jtopas;


//-----------------------------------------------------------------------------
// Imports
//
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Vector;
import java.util.Properties;
import java.net.URL;


import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.framework.Assert;


import de.susebox.java.lang.ExtRuntimeException;


import de.susebox.TestUtilities;




//-----------------------------------------------------------------------------
// Class TestEmbeddedTokenizer
//


/**<p>
 * This unit test checks the embedded-tokenizer feature of the class {@link StandardTokenizer}.
 * With this technique it is possible to parse multipart documents like HTML with
 * embedded CSS and script parts, Java and javadoc comments etc.
 *</p><p>
 * This test suite works with a test configuration file. This file contains some
 * sets of properties, each set for one or more different test runs.
 *</p><p>
 * The properties are defined as class constants. In the configuration file, a 
 * property consists of the property name and a number identifying the property
 * set. 
 *</p>
 *
 * @see     StandardTokenizer
 * @author  Heiko Blau
 */
public class TestEmbeddedTokenizer extends TestCase {
  
  //---------------------------------------------------------------------------
  // properties
  //


  /**
   * The name of the test configuration file. This file is located relative to
   * this class and read via {@link java.lang.Class#getResourceAsStream} when
   * the test suite is assembled in {@link #suite}.
   */
  public static final String CONFIG_FILE = "TestEmbeddedTokenizer.conf";
  
  /**
   * Prefix of the configuration properties for the test
   * {@link #testEmbeddedTokenizer}. {@link #suite} appends a running number
   * starting at 1 (<code>Path1</code>, <code>Path2</code>, ...) and stops at 
   * the first number without an entry.
   */
  public static final String PROP_PATH = "Path";
  
  /**
   * Prefix of the configuration properties for the test
   * {@link #testJavaTokenizer}. {@link #suite} appends a running number
   * starting at 1 (<code>JavaPath1</code>, <code>JavaPath2</code>, ...) and 
   * stops at the first number without an entry.
   */
  public static final String PROP_JAVAPATH = "JavaPath";
  
  
  //---------------------------------------------------------------------------
  // main method
  //
  
  /**
   * call this method to invoke the tests.
   *
   * @param args  unused
   */
  public static void main(String[] args) {
    String[]   tests = { TestEmbeddedTokenizer.class.getName() };


    TestUtilities.run(tests, args);
  }
  


  //---------------------------------------------------------------------------
  // suite method
  //
  
  /**
   * Implementation of the JUnit method <code>suite</code>. For each set of test
   * properties one or more tests are instantiated.
   *
   * @return a test suite
   */
  public static Test suite() {
    TestSuite   suite = new TestSuite(TestEmbeddedTokenizer.class.getName());
    Properties  props = new Properties();
    int         count = 1;
    String      path;
    URL         url;
    
    try {
      props.load(TestEmbeddedTokenizer.class.getResourceAsStream(CONFIG_FILE));
    } catch (Exception ex) {
      throw new ExtRuntimeException(ex);
    }


    // test on HTML files
    while ((path = props.getProperty(PROP_PATH + count)) != null) {
      if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
        path = url.getFile();
      }
      suite.addTest(new TestEmbeddedTokenizer("testEmbeddedTokenizer", path));
      count++;
    }


    // tests on Java files
    count = 1;
    while ((path = props.getProperty(PROP_JAVAPATH + count)) != null) {
      if ((url = TestEmbeddedTokenizer.class.getResource(path)) != null) {
        path = url.getFile();
      }
      suite.addTest(new TestEmbeddedTokenizer("testJavaTokenizer", path));
      count++;
    }
    return suite;
  }
  
  
  //---------------------------------------------------------------------------
  // Constructor
  //
  
  /**
   * Initializing the instance with the test file path
   *
   * @param test  which test method should be invoked
   * @param path  name of test configuration file  
   */  
  public TestEmbeddedTokenizer(String test, String path) {
    super(test);
    _path = path;
  }


  
  //---------------------------------------------------------------------------
  // Fixture setup and release
  //
  
  /**
   * Sets up the fixture, for example, open a network connection.
   * This method is called before a test is executed.
   *
   * @throws Exception for anything that might go wrong
   */
  protected void setUp() throws Exception {
    InputStream  stream = new FileInputStream(_path);
    
    _reader = new InputStreamReader(stream);
  }


  
  /**
   * Tears down the fixture, for example, close a network connection.
   * This method is called after a test is executed.
   *
   * @throws Exception for anything that might go wrong
   */
  protected void tearDown() throws Exception {
    _reader.close();
  }
  
  //---------------------------------------------------------------------------
  // test cases
  //
  
  
  /**
   * This method reads the given stream as a Java source. It extracts javadoc
   * comments and source code.
   * There should be a class or interface name in every Java source. The opening
   * and closing brackets should match etc.
   *
   * @throws Throwable   for anything that might go wrong
   * @see   #testEmbeddedTokenizer
   */
  public void testJavaTokenizer() throws Throwable {
    long                        start         = System.currentTimeMillis();
    StandardTokenizerProperties javaProps     = new StandardTokenizerProperties();
    StandardTokenizerProperties docProps      = new StandardTokenizerProperties();
    StandardTokenizer           javaTokenizer = new StandardTokenizer(javaProps);
    StandardTokenizer           docTokenizer  = new StandardTokenizer(docProps);
    StandardTokenizer           currTokenizer = javaTokenizer;
    Object                      openBlock     = new Object();
    Object                      closeBlock    = new Object();
    Object                      atSign        = new Object();
    int                         blockBalance  = 0;
    Token                       token;
    int                         lastStartLineNo = -1;
    int                         lastStartColNo  = -1;


    javaProps.setParseFlags(Flags.F_TOKEN_POS_ONLY | Flags.F_KEEP_DATA | Flags.F_COUNT_LINES);
    docProps.setParseFlags(Flags.F_NO_CASE);
    
    javaProps.addSpecialSequence("/**", docTokenizer);
    javaProps.addSpecialSequence("{", openBlock);
    javaProps.addSpecialSequence("}", closeBlock);
    javaProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
    javaProps.addLineComment(TokenizerProperties.DEFAULT_LINE_COMMENT);
    javaProps.addString(TokenizerProperties.DEFAULT_STRING_START, TokenizerProperties.DEFAULT_STRING_END, TokenizerProperties.DEFAULT_STRING_ESCAPE);
    javaProps.addString("'", "'", "\\");
    docProps.addSpecialSequence("*/", javaTokenizer);
    docProps.addSpecialSequence("@", atSign);
    docProps.addKeyword("param");
    docProps.addKeyword("return");
    docProps.addKeyword("throws");
    docProps.addKeyword("author");
    docProps.addKeyword("version");
    docProps.addKeyword("link");
    docProps.addKeyword("see");
    docProps.addKeyword("deprecated");
    
    javaTokenizer.setSource(_reader);
    javaTokenizer.addTokenizer(docTokenizer);


    System.out.println("\nStart parsing \"" + _path + "\"");
    while (currTokenizer.hasMoreToken()) {
      token = currTokenizer.nextToken();
      
      // Line counting test
      assertTrue(token.getStartLine() >= lastStartLineNo);
      if (token.getStartLine() == lastStartLineNo) {
        assertTrue(token.getStartColumn() >= lastStartColNo);
        if (token.getEndLine() == lastStartLineNo) {
          assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
        }
      }
      lastStartLineNo = token.getStartLine();
      lastStartColNo  = token.getStartColumn();
      
      // tokenizer switching
      switch (token.getType()) {
        case Token.SPECIAL_SEQUENCE:
          if (token.getCompanion() instanceof StandardTokenizer) {
            StandardTokenizer tokenizer = (StandardTokenizer)token.getCompanion();
            
            currTokenizer.switchTo(tokenizer);
            currTokenizer = tokenizer;
          } else if (token.getCompanion() == openBlock) {
            blockBalance++;
          } else if (token.getCompanion() == closeBlock) {
            blockBalance--;
          } else if (token.getCompanion() == atSign) {
            token = currTokenizer.nextToken();
            assertTrue("Expected keyword after @ sign in javadoc comment, but found \"" + currTokenizer.currentImage(),
                      token.getType() == Token.KEYWORD);
          }
          break;
      }
    }
    
    // some checks
    assertTrue("Braces should be balanced in Java file \"" 
              + _path + "\", but detected inbalance " + blockBalance,
              blockBalance == 0);


    // print elapsed time
    long diff = System.currentTimeMillis() - start;
    System.out.println("Finished after " + diff + " milliseconds");
  }
    


  
  /**
   * The method takes the HTML file given in the constructor, and parses with
   * the main HTML tokenizer and two embedded tokenizers for JavaScript and
   * CSS.
   *
   * @throws Throwable   for anything that might go wrong
   * @see   #testEmbeddedTokenizer
   */
  public void testEmbeddedTokenizer() throws Throwable {
    long                        start         = System.currentTimeMillis();
    StandardTokenizerProperties htmlProps     = new StandardTokenizerProperties();
    StandardTokenizerProperties jsProps       = new StandardTokenizerProperties();
    StandardTokenizerProperties cssProps      = new StandardTokenizerProperties();
    StandardTokenizer           htmlTokenizer = new StandardTokenizer(htmlProps);
    StandardTokenizer           jsTokenizer   = new StandardTokenizer(jsProps);
    StandardTokenizer           cssTokenizer  = new StandardTokenizer(cssProps);
    String                      keywordLang   = new String("LANGUAGE");
    Object                      endOfEmbedded = new Object();
    Object                      startOfTag    = new Object();
    Object                      endOfTag      = new Object();
    Object                      endOfScript   = new Object();
    Token                       token;
    int                         lastStartLineNo = -1;
    int                         lastStartColNo  = -1;


    htmlProps.setParseFlags(Flags.F_TOKEN_POS_ONLY 
                          | Flags.F_KEEP_DATA 
                          | Flags.F_COUNT_LINES);
    cssProps.setParseFlags (Flags.F_TOKEN_POS_ONLY | Flags.F_NO_CASE);
    jsProps.setParseFlags  (Flags.F_TOKEN_POS_ONLY);
    
    htmlProps.addKeyword("SCRIPT", jsTokenizer);
    htmlProps.addKeyword("LANGUAGE", keywordLang);
    htmlProps.addKeyword("STYLE", cssTokenizer);
    htmlProps.addSpecialSequence("<", startOfTag);
    htmlProps.addSpecialSequence(">", endOfTag);
    htmlProps.addBlockComment("<!--", "-->");
    htmlProps.addString(TokenizerProperties.DEFAULT_STRING_START, TokenizerProperties.DEFAULT_STRING_END, TokenizerProperties.DEFAULT_STRING_ESCAPE);
    htmlProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
    
    jsProps.addBlockComment(TokenizerProperties.DEFAULT_BLOCK_COMMENT_START, TokenizerProperties.DEFAULT_BLOCK_COMMENT_END);
    jsProps.addSpecialSequence("<!--");
    jsProps.addSpecialSequence("-->", endOfEmbedded);
    jsProps.setSeparators(TokenizerProperties.DEFAULT_SEPARATORS);
    
    cssProps.addSpecialSequence("<!--");
    cssProps.addSpecialSequence("-->", endOfEmbedded);
    
    htmlTokenizer.setSource(_reader);
    htmlTokenizer.addTokenizer(jsTokenizer);
    htmlTokenizer.addTokenizer(cssTokenizer);


    System.out.println("\nStart parsing \"" + _path + "\"");
    while (htmlTokenizer.hasMoreToken()) {
      token = htmlTokenizer.nextToken();
      
      // Line counting test
      assertTrue(token.getStartLine() >= lastStartLineNo);
      if (token.getStartLine() == lastStartLineNo) {
        assertTrue(token.getStartColumn() >= lastStartColNo);
        if (token.getEndLine() == lastStartLineNo) {
          assertTrue(token.getEndColumn() == token.getStartColumn() + token.getLength());
        }
      }
      lastStartLineNo = token.getStartLine();
      lastStartColNo  = token.getStartColumn();
      
      // Tokenizer switching
      switch (token.getType()) {
      case Token.SPECIAL_SEQUENCE:
        
        // dealing with JavaScript
        if (token.getCompanion() == startOfTag) {
          token = htmlTokenizer.nextToken();
          if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"" + keywordLang + "\".",
                      token.getCompanion() == keywordLang);       // see above; should be the LANGUAGE token
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"=\".",
                      htmlTokenizer.currentImage().equals("="));  // see above; should be "="
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected string.",
                      token.getType() == Token.STRING);           // see above; should be "JavaScript"
            
            // exclude JavaScript-Includes
            token = htmlTokenizer.nextToken();
            if (token.getCompanion() == endOfTag) {
              htmlTokenizer.switchTo(jsTokenizer);


              // continuing with JavaScriptTokenizer
              while (jsTokenizer.hasMoreToken()) {
                token = jsTokenizer.nextToken();
                if (token.getType() == Token.SPECIAL_SEQUENCE && token.getCompanion() == endOfEmbedded) {
                  jsTokenizer.switchTo(htmlTokenizer);
                  break;
                }
              }
              
              // now we should find the end-of script tag
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected start of tag.",
                        token.getCompanion() == startOfTag);
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"/\".",
                        htmlTokenizer.currentImage().equals("/"));
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected script.",
                        token.getCompanion() == jsTokenizer);
              token = htmlTokenizer.nextToken();
              assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
                        token.getCompanion() == endOfTag);
            }
            
          // dealing with Cascading Style Sheets (CSS
          } else if (token.getType() == Token.KEYWORD && token.getCompanion() == jsTokenizer) {
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
                      token.getCompanion() == endOfTag);   // should be the end of tag
            
            htmlTokenizer.switchTo(cssTokenizer);
            while (cssTokenizer.hasMoreToken()) {
              token = cssTokenizer.nextToken();
              if (token.getType() == Token.SPECIAL_SEQUENCE && token.getCompanion() == endOfEmbedded) {
                jsTokenizer.switchTo(htmlTokenizer);
                break;
              }
            }
            
            // now we should find the end-of-style tag
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected start of tag.",
                      token.getCompanion() == startOfTag);
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected \"/\".",
                      htmlTokenizer.currentImage().equals("/"));
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected script.",
                      token.getCompanion() == cssTokenizer);
            token = htmlTokenizer.nextToken();
            assertTrue("Found token \"" + htmlTokenizer.currentImage() + "\". Expected end of tag.",
                      token.getCompanion() == endOfTag);
          }
        }
        break;
      }
    }


    long diff = System.currentTimeMillis() - start;
    System.out.println("Finished after " + diff + " milliseconds");
  }
  
  
  //---------------------------------------------------------------------------
  // Members
  //
  /** Reader on the file to tokenize; assigned in {@link #setUp} and closed in {@link #tearDown}. */
  protected InputStreamReader _reader = null;
  /** Path of the file to tokenize, as passed to the constructor. */
  protected String            _path   = null;
}
Source Code of de.susebox.jtopas.TestEmbeddedTokenizer

Related Classes of de.susebox.jtopas.TestEmbeddedTokenizer