Package org.exoplatform.services.parser.html.test

Source Code of org.exoplatform.services.parser.html.test.TestFileDetect

/**
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/

package org.exoplatform.services.parser.html.test;

import org.exoplatform.component.test.AbstractGateInTest;
import org.exoplatform.services.html.HTMLDocument;
import org.exoplatform.services.html.HTMLNode;
import org.exoplatform.services.html.Name;
import org.exoplatform.services.html.parser.HTMLParser;
import org.exoplatform.services.html.path.NodePath;
import org.exoplatform.services.html.path.NodePathParser;
import org.exoplatform.services.html.path.NodePathUtil;

import java.io.File;
import java.net.URL;

/**
* Created by The eXo Platform SARL Author : Nhu Dinh Thuan
* nhudinhthuan@exoplatform.com Nov 29, 2006
*/
public class TestFileDetect extends AbstractGateInTest
{
   private File file_;

   public void setUp() throws Exception
   {
      this.file_ = new File(ClassLoader.getSystemResource("normal.html").getFile());
      assertNotNull(this.file_);
      System.out.println("FILE PATH: " + this.file_.getCanonicalPath());
   }

   public void testCharsetWithFile() throws Exception
   {
      HTMLDocument document = HTMLParser.createDocument(this.file_, null);
      assertNotNull(document);
      assertEquals("ASCII", HTMLParser.getCharset());
      assertNotSame("UTF-8", HTMLParser.getCharset());
      System.out.println("CHARSET: " + HTMLParser.getCharset());

      System.out.println("DOCUMENT-TEXTVALUE: " + document.getTextValue());
      System.out.println("DOCUMENT-ROOT: " + document.getRoot().getName().toString());
      System.out.println("CLASS: " + document.getClass().getName() + "\n");

      System.out.println("DOCUMENT-DOCTYPE-TEXTVALUE: " + document.getDoctype().getTextValue());
      System.out.println("DOCUMENT-DOCTYPE-VALUE: " + new String(document.getDoctype().getValue()));
      System.out.println("DOCUMENT-DOCTYPE-NAME: " + document.getDoctype().getName().toString());
   }

   public void testCharsetWithURL() throws Exception
   {
      HTMLDocument document;
      try
      {
         URL url_ = new URL("http://www.24h.com.vn");
         document = HTMLParser.createDocument(url_.openConnection().getInputStream(), null);
         document = HTMLParser.createDocument(url_.openStream(), null);
      }
      catch (java.net.UnknownHostException e)
      {
         return;
      }
      catch (java.net.ConnectException e)
      {
         return;
      }

      assertNotNull(document);
      assertNotNull(document.getRoot());
   }

   public void testCharsetWithTEXT() throws Exception
   {
      String text =
         "<html>" + "<head>" + "</head>" + "<body>" + "<h1>This is a HTML file for testing!</h1>" + "</body>"
            + "</html>";
      HTMLDocument document = HTMLParser.createDocument(text);
      assertNotNull(document);
      NodePath path = NodePathParser.toPath("html.body.h1");
      HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
      assertNotNull(node);
      assertEquals(node.getName(), Name.H1);
      assertEquals(node.getName().toString(), "H1");
      System.out.println("NAME: " + node.getName());
      System.out.println("VALUE: " + new String(node.getValue()));
      System.out.println("TEXTVALUE: " + node.getTextValue());
      // assertNull(node.getChildren());
      assertNotNull(node.getChildren());
      assertEquals(node.getChildren().size(), 1);
      assertEquals(!node.getChildren().isEmpty(), true);
      assertEquals(node.getChildren().get(0).getName(), Name.CONTENT);
      assertEquals(node.getChildren().get(0).getName().toString(), "CONTENT");

      HTMLNode child = node.getChildren().get(0);
      assertNotNull(child);
      assertNull(child.getChildren());
      assertEquals(child.getTextValue(), "This is a HTML file for testing!");
      // assertEquals(child.getValue(),"content");
      System.out.println("CONTENT-VALUE: " + new String(child.getValue()));
      assertEquals(child.getTextValue(), new String(child.getValue()));
   }
}
TOP

Related Classes of org.exoplatform.services.parser.html.test.TestFileDetect

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.