Examples of HTMLDocument


Examples of org.apache.lucene.ant.HtmlDocument

   
    HtmlDocument doc;
   
    @Override
    public void setUp() throws IOException {
        doc = new HtmlDocument(getFile("test.html"));
    }
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

   {
      //Checks the existing of the HTML file.
      assertNotNull(file_);

      //Checks the charset type of the HTML file.
      HTMLDocument document = HTMLParser.createDocument(file_, null);
      assertEquals("ASCII", HTMLParser.getCharset());
      System.out.println("\n\n\n == > charset " + HTMLParser.getCharset() + "\n\n");
      assertNotNull(document);

      //Gets the NodePath object locating the path of a TAG in the HTML file.
      NodePath path = NodePathParser.toPath("html.body.h2");

      //Looks up the Node corresponding to the NodePath object in the HTML document
      //and checks that this TAG exists.
      HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
      assertEquals(node.getName(), Name.H2);
      assertEquals(node.getName().toString(), "H2");

      //Similar as above.
      path = NodePathParser.toPath("html.body.font[1]");
      node = NodePathUtil.lookFor(document.getRoot(), path);
      assertNotNull(node);

      //Gets all the attributes of the Node object in the HTML document.
      //simultaneously checks the existing of an Attribute of this Node.
      Attributes attributes = AttributeParser.getAttributes(node);
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      //assertNotNull(attributes.get("size"));  
   }

   public void testDocumentType() throws Exception
   {
      HTMLDocument document;
      String text = "<html><body><h3>dsfsdf</h3></body></html>";
      document = HTMLParser.createDocument(text);
      assertNotNull(document);
      assertEquals("ASCII", HTMLParser.getCharset());
      System.out.println("CHARSET: " + HTMLParser.getCharset());
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      */
   }

   public void testRoot() throws Exception
   {
      HTMLDocument document = HTMLParser.createDocument(this.file_, null);
      assertNotNull(document);
      //HTMLNode root = NodePathUtil.lookFor(document.getRoot(),NodePathParser.toPath("html"));
      HTMLNode root = document.getRoot();
      assertNotNull(root);
      System.out.println("ROOT-NAME: " + root.getName());
      System.out.println("ROOT-VALUE: " + root.getValue().toString());
      System.out.println("ROOT-TEXTVALUE: " + root.getTextValue());
   }
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      System.out.println("ROOT-TEXTVALUE: " + root.getTextValue());
   }

   public void testHead() throws Exception
   {
      HTMLDocument document = HTMLParser.createDocument(this.file_, null);
      String pathStr = "html.head";
      NodePath path = NodePathParser.toPath(pathStr);
      HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
      assertNotNull(node);
      assertEquals(node.getName(), Name.HEAD);
      assertEquals(node.getName().toString(), "HEAD");
      List<HTMLNode> children = node.getChildrenNode();
      assertNotNull(children);
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      }
   }

   public void testBody() throws Exception
   {
      HTMLDocument document = HTMLParser.createDocument(this.file_, null);
      String pathStr = "html.body";
      NodePath path = NodePathParser.toPath(pathStr);
      HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
      assertNotNull(node);
      assertEquals(node.getName(), Name.BODY);
      assertEquals(node.getName().toString(), "BODY");
   }
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      //FILE
      assertNotNull(this.file_);
      System.out.println("FILE PATH: " + this.file_.getCanonicalPath());

      //HTMLDocument.
      HTMLDocument htmlDocument = HTMLParser.createDocument(this.file_, null);
      assertNotNull(htmlDocument.getDoctype());
      System.out.println("DOCTYPE: " + htmlDocument.getDoctype().getValue().toString());

      //NodePath.
      NodePath tablePath = NodePathParser.toPath("html.body.table[1]");
      assertNotNull(tablePath);
      System.out.println(tablePath.toString());

      //HTMLNode.
      HTMLNode node = NodePathUtil.lookFor(htmlDocument.getRoot(), tablePath);
      assertNotNull(node);
      assertEquals(node.getName(), Name.TABLE);
      assertEquals(node.getName().toString(), "TABLE");
      System.out.println("NODE-NAME: " + node.getName());
      System.out.println("NODE-VALUE: " + new String(node.getValue()));
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      //FileOutputStream is meant for writing streams of raw bytes of data such as image data.
      //For writing streams of characters, consider using FileWriter.
      FileOutputStream output = new FileOutputStream(file);

      //The initial whole HTMLDocument
      HTMLDocument document = HTMLParser.createDocument(this.data.toByteArray(), this.charset_);

      //The new HTMLDocument after splitting (separating) only childPath<NodePath>-->only a Node.
      document = NodePathUtil.create(document.getRoot(), new NodePath[]{this.childPath_});

      output.write(document.getTextValue().getBytes("utf-8"));
      output.flush();

      output.close();
      this.data = null;
   }
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      if (this.childrenThread[0].getData() == null)
         return;

      byte[] data = this.childrenThread[0].getData().toByteArray();
      //The whole HTMLdocument.
      HTMLDocument document = HTMLParser.createDocument(data, this.charset_);
      //The(an) only part of HTMLDocument with 'homePath' Node (NodePath).
      document = NodePathUtil.create(document.getRoot(), new NodePath[]{this.homePath_});

      //Create the full url-link for all links to be downloaded.
      this.linkUtil.createFullNormalLink(document.getRoot(), new URL(this.url_));
      //And get all these url-links.
      this.links = this.linkUtil.getSiteLink(document.getRoot());
      this.idx = 0;
   }
View Full Code Here

Examples of org.exoplatform.services.html.HTMLDocument

      //HTMLDocument.
      String text =
         "<html>" + "<head>" + "<title>My own HTML file</title>" + "</head>" + "<body>"
            + "<h2>This is a test exercise for me!</h2>" + "</body>" + "</html>";
      HTMLDocument document = HTMLParser.createDocument(text);
      assertNotNull(document);

      String pathStr = "html.head.title";
      NodePath path = NodePathParser.toPath(pathStr);
      assertNotNull(path);
      assertEquals(path.toString(), "HTML[0].HEAD[0].TITLE[0]");
      System.out.println("PATH: " + path.toString());

      HTMLNode node = NodePathUtil.lookFor(document.getRoot(), path);
      assertNotNull(node);
      assertEquals(node.getName(), Name.TITLE);

      //Add a Tag to HTMLDocument.
      NodeImpl impl = new NodeImpl("h2 id = \"dds\"".toCharArray(), Name.H2);
      node.addChild(impl);
      assertNotNull(node.getChildrenNode().get(1));
      assertEquals(node.getChildren().get(1).getName(), Name.H2);
      System.out.println("THE NEW NODE-NAME: " + node.getChildrenNode().get(1).getName().toString());
      System.out.println("THE NEW NODE-VALUE: " + new String(node.getChildren().get(1).getValue()));

      //Add a Table to HTMLDocument.
      HTMLDocument doc = HTMLParser.createDocument("<table border='1'><tr></tr></table>");
      HTMLNode table = NodePathUtil.lookFor(doc.getRoot(), NodePathParser.toPath("html.body.table"));
      node.addChild(table);

      //Remove a Node which is text in format from HTMLDocument.
      System.out.println("\n\nRemove:");
      HTMLNode contentNode = NodePathUtil.lookFor(document.getRoot(), NodePathParser.toPath("html.head.title.content"));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.