Examples of HTMLDocument

com.aicontest.visualizer.js.dom.HTMLDocument
com.gargoylesoftware.htmlunit.javascript.host.html.HTMLDocument
535862.aspx">MSDN documentation @see W3C Dom Level 1
com.gistlabs.mechanize.document.html.HtmlDocument
@author Martin Kersten
de.l3s.boilerpipe.sax.HTMLDocument
javax.swing.text.html.HTMLDocument
mf.org.w3c.dom.html.HTMLDocument
An HTMLDocument is the root of the HTML hierarchy and holds the entire content. Besides providing access to the hierarchy, it also provides some convenience methods for accessing certain sets of information from the document.
The following properties have been deprecated in favor of the corresponding ones for the BODY element: alinkColor, background, bgColor, fgColor, linkColor, vlinkColor.
net.matuschek.html.HtmlDocument
This class implements an HTML document. It uses JTidy to parse the given HTML code into an internal DOM representation. @author Daniel Matuschek @version $Id $
net.wastl.webmail.ui.html.HTMLDocument
WebMail's class for representing HTML Documents. @author Sebastian Schaffert
nokogiri.HtmlDocument
Class for Nokogiri::HTML::Document. @author sergio @author Yoko Harada
org.apache.any23.extractor.html.HTMLDocument
A wrapper around the DOM representation of an HTML document. Provides convenience access to various parts of the document. @author Gabriele Renzi @author Michele Mostarda
org.apache.lucene.ant.HtmlDocument
The HtmlDocument class creates a Lucene {@link org.apache.lucene.document.Document} from an HTML document.
It does this by using the JTidy package. It can take input from {@link java.io.File} or {@link java.io.InputStream}. @author Erik Hatcher
org.exoplatform.services.html.HTMLDocument
Author : Nhu Dinh Thuan Email:nhudinhthuan@yahoo.com Aug 5, 2006
org.jacoco.report.html.HTMLDocument
{@link XMLDocument} that declares its content type to be XHTML 1.0 Strict and produces {@link HTMLElement}s as children. @author Marc R. Hoffmann @version $qualified.bundle.version$
org.jacoco.report.internal.html.HTMLDocument
{@link XMLDocument} that declares its content type to be XHTML 1.0 Strict and produces {@link HTMLElement}s as children.
org.vietspider.html.HTMLDocument
Author : Nhu Dinh Thuan Email:nhudinhthuan@yahoo.com Aug 5, 2006
org.w3c.dom.html.HTMLDocument
See the Document Object Model (DOM) Level 2 Specification (http://www.w3.org/TR/2000/CR-DOM-Level-2-20000510).
org.w3c.dom.html2.HTMLDocument
See the Document Object Model (DOM) Level 2 HTML Specification (http://www.w3.org/TR/2003/REC-DOM-Level-2-HTML-20030109).

Examples of javax.swing.text.html.HTMLDocument

  }


  @Override
  public String getText() {
    String text = super.getText();
    HTMLDocument document = (HTMLDocument) this.getDocument();
    // #0 is the HTML element, #1 the bidi-root
    Element[] roots = document.getRootElements();
    Element body = findElement(roots[0], HTML.Tag.BODY);
    Element p = findElement(body, HTML.Tag.P);


    Document realText = p.getDocument();
    try {

View Full Code Here

Examples of javax.swing.text.html.HTMLDocument

    public void hyperlinkUpdate(HyperlinkEvent e) {
      if (e.getEventType() == HyperlinkEvent.EventType.ACTIVATED) {
        JEditorPane pane = (JEditorPane) e.getSource();
        if (e instanceof HTMLFrameHyperlinkEvent) {
          HTMLFrameHyperlinkEvent evt = (HTMLFrameHyperlinkEvent) e;
          HTMLDocument doc = (HTMLDocument) pane.getDocument();
          doc.processHTMLFrameHyperlinkEvent(evt);
        } else {
          try {
            pane.setPage(e.getURL());
          } catch (Throwable t) {
            t.printStackTrace();

View Full Code Here

Examples of javax.swing.text.html.HTMLDocument

    public void hyperlinkUpdate(HyperlinkEvent e) {
      if (e.getEventType() == HyperlinkEvent.EventType.ACTIVATED) {
        JEditorPane pane = (JEditorPane) e.getSource();
        if (e instanceof HTMLFrameHyperlinkEvent) {
          HTMLFrameHyperlinkEvent evt = (HTMLFrameHyperlinkEvent) e;
          HTMLDocument doc = (HTMLDocument) pane.getDocument();
          doc.processHTMLFrameHyperlinkEvent(evt);
        } else {
          try {
            pane.setPage(e.getURL());
          } catch (Throwable t) {
            t.printStackTrace();

View Full Code Here

Examples of javax.swing.text.html.HTMLDocument

    private static final long serialVersionUID = 6031640599826858733L;


    // Override createDefaultDocument to force synchronous loading
        @Override
        public Document createDefaultDocument() {
            HTMLDocument doc = (HTMLDocument) super.createDefaultDocument();
            doc.setTokenThreshold(Integer.MAX_VALUE);
            doc.setAsynchronousLoadPriority(-1);
            return doc;
        }

View Full Code Here

Examples of mf.org.w3c.dom.html.HTMLDocument

     * @return New HTML document
     */
    public final HTMLDocument createHTMLDocument( String title )
        throws DOMException
    {
  HTMLDocument doc;


  if ( title == null )
      throw new NullPointerException( "HTM014 Argument 'title' is null." );
  doc = new HTMLDocumentImpl();
  doc.setTitle( title );
  return doc;
    }

View Full Code Here

Examples of net.matuschek.html.HtmlDocument

      // extract links
      try {
        if (doc.isHTML() && (depth > 0)) {
          // solving encoding problem
          // HtmlDocument htmlDoc = new HtmlDocument(u, doc.getContent());
          HtmlDocument htmlDoc = null;
          HttpHeader contentTypeHeader = doc.getHeader("Content-type");
          if (contentTypeHeader != null) {
            String contentType = contentTypeHeader.getValue();
            int index = contentType.toLowerCase().indexOf("charset=");
            if (index > 0) {
              htmlDoc = new HtmlDocument(u, doc.getContent(), contentType.substring(index+8));
            } else {
              htmlDoc = new HtmlDocument(u, doc.getContent());
            }
          } else {
            htmlDoc = new HtmlDocument(u, doc.getContent());
          }
  
          // add links
          
          // this depth-check is critical!
          // otherwise far too many RobotTasks will be created
          // this will cause a premature OutOfMemoryException!
          if (depth > 0) {
            if (duplicate != null) {
              HttpDoc linksDoc = docManager.retrieveFromCache(new URL(duplicate));
              doc.setLinks(linksDoc.getLinks());
            } else if (cached) {
            } 
            if (links == null) {
              links = htmlDoc.getLinks();
              doc.setLinks(links);
            }
            if (duplicate == null) {
              HashSet checkedLinks = new HashSet();
              for (int i = 0; i < links.size(); i++) {
                URL link = (URL) links.elementAt(i);
                log.info("Link: "+link);
                // check already here for duplicate links to avoid expensive
                // creation of RobotTasks
                if (!checkedLinks.contains(link)) {
                  checkedLinks.add(link);
                  String myReferer = u.toString();
                  if (u.getUserInfo() != null) {
                    // remove userinfo from referer
                    int endindex = myReferer.indexOf("@")+1;
                    myReferer = "http://"+ myReferer.substring(endindex);
                  }
                  
                  RobotTask newTask = createRobotTask((URL) links.elementAt(i), depth - 1, myReferer);
                  // check already here for visited tasks to save memory
                  if (!visited.contains(newTask)) {
                    // bad workaround to retrieve images first
                    if (newTask.urlString.endsWith(".jpg")) {
                      addTaskAtStart(newTask);
                    } else {
                      addTask(newTask);
                    }
                  }
                }
              }
            }
          }
          
          if (hasFormHandlers) {
            // add forms
            Vector forms = htmlDoc.getElements("form");
            for (int i = 0; i < forms.size(); i++) {
              ExtendedURL eurl = formFiller.fillForm(u, (Element) forms.elementAt(i));
              if (eurl != null) {
                RobotTask newTask = createRobotTask(eurl.getURL(), depth - 1, u.toString());
                newTask.setParamString(eurl.getParams());

View Full Code Here

Examples of net.matuschek.html.HtmlDocument

    BasicConfigurator.configure();
    
    HttpTool tool = new HttpTool();
    HttpDoc doc = tool.retrieveDocument(new URL("http://usul27:a1rrakis@www.atkpremium.com/members/styles/standard/pages/index.php?thispage=modelupdate&thisupdate=083735&thismodel=len004"),
          HttpConstants.GET,null);
    HtmlDocument html=new HtmlDocument(new URL("http://localhost"), doc.getContent());
    for (URL u: html.getLinks()) {
      System.out.println(u);
    }
    
    //    System.out.println(doc);
  }

View Full Code Here

Examples of net.wastl.webmail.ui.html.HTMLDocument

            } catch (final Exception ex) {
                throw new ServletException("Remote host must identify!");
            }
        }


        HTMLDocument content = null;
        final int err_code = 400;
        HTTPSession sess = null;


        /*
         * Here we try to parse the MIME content that the Client sent in his
         * POST since the JServ doesn't do that for us:-( At least we can use
         * the functionality provided by the standalone server where we need to
         * parse the content ourself anyway.
         */
        try {
            final BufferedOutputStream out =
                    new BufferedOutputStream(res.getOutputStream());


            /*
             * First we try to use the Servlet API's methods to parse the
             * parameters. Unfortunately, it doesn't know how to handle MIME
             * multipart POSTs, so we will have to handle that ourselves
             */


            /*
             * First get all the parameters and set their values into
             * http_header
             */
            Enumeration enum2 = req.getParameterNames();
            while (enum2.hasMoreElements()) {
                final String s = (String) enum2.nextElement();
                http_header.setContent(s, req.getParameter(s));
                // log.info("Parameter "+s);
            }


            /* Then we set all the headers in http_header */
            enum2 = req.getHeaderNames();
            while (enum2.hasMoreElements()) {
                final String s = (String) enum2.nextElement();
                http_header.setHeader(s, req.getHeader(s));
            }


            /*
             * In Servlet API 2.2 we might want to fetch the attributes also,
             * but this doesn't work in API 2.0, so we leave it commented out
             */
            // enum2=req.getAttributeNames();
            // while(enum2.hasMoreElements()) {
            // String s=(String)enum2.nextElement();
            // log.info("Attribute "+s);
            // }


            /* Now let's try to handle multipart/form-data posts */


            if (req.getContentType() != null
                    && req.getContentType().toUpperCase().
                    startsWith("MULTIPART/FORM-DATA")) {
                final int size = Integer.parseInt(WebMailServer.
                        getStorage().getConfig("max attach size"));
                final MultipartParser mparser = new MultipartParser(req, size);
                Part p;
                while ((p = mparser.readNextPart()) != null) {
                    if (p.isFile()) {
                        final ByteStore bs = ByteStore.getBinaryFromIS(
                                ((FilePart) p).getInputStream(), size);
                        bs.setName(((FilePart) p).getFileName());
                        bs.setContentType(getStorage().getMimeType(
                                    ((FilePart) p).getFileName()));
                        http_header.setContent(p.getName(), bs);
                        log.info("File name " + bs.getName());
                        log.info("Type      " + bs.getContentType());


                    } else if (p.isParam()) {
                        http_header.setContent(p.getName(),
                                ((ParamPart) p).getStringValue());
                    }


                    // log.info("Parameter "+p.getName());
                }
            }


            try {
                final String url = http_header.getPath();


                try {
                    /* Find out about the session id */
                    sess = req.getSession(false) == null
                            ? null
                            : (HTTPSession) req.getSession(false).
                            getAttribute("webmail.session");


                    /*
                     * If the user was logging on, he doesn't have a session id,
                     * so generate one. If he already had one, all the better,
                     * we will take the old one
                     */
                    if (sess == null && url.startsWith("/login")) {
                        sess = newSession(req, http_header);
                    } else if (sess == null && url.startsWith("/admin/login")) {
                        http_header.setHeader("LOGIN", "Administrator");
                        sess = newAdminSession(req, http_header);
                    }
                    if (sess == null && !url.equals("/")
                            && !url.startsWith("/passthrough")
                            && !url.startsWith("/admin")) {
                        content = getURLHandler().handleURL(
                                "/logout", sess, http_header);
                    } else {
                        /* Ensure that the session state is up-to-date */
                        if (sess != null) {
                            sess.setEnv();
                        }


                        /* Let the URLHandler determine the result of the query */
                        content = getURLHandler().
                                handleURL(url, sess, http_header);
                    }
                } catch (final InvalidPasswordException e) {
                    log.error("Connection to " + addr.toString()
                            + ": Authentication failed!");
                    if (url.startsWith("/admin/login")) {
                        content = getURLHandler().
                                handleURL("/admin", null, http_header);
                    } else if (url.startsWith("/login")) {
                        content = getURLHandler().
                                handleURL("/", null, http_header);
                    } else
                        // content=new
                        // HTMLErrorMessage(getStorage(),e.getMessage());
                        throw new ServletException("Invalid URL called!");
                } catch (final Exception ex) {
                    content = getURLHandler().
                            handleException(ex, sess, http_header);
                    log.debug("Some strange error while handling request", ex);
                }


                /*
                 * Set some HTTP headers: Date is now, the document should
                 * expire in 5 minutes, proxies and clients shouldn't cache it
                 * and all WebMail documents must be revalidated when they think
                 * they don't have to follow the "no-cache".
                 */
                res.setDateHeader("Date:", System.currentTimeMillis());
                res.setDateHeader(
                        "Expires", System.currentTimeMillis() + 300000);
                res.setHeader("Pragma", "no-cache");
                res.setHeader("Cache-Control", "must-revalidate");


                synchronized (out) {
                    res.setStatus(content.getReturnCode());


                    if (content.hasHTTPHeader()) {
                        final Enumeration enumVar = content.getHTTPHeaderKeys();
                        while (enumVar.hasMoreElements()) {
                            final String s = (String) enumVar.nextElement();
                            res.setHeader(s, content.getHTTPHeader(s));
                        }
                    }


                    /*
                     * What we will send is an image or some other sort of
                     * binary
                     */
                    if (content instanceof HTMLImage) {
                        final HTMLImage img = (HTMLImage) content;
                        /*
                         * the HTMLImage class provides us with most of the
                         * necessary information that we want to send
                         */
                        res.setHeader("Content-Type", img.getContentType());
                        res.setHeader("Content-Transfer-Encoding",
                                img.getContentEncoding());
                        res.setHeader("Content-Length", "" + img.size());
                        res.setHeader("Connection", "Keep-Alive");


                        /* Send 8k junks */
                        int offset = 0;
                        while (offset + chunk_size < img.size()) {
                            out.write(img.toBinary(), offset, chunk_size);
                            offset += chunk_size;
                        }
                        out.write(img.toBinary(), offset, img.size() - offset);
                        out.flush();


                        out.close();
                    } else {
                        final byte[] encoded_content =
                                content.toString().getBytes("UTF-8");


                        /*
                         * We are sending HTML text. Set the encoding to UTF-8
                         * for Unicode messages
                         */

View Full Code Here

Examples of nokogiri.HtmlDocument


    @Override
    protected XmlDocument wrapDocument(ThreadContext context,
                                       RubyClass klazz,
                                       Document document) {
        HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
        htmlDocument.setDocumentNode(context, document);
        if (ruby_encoding.isNil()) {
            // ruby_encoding might have detected by HtmlDocument::EncodingReader
            if (detected_encoding != null && !detected_encoding.isNil()) {
                ruby_encoding = detected_encoding;
            } else {
                // no encoding given & no encoding detected, then try to get it
                String charset = tryGetCharsetFromHtml5MetaTag(document);
                ruby_encoding = stringOrNil(context.getRuntime(), charset);
            }
        }
        htmlDocument.setEncoding(ruby_encoding);
        htmlDocument.setParsedEncoding(java_encoding);
        return htmlDocument;
    }

View Full Code Here

Examples of org.apache.any23.extractor.html.HTMLDocument

    private String extractDocumentLanguage(ExtractionParameters extractionParameters)
    throws IOException, ValidatorException {
        if( ! isHTMLDocument() ) {
            return null;
        }
        final HTMLDocument document;
        try {
            document = new HTMLDocument( getTagSoupDOM(extractionParameters).getDocument() );
        } catch (IOException ioe) {
            log.debug("Cannot extract language from document.", ioe);
            return null;
        }
        return document.getDefaultLanguage();
    }

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.