Examples of HTMLDocument


Examples of javax.swing.text.html.HTMLDocument

  }

  @Override
  public String getText() {
    String text = super.getText();
    HTMLDocument document = (HTMLDocument) this.getDocument();
    // #0 is the HTML element, #1 the bidi-root
    Element[] roots = document.getRootElements();
    Element body = findElement(roots[0], HTML.Tag.BODY);
    Element p = findElement(body, HTML.Tag.P);

    Document realText = p.getDocument();
    try {
View Full Code Here

Examples of javax.swing.text.html.HTMLDocument

    public void hyperlinkUpdate(HyperlinkEvent e) {
      if (e.getEventType() == HyperlinkEvent.EventType.ACTIVATED) {
        JEditorPane pane = (JEditorPane) e.getSource();
        if (e instanceof HTMLFrameHyperlinkEvent) {
          HTMLFrameHyperlinkEvent evt = (HTMLFrameHyperlinkEvent) e;
          HTMLDocument doc = (HTMLDocument) pane.getDocument();
          doc.processHTMLFrameHyperlinkEvent(evt);
        } else {
          try {
            pane.setPage(e.getURL());
          } catch (Throwable t) {
            t.printStackTrace();
View Full Code Here

Examples of javax.swing.text.html.HTMLDocument

    public void hyperlinkUpdate(HyperlinkEvent e) {
      if (e.getEventType() == HyperlinkEvent.EventType.ACTIVATED) {
        JEditorPane pane = (JEditorPane) e.getSource();
        if (e instanceof HTMLFrameHyperlinkEvent) {
          HTMLFrameHyperlinkEvent evt = (HTMLFrameHyperlinkEvent) e;
          HTMLDocument doc = (HTMLDocument) pane.getDocument();
          doc.processHTMLFrameHyperlinkEvent(evt);
        } else {
          try {
            pane.setPage(e.getURL());
          } catch (Throwable t) {
            t.printStackTrace();
View Full Code Here

Examples of javax.swing.text.html.HTMLDocument

    private static final long serialVersionUID = 6031640599826858733L;

    // Override createDefaultDocument to force synchronous loading
        @Override
        public Document createDefaultDocument() {
            HTMLDocument doc = (HTMLDocument) super.createDefaultDocument();
            doc.setTokenThreshold(Integer.MAX_VALUE);
            doc.setAsynchronousLoadPriority(-1);
            return doc;
        }
View Full Code Here

Examples of mf.org.w3c.dom.html.HTMLDocument

     * @return New HTML document
     */
    public final HTMLDocument createHTMLDocument( String title )
        throws DOMException
    {
  HTMLDocument doc;

  if ( title == null )
      throw new NullPointerException( "HTM014 Argument 'title' is null." );
  doc = new HTMLDocumentImpl();
  doc.setTitle( title );
  return doc;
    }
View Full Code Here

Examples of net.matuschek.html.HtmlDocument

      // extract links
      try {
        if (doc.isHTML() && (depth > 0)) {
          // solving encoding problem
          // HtmlDocument htmlDoc = new HtmlDocument(u, doc.getContent());
          HtmlDocument htmlDoc = null;
          HttpHeader contentTypeHeader = doc.getHeader("Content-type");
          if (contentTypeHeader != null) {
            String contentType = contentTypeHeader.getValue();
            int index = contentType.toLowerCase().indexOf("charset=");
            if (index > 0) {
              htmlDoc = new HtmlDocument(u, doc.getContent(), contentType.substring(index+8));
            } else {
              htmlDoc = new HtmlDocument(u, doc.getContent());
            }
          } else {
            htmlDoc = new HtmlDocument(u, doc.getContent());
          }
 
          // add links
         
          // this depth-check is critical!
          // otherwise far too many RobotTasks will be created
          // this will cause a premature OutOfMemoryException!
          if (depth > 0) {
            if (duplicate != null) {
              HttpDoc linksDoc = docManager.retrieveFromCache(new URL(duplicate));
              doc.setLinks(linksDoc.getLinks());
            } else if (cached) {
            }
            if (links == null) {
              links = htmlDoc.getLinks();
              doc.setLinks(links);
            }
            if (duplicate == null) {
              HashSet checkedLinks = new HashSet();
              for (int i = 0; i < links.size(); i++) {
                URL link = (URL) links.elementAt(i);
                log.info("Link: "+link);
                // check already here for duplicate links to avoid expensive
                // creation of RobotTasks
                if (!checkedLinks.contains(link)) {
                  checkedLinks.add(link);
                  String myReferer = u.toString();
                  if (u.getUserInfo() != null) {
                    // remove userinfo from referer
                    int endindex = myReferer.indexOf("@")+1;
                    myReferer = "http://"+ myReferer.substring(endindex);
                  }
                 
                  RobotTask newTask = createRobotTask((URL) links.elementAt(i), depth - 1, myReferer);
                  // check already here for visited tasks to save memory
                  if (!visited.contains(newTask)) {
                    // bad workaround to retrieve images first
                    if (newTask.urlString.endsWith(".jpg")) {
                      addTaskAtStart(newTask);
                    } else {
                      addTask(newTask);
                    }
                  }
                }
              }
            }
          }
         
          if (hasFormHandlers) {
            // add forms
            Vector forms = htmlDoc.getElements("form");
            for (int i = 0; i < forms.size(); i++) {
              ExtendedURL eurl = formFiller.fillForm(u, (Element) forms.elementAt(i));
              if (eurl != null) {
                RobotTask newTask = createRobotTask(eurl.getURL(), depth - 1, u.toString());
                newTask.setParamString(eurl.getParams());
View Full Code Here

Examples of net.matuschek.html.HtmlDocument

    BasicConfigurator.configure();
   
    HttpTool tool = new HttpTool();
    HttpDoc doc = tool.retrieveDocument(new URL("http://usul27:a1rrakis@www.atkpremium.com/members/styles/standard/pages/index.php?thispage=modelupdate&thisupdate=083735&thismodel=len004"),
          HttpConstants.GET,null);
    HtmlDocument html=new HtmlDocument(new URL("http://localhost"), doc.getContent());
    for (URL u: html.getLinks()) {
      System.out.println(u);
    }
   
    //    System.out.println(doc);
  }
View Full Code Here

Examples of net.wastl.webmail.ui.html.HTMLDocument

            } catch (final Exception ex) {
                throw new ServletException("Remote host must identify!");
            }
        }

        HTMLDocument content = null;
        final int err_code = 400;
        HTTPSession sess = null;

        /*
         * Here we try to parse the MIME content that the Client sent in his
         * POST since the JServ doesn't do that for us:-( At least we can use
         * the functionality provided by the standalone server where we need to
         * parse the content ourself anyway.
         */
        try {
            final BufferedOutputStream out =
                    new BufferedOutputStream(res.getOutputStream());

            /*
             * First we try to use the Servlet API's methods to parse the
             * parameters. Unfortunately, it doesn't know how to handle MIME
             * multipart POSTs, so we will have to handle that ourselves
             */

            /*
             * First get all the parameters and set their values into
             * http_header
             */
            Enumeration enum2 = req.getParameterNames();
            while (enum2.hasMoreElements()) {
                final String s = (String) enum2.nextElement();
                http_header.setContent(s, req.getParameter(s));
                // log.info("Parameter "+s);
            }

            /* Then we set all the headers in http_header */
            enum2 = req.getHeaderNames();
            while (enum2.hasMoreElements()) {
                final String s = (String) enum2.nextElement();
                http_header.setHeader(s, req.getHeader(s));
            }

            /*
             * In Servlet API 2.2 we might want to fetch the attributes also,
             * but this doesn't work in API 2.0, so we leave it commented out
             */
            // enum2=req.getAttributeNames();
            // while(enum2.hasMoreElements()) {
            // String s=(String)enum2.nextElement();
            // log.info("Attribute "+s);
            // }

            /* Now let's try to handle multipart/form-data posts */

            if (req.getContentType() != null
                    && req.getContentType().toUpperCase().
                    startsWith("MULTIPART/FORM-DATA")) {
                final int size = Integer.parseInt(WebMailServer.
                        getStorage().getConfig("max attach size"));
                final MultipartParser mparser = new MultipartParser(req, size);
                Part p;
                while ((p = mparser.readNextPart()) != null) {
                    if (p.isFile()) {
                        final ByteStore bs = ByteStore.getBinaryFromIS(
                                ((FilePart) p).getInputStream(), size);
                        bs.setName(((FilePart) p).getFileName());
                        bs.setContentType(getStorage().getMimeType(
                                    ((FilePart) p).getFileName()));
                        http_header.setContent(p.getName(), bs);
                        log.info("File name " + bs.getName());
                        log.info("Type      " + bs.getContentType());

                    } else if (p.isParam()) {
                        http_header.setContent(p.getName(),
                                ((ParamPart) p).getStringValue());
                    }

                    // log.info("Parameter "+p.getName());
                }
            }

            try {
                final String url = http_header.getPath();

                try {
                    /* Find out about the session id */
                    sess = req.getSession(false) == null
                            ? null
                            : (HTTPSession) req.getSession(false).
                            getAttribute("webmail.session");

                    /*
                     * If the user was logging on, he doesn't have a session id,
                     * so generate one. If he already had one, all the better,
                     * we will take the old one
                     */
                    if (sess == null && url.startsWith("/login")) {
                        sess = newSession(req, http_header);
                    } else if (sess == null && url.startsWith("/admin/login")) {
                        http_header.setHeader("LOGIN", "Administrator");
                        sess = newAdminSession(req, http_header);
                    }
                    if (sess == null && !url.equals("/")
                            && !url.startsWith("/passthrough")
                            && !url.startsWith("/admin")) {
                        content = getURLHandler().handleURL(
                                "/logout", sess, http_header);
                    } else {
                        /* Ensure that the session state is up-to-date */
                        if (sess != null) {
                            sess.setEnv();
                        }

                        /* Let the URLHandler determine the result of the query */
                        content = getURLHandler().
                                handleURL(url, sess, http_header);
                    }
                } catch (final InvalidPasswordException e) {
                    log.error("Connection to " + addr.toString()
                            + ": Authentication failed!");
                    if (url.startsWith("/admin/login")) {
                        content = getURLHandler().
                                handleURL("/admin", null, http_header);
                    } else if (url.startsWith("/login")) {
                        content = getURLHandler().
                                handleURL("/", null, http_header);
                    } else
                        // content=new
                        // HTMLErrorMessage(getStorage(),e.getMessage());
                        throw new ServletException("Invalid URL called!");
                } catch (final Exception ex) {
                    content = getURLHandler().
                            handleException(ex, sess, http_header);
                    log.debug("Some strange error while handling request", ex);
                }

                /*
                 * Set some HTTP headers: Date is now, the document should
                 * expire in 5 minutes, proxies and clients shouldn't cache it
                 * and all WebMail documents must be revalidated when they think
                 * they don't have to follow the "no-cache".
                 */
                res.setDateHeader("Date:", System.currentTimeMillis());
                res.setDateHeader(
                        "Expires", System.currentTimeMillis() + 300000);
                res.setHeader("Pragma", "no-cache");
                res.setHeader("Cache-Control", "must-revalidate");

                synchronized (out) {
                    res.setStatus(content.getReturnCode());

                    if (content.hasHTTPHeader()) {
                        final Enumeration enumVar = content.getHTTPHeaderKeys();
                        while (enumVar.hasMoreElements()) {
                            final String s = (String) enumVar.nextElement();
                            res.setHeader(s, content.getHTTPHeader(s));
                        }
                    }

                    /*
                     * What we will send is an image or some other sort of
                     * binary
                     */
                    if (content instanceof HTMLImage) {
                        final HTMLImage img = (HTMLImage) content;
                        /*
                         * the HTMLImage class provides us with most of the
                         * necessary information that we want to send
                         */
                        res.setHeader("Content-Type", img.getContentType());
                        res.setHeader("Content-Transfer-Encoding",
                                img.getContentEncoding());
                        res.setHeader("Content-Length", "" + img.size());
                        res.setHeader("Connection", "Keep-Alive");

                        /* Send 8k junks */
                        int offset = 0;
                        while (offset + chunk_size < img.size()) {
                            out.write(img.toBinary(), offset, chunk_size);
                            offset += chunk_size;
                        }
                        out.write(img.toBinary(), offset, img.size() - offset);
                        out.flush();

                        out.close();
                    } else {
                        final byte[] encoded_content =
                                content.toString().getBytes("UTF-8");

                        /*
                         * We are sending HTML text. Set the encoding to UTF-8
                         * for Unicode messages
                         */
 
View Full Code Here

Examples of nokogiri.HtmlDocument

    @Override
    protected XmlDocument wrapDocument(ThreadContext context,
                                       RubyClass klazz,
                                       Document document) {
        HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz);
        htmlDocument.setDocumentNode(context, document);
        if (ruby_encoding.isNil()) {
            // ruby_encoding might have detected by HtmlDocument::EncodingReader
            if (detected_encoding != null && !detected_encoding.isNil()) {
                ruby_encoding = detected_encoding;
            } else {
                // no encoding given & no encoding detected, then try to get it
                String charset = tryGetCharsetFromHtml5MetaTag(document);
                ruby_encoding = stringOrNil(context.getRuntime(), charset);
            }
        }
        htmlDocument.setEncoding(ruby_encoding);
        htmlDocument.setParsedEncoding(java_encoding);
        return htmlDocument;
    }
View Full Code Here

Examples of org.apache.any23.extractor.html.HTMLDocument

    private String extractDocumentLanguage(ExtractionParameters extractionParameters)
    throws IOException, ValidatorException {
        if( ! isHTMLDocument() ) {
            return null;
        }
        final HTMLDocument document;
        try {
            document = new HTMLDocument( getTagSoupDOM(extractionParameters).getDocument() );
        } catch (IOException ioe) {
            log.debug("Cannot extract language from document.", ioe);
            return null;
        }
        return document.getDefaultLanguage();
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.