Package org.htmlparser

Examples of org.htmlparser.Parser


        try {
          // Compose the e-mail
          String mail_content = MessageFormat.format(template,
              new String[]{loginUser.getNickname(),img.toString(), url.toString(),curTime});
          //System.out.println(mail_content);
          Parser html = new Parser();
          html.setEncoding(Globals.ENC_8859_1);
          html.setInputHTML(mail_content);
          Node[] nodes = html.extractAllNodesThatMatch(
              HtmlNodeFilters.titleFilter).toNodeArray();
          String title = nodes[0].toPlainTextString();
          MailSender sender = MailSender.getHtmlMailSender(null, 25,
              null, null);
          sender.setSubject(title);
View Full Code Here


          // Compose the e-mail content
          String notify_content = MessageFormat.format(template,
              new String[]{rbean.getDiary().getOwner().getNickname(),
              rbean.getDiary().getTitle(), rbean.getAuthor(),
              url.toString(), curTime, rbean.getContent()});
          Parser html = new Parser();
          html.setEncoding(Globals.ENC_8859_1);
          html.setInputHTML(notify_content);
          Node[] nodes = html.extractAllNodesThatMatch(
              HtmlNodeFilters.titleFilter).toNodeArray();
          String title = nodes[0].toPlainTextString();
          MailSender sender = MailSender.getHtmlMailSender(null, 25,
              null, null);
          sender.setSubject(title);
View Full Code Here

     * @throws ParserException
     */
  public String plain_text(String html) throws UnsupportedEncodingException, ParserException{
    if(html==null) return null;
    StringBuffer text = new StringBuffer();
    Parser parser = new Parser();
    parser.setInputHTML(html);
    parser.setEncoding(Globals.ENC_8859_1);
    // Traverse all of the nodes
    NodeList nodes;
    try {
      nodes = parser.extractAllNodesThatMatch(nfilter);
    } catch (ParserException e) {
      return html;
    }
    for(int i=0;i<nodes.size();i++){
      TextNode node = (TextNode)nodes.elementAt(i);
View Full Code Here

   */
  protected String autoFormatImage(String desc){
    StringBuffer content = new StringBuffer();
    int last_pos = 0;
    try{
      Parser parser = new Parser();
      parser.setInputHTML(desc);
      parser.setEncoding(Globals.ENC_8859_1);
      Node[] images = parser.extractAllNodesThatMatch(HtmlNodeFilters.imageFilter).toNodeArray();
      for(int i=0;images!=null&&i<images.length;i++){
        int start_pos = images[i].getStartPosition();
        content.append(desc.substring(last_pos, start_pos));
        last_pos = images[i].getEndPosition();
        ImageTag img = (ImageTag)images[i];
View Full Code Here

   * @return
   */
  public static String preview(String html, int max_count){
    if(html.length()<= max_count * 1.1)
      return html;
    Parser parser = new Parser();
    StringBuffer prvContent = new StringBuffer();
    try {
      parser.setEncoding(Globals.ENC_8859_1);
      parser.setInputHTML(html);
      NodeList nodes = parser.extractAllNodesThatMatch(nfilter);
      Node node = null;
      for(int i=0;i<nodes.size();i++){
        if(prvContent.length() >= max_count){
          if(node instanceof TagNode){
            TagNode tmp_node = (TagNode)node;
View Full Code Here

    * @param inputHtml
    * @return
    */
  public static String extractText(String inputHtml) throws Exception {
    StringBuffer text = new StringBuffer();
    Parser parser = new Parser();
    parser.setInputHTML(new String(inputHtml.getBytes(),ISO8859_1));
    //Parser parser = Parser.createParser(new String(inputHtml.getBytes(),ISO8859_1));
    // Traverse all of the nodes
    NodeList nodes = parser.extractAllNodesThatMatch(nodeFilter);
    for(int i=0;i<nodes.size();i++){
      Node node = nodes.elementAt(i);
      text.append(new String(node.toPlainTextString().getBytes(ISO8859_1)));
    }
    return text.toString();
View Full Code Here

            }

            // Process the HTML file to be sent, mainly handling the images in the HTML file
            private String getContent(String mailContent) {
                try {
                  Parser parser = new Parser();
                  parser.setInputHTML(new String(mailContent.getBytes(), ISO8859_1));
                    //Parser parser = Parser.createParser(new String(mailContent.getBytes(), ISO8859_1));
                    Node[] images = parser.extractAllNodesThatMatch(HtmlNodeFilters.imageFilter).toNodeArray();
                    for(int i=0;i<images.length;i++) {
                        ImageTag imgTag = (ImageTag) images[i];
                        if(!imgTag.getImageURL().toLowerCase().startsWith("http://"))
                            arrayList1.add(imgTag.getImageURL());
                    }
View Full Code Here

  protected static String filterScriptAndStyle(String html){
    if(StringUtils.isEmpty(html))
      return html;
    try{
      StringBuffer text = new StringBuffer();
      Parser parser = new Parser();
      parser.setEncoding("8859_1");
      parser.setInputHTML(html);
      // Traverse all of the nodes
      NodeList nodes = parser.extractAllNodesThatMatch(scriptFilter);     
      for(int i=0;i<nodes.size();i++){
        Node node = nodes.elementAt(i);
        if(node instanceof TextNode)
          text.append(node.getText());
        else{
View Full Code Here

      public void run() {
        try {
          // Compose the e-mail content
          String notify_content = MessageFormat.format(template,
              new String[]{ubean.getName(),ubean.getPassword(),urlPrefix});
          Parser html = new Parser();
          html.setEncoding(Globals.ENC_8859_1);
          html.setInputHTML(notify_content);
          Node[] nodes = html.extractAllNodesThatMatch(
              HtmlNodeFilters.titleFilter).toNodeArray();
          String title = nodes[0].toPlainTextString();
          MailSender sender = MailSender.getHtmlMailSender(null, 25,
              null, null);
          sender.setSubject(title);
View Full Code Here

     * @see jsynoptic.ui.HTMLParser#extractText(java.lang.String)
     */
    public String extractText(String htmlPath) {
        String result = "";
        try {
            Parser parser = new Parser (htmlPath);
            TextExtractingVisitor visitor = new TextExtractingVisitor ();
            parser.visitAllNodesWith (visitor);
            result = visitor.getExtractedText();

        } catch (ParserException e1) {
        }
        return result;
View Full Code Here

TOP

Related Classes of org.htmlparser.Parser

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.