Package htmldao

Source Code of htmldao.HTML

package htmldao;

import items.*;

import java.net.URL;
import java.util.*;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;

public class HTML
{
  private String url;
  private ArrayList<TagElement> lElements;
 
  public HTML( String url , ArrayList<HTMLtag> aHT ) throws Exception
  {
    this.url = url;
    lElements = new ArrayList<TagElement>();
    for( int i=0 ; i<aHT.size() ; i++ )
      lElements.add(new TagElement(aHT.get(i).getTag(),aHT.get(i).getClas()));
    getHTMLdata();
  }
 
  private void getHTMLdata() throws Exception
  {
    if( lElements.size()>0 )
    {
      for( int i=0 ; i<lElements.size() ; i++ )
      {
        try
        {
          HtmlCleaner cleaner = new HtmlCleaner();
          TagNode rootNode = cleaner.clean(new URL(url));
          TagNode tagElements[] = rootNode.getElementsByName( lElements.get(i).getTag(), true );
          for( int j=0 ; j<tagElements.length ; j++ )
            if( lElements.get(i).getClas().equalsIgnoreCase("*") || tagElements[j].getAttributeByName("class").equals(lElements.get(i).getClas()) )
              lElements.get(i).add( tagElements[j].getText().toString() );
        }
        catch( Exception e )
        {
          System.out.println("Problemas analizando el html: " + url);
          e.printStackTrace();
        }
      }
    }
    else
      throw new Exception("No se han definido etiquetas HTML!!");
  }
 
  public ArrayList<TagElement> getLElements()
  {
    return lElements;
  }
 
  public int getElementSize()
  {
    return lElements.size();
  }
 
  public String getURL()
  {
    return url;
  }
}
TOP

Related Classes of htmldao.HTML

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.