package htmldao;
import items.*;
import java.net.URL;
import java.util.*;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
/**
 * Downloads an HTML page and collects the text of the tags the caller is
 * interested in.
 *
 * <p>Each requested tag/class pair is held in a {@link TagElement}; after
 * construction every {@code TagElement} has received (via {@code add}) the text
 * of each tag in the page whose name matches and whose {@code class} attribute
 * equals the requested class. A requested class of {@code "*"} accepts every
 * tag with the requested name, whatever its class.
 */
public class HTML
{
    /** Address of the page to download and analyse. */
    private final String url;

    /** One entry per requested tag/class pair; filled in by {@link #getHTMLdata()}. */
    private final ArrayList<TagElement> lElements;

    /**
     * Builds the list of elements to look for and immediately analyses the page.
     *
     * @param url address of the HTML page to download
     * @param aHT tag/class pairs to search for; one {@code TagElement} is created per entry
     * @throws Exception if {@code aHT} is empty (no tags were requested)
     */
    public HTML( String url , ArrayList<HTMLtag> aHT ) throws Exception
    {
        this.url = url;
        lElements = new ArrayList<TagElement>();
        for( HTMLtag requested : aHT )
            lElements.add( new TagElement( requested.getTag(), requested.getClas() ) );
        getHTMLdata();
    }

    /**
     * Downloads and parses the page once, then fills every requested element
     * with the text of its matching tags.
     *
     * <p>Download/parse problems are reported on standard output and leave the
     * elements empty; they are deliberately not propagated (best-effort).
     *
     * @throws Exception if no elements were requested
     */
    private void getHTMLdata() throws Exception
    {
        if( lElements.isEmpty() )
            throw new Exception("No se han definido etiquetas HTML!!");

        // The document is the same for every requested element, so fetch and
        // parse it a single time instead of once per element.
        TagNode rootNode;
        try
        {
            rootNode = new HtmlCleaner().clean( new URL(url) );
        }
        catch( Exception e )
        {
            System.out.println("Problemas analizando el html: " + url);
            e.printStackTrace();
            return;
        }

        for( TagElement element : lElements )
        {
            try
            {
                collectMatches( rootNode, element );
            }
            catch( Exception e )
            {
                // Keep going: a failure on one element must not lose the others.
                System.out.println("Problemas analizando el html: " + url);
                e.printStackTrace();
            }
        }
    }

    /**
     * Adds to {@code element} the text of every tag under {@code rootNode} that
     * has the element's tag name and the element's class (or any class when the
     * requested class is {@code "*"}).
     */
    private static void collectMatches( TagNode rootNode, TagElement element )
    {
        String wantedClass = element.getClas();
        boolean anyClass = "*".equalsIgnoreCase( wantedClass );

        TagNode[] candidates = rootNode.getElementsByName( element.getTag(), true );
        for( TagNode candidate : candidates )
        {
            // getAttributeByName yields null for tags without a class attribute;
            // comparing from the wanted side avoids a NullPointerException that
            // would otherwise abort the scan of the remaining candidates.
            if( anyClass
                    || ( wantedClass != null && wantedClass.equals( candidate.getAttributeByName("class") ) ) )
                element.add( candidate.getText().toString() );
        }
    }

    /**
     * Returns the requested elements together with the text collected for them.
     *
     * @return the internal list itself (not a copy)
     */
    public ArrayList<TagElement> getLElements()
    {
        return lElements;
    }

    /**
     * Returns how many tag/class pairs were requested.
     *
     * @return the number of elements held by this object
     */
    public int getElementSize()
    {
        return lElements.size();
    }

    /**
     * Returns the address of the analysed page.
     *
     * @return the URL given to the constructor
     */
    public String getURL()
    {
        return url;
    }
}