// Ask the cleaner to omit HTML comments from the parsed tree.
// NOTE(review): props is declared above this excerpt (presumably CleanerProperties) - confirm.
props.setOmitComments(true);
HtmlCleaner cleaner = new HtmlCleaner(props);
// Row cursor shared by the extraction loops below; it is reset to 0 before each new section.
int i=0;
// Clean the document found at the URL held in lien; node is the root for every XPath query below.
TagNode node = cleaner.clean(new URL(lien));
// Record the href of the anchor nested two divs deep inside the div whose class is
// exactly 'encadre_fiche firstencadre'. If the query matches several anchors, the
// last one wins; if it matches none, lien_site_spe keeps its previous value.
for (Object match : node.evaluateXPath("//div[@class='encadre_fiche firstencadre']/div/div/a"))
{
    TagNode anchor = (TagNode) match;
    lien_site_spe = anchor.getAttributeByName("href");
}
for (Object o : node.evaluateXPath("//div[@id='infos_generales']/table/tbody/tr"))
{
for(Object temp: ((TagNode)(o)).getAllChildren())
{
if(temp.toString().contains("th"))
{
for(Object temp2: ((TagNode)(temp)).getAllChildren())
{
info_principal[i][0] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp2.toString());
}
}
if(temp.toString().contains("td"))
{
for(Object temp2: ((TagNode)(temp)).getAllChildren())
{
info_principal[i][1] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp2.toString());
i++;
}
}
}
}
// Extract the description from the second 'element_deco' div into description[][].
// Column 0 receives text found under <strong>, <div>/<font> and <p> children;
// column 1 receives the first remaining direct child whose string form is at least
// 5 characters long, which also ends the scan of that div.
// i is the next row to fill and restarts at 0 for this section.
i = 0;
for (Object o : node.evaluateXPath("//div[@class='element_deco'][2]"))
{
    for (Object temp : ((TagNode) o).getAllChildren())
    {
        // Tag names are matched through toString(), as before, but only genuine
        // TagNodes are cast and descended into. Previously a text node that merely
        // contained "strong" was cast to TagNode and threw a ClassCastException.
        boolean tempIsTag = temp instanceof TagNode;
        if (tempIsTag && temp.toString().contains("strong"))
        {
            // The last child of <strong> wins; the row is not advanced here.
            for (Object temp2 : ((TagNode) temp).getAllChildren())
            {
                description[i][0] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp2.toString());
            }
        }
        else if (tempIsTag && temp.toString().equals("div"))
        {
            for (Object temp2 : ((TagNode) temp).getAllChildren())
            {
                if (temp2 instanceof TagNode && temp2.toString().equals("font"))
                {
                    for (Object temp3 : ((TagNode) temp2).getAllChildren())
                    {
                        if (temp3 instanceof TagNode && temp3.toString().equals("strong"))
                        {
                            for (Object temp4 : ((TagNode) temp3).getAllChildren())
                            {
                                // Store the content nested in <strong>. The original stored
                                // temp2 here, i.e. the enclosing tag's name "font".
                                description[i][0] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp4.toString());
                                i++;
                            }
                        }
                        else if (temp3.toString().length() >= 5)
                        {
                            // Store the <font> child itself, not the enclosing tag's name.
                            description[i][0] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp3.toString());
                            i++;
                        }
                    }
                }
                else if (temp2.toString().length() >= 5)
                {
                    description[i][0] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp2.toString());
                    i++;
                }
            }
        }
        else if (tempIsTag && temp.toString().equals("p"))
        {
            for (Object temp2 : ((TagNode) temp).getAllChildren())
            {
                // <strong> children of a paragraph are skipped; the first other child that
                // is long enough is stored and ends the scan of this paragraph.
                boolean strongTag = temp2 instanceof TagNode && temp2.toString().equals("strong");
                if (!strongTag && temp2.toString().length() >= 5)
                {
                    description[i][0] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp2.toString());
                    i++;
                    break;
                }
            }
        }
        else if (temp.toString().length() >= 5)
        {
            description[i][1] = org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4(temp.toString());
            break;
        }
    }
}
i=0;
int ok=0;
for (Object o : node.evaluateXPath("//div[@class='encadre_fiche']"))
{
for(Object temp: ((TagNode)(o)).getAllChildren())
{
if(ok == 1)
{