@Override
public Lemma getLemma(String lemma) throws Exception {
URL url=new URL(this.path+"wiki/"+lemma.replace(" ", "_")+this.disambiguationWord);
Lemma l=null;
int iidx=this.badSearches.indexOf(lemma);
if(iidx<0)
{
Document xml=this.loadURL(url);
String text="";
if(xml!=null)
text=xml.getRootElement().getValue();
if(xml!=null&&!text.contains(this.missingMSG)&&!text.contains(this.wikiErrorMSG))
{
ArrayList<URL> urls=new ArrayList<URL>();
ArrayList<Integer> levels=new ArrayList<Integer>();
ArrayList<Integer> hindex=new ArrayList<Integer>();
ArrayList<String> heads=new ArrayList<String>();
for(Element div:xml.getDescendants(new ElementFilter("div")))
{
if(div.getAttributeValue("id")!=null&&div.getAttributeValue("id").equals("content"))
{
ElementFilter f=new ElementFilter("a");
Filter<? extends Content> or=f.or(new ElementFilter("span"));
for(Content c:div.getDescendants(or))
{
if(c.getCType()==CType.Element)
{
Element word=(Element)c;
if(word.getName().equals("a"))
{
String href=word.getAttributeValue("href");
if(href!=null&&href.startsWith("/wiki"))
{
urls.add(new URL(this.path+href));
hindex.add(new Integer(heads.size()-1));
}
}
else
{
if(word.getAttributeValue("class")!=null && word.getAttributeValue("class").equals("mw-headline"))
{
heads.add(word.getAttributeValue("id"));
Element e=word.getParentElement();
levels.add(new Integer(Integer.parseInt(e.getName().substring(1))-2));
}
}
}
}
break;
}
}
ArrayList<Sense> senses=new ArrayList<Sense>(urls.size());
for(int i=0;i<urls.size();i++)
{
String urlx=urls.get(i).getFile().replace("/wiki/", "");
if(!this.isNotAnArticle(urlx))//it is a sense
{
senses.add(this.getSense(urlx.replace("/", "")));
}
}
ArrayList<Count> counts=new ArrayList<Count>();
counts.add(this.getWikiCounts(lemma)); ;
l=new Lemma(lemma,"",senses,counts,this.name);
}
else
{
url=new URL(this.path+"wiki/"+lemma.replace(" ", "_"));
xml=this.loadURL(url);
if(xml!=null)
text=xml.getRootElement().getValue();
if(xml!=null&&!text.contains(this.missingMSG)&&!text.contains(this.wikiErrorMSG))
{
if(text.contains(this.disambiguationMSG)&&!this.jump)
{
this.jump=true;
ArrayList<Sense> senses=new ArrayList<Sense>();
Element body=this.getContentNode(xml);
for(Element e:body.getChildren())
{
if(e.getAttributeValue("class")!=null&&e.getAttributeValue("class").equals("dablink"))
{
for(Content c:e.getContent())
{
if(c.getCType().equals(CType.Element))
{
Element a=((Element)c);
if(a.getName().equals("a"))
{
String sid=a.getAttributeValue("href");
sid=sid.substring(sid.indexOf("wiki/")+5);
sid=sid.replace(this.disambiguationWord, "");
Lemma ll=this.getLemma(sid);
if(ll!=null)
{
for(Sense s:ll.getSenses())
{
if(!senses.contains(s))
senses.add(s);
}
}
}
}
}
}
}
this.jump=false;
if(senses.size()>0)
{
ArrayList<Count> counts=new ArrayList<Count>();
counts.add(this.getWikiCounts(lemma));
l=new Lemma(lemma,"",senses,counts,this.name);
}
}
else
{
ArrayList<Sense> senses=new ArrayList<Sense>(1);
String urlx=url.getFile().replace("/wiki/", "");
if(!this.isNotAnArticle(urlx))//it is a sense
{
senses.add(this.getSense(urlx));
ArrayList<Count> counts=new ArrayList<Count>();
counts.add(this.getWikiCounts(lemma));
l=new Lemma(lemma,"",senses,counts,this.name);
}
}
}
}
}