Package gannuNLP.data

Examples of gannuNLP.data.Sense


      }
    }
    //calculate means
    for(int classIndex=0;classIndex<target.getSenses().size();classIndex++)
    {
      Sense s=target.getSenses().get(classIndex);
      classCount[classIndex]+=s.getSamples().size();
      for(String word:s.getBagOfWords())
      {       
        u[classIndex][Collections.binarySearch(this.features, word)]+=1.0f;
      }
    }
    for(int c=0;c<classes;c++)
    {
      for(int j=0;j<attributeCount;j++)
      {
        u[c][j]=u[c][j]/((float)classCount[c]);
      }
    }
    //calculate variance values
    for(int classIndex=0;classIndex<target.getSenses().size();classIndex++)
    {
      Sense s=target.getSenses().get(classIndex);
      ArrayList<Integer> indexes=new ArrayList<Integer>(s.getBagOfWords().size());   
      for(String word:s.getBagOfWords())
      {
        Integer tmp=new Integer(Collections.binarySearch(this.features, word));
        if(!indexes.contains(tmp))
          indexes.add(tmp);
      }
      for(int j=0;j<attributeCount;j++)
      {
        Integer tmp=new Integer(j);
        if(!indexes.contains(tmp))
           this.s[classIndex][j]+=((float)Math.pow(u[classIndex][j],2.0));
        else
        {
          float w=0.0f;
          for(String word:s.getBagOfWords())
          {
            if(word.equals(this.features.get(tmp.intValue())))
            {
              w+=1.0f;
            }
View Full Code Here


          this.dict.WriteSuperLemma(path, s);
        }
        s=this.dict.loadSuperLemma(u.getLemma(),path);
        l=s.retrieveLemma(this.dict.getName());
      }
      Sense sens=l.getSenses().get(u.getSense());
      if(!sens.getSamples().contains(u.getText()))
      {
        sens.addBagOfWords(u.getText(), u.getBow(),this.name);
      }
       p=u;
    }
    if(s!=null)
    {
View Full Code Here

    return c;
  }

 
  public Sense getSense(String sid)throws Exception {   
    Sense s;   
    File f=new File("./data/wiki/"+Dictionary.normalizeLemmaforFile(this.getCompleteName())+"/"+Dictionary.normalizeLemmaforFile(sid)+".wco");
    File dir=new File("./data/wiki/"+Dictionary.normalizeLemmaforFile(this.getCompleteName())+"/");
    dir.mkdirs();
    if(f.exists())
    {
      s=(Sense)Util.loadObject(f);
    }
    else
    {
      ArrayList<String> syns=new ArrayList<String>(1);
      syns.add(sid);
      s=new Sense(sid,"",syns);
      URL url=new URL(this.path+"wiki/"+sid.replace(" ", "_"));
      Document xml=this.loadURL(url);
      Element body=this.getContentNode(xml);
      //Check if definition is a section
      if(!xml.getRootElement().getValue().contains(this.redirect))
      {
        boolean start=false;
       
        for(Element e:body.getChildren())
        {
          if(e.getName().equals("p"))
          {
            start=true;
            s.addBagOfWords(e.getValue(),e.getValue().split(" "),this.name);
            for(Element ge:e.getChildren())
            {
              //Retrieve all the <a> for extracting the inGloss relation
              if(ge.getName().equals("a"))
              {
                String nurl=ge.getAttributeValue("href");
                s.addRelation("inGloss", new Relation("inGloss", nurl.replace("/wiki/", ""), ""));           
              }
            }
          }
          else
          {
            if(start)
              break;
          }
        }

      }
      else//Look for the start of the section
      {
        String all=xml.getRootElement().getValue();
        String section=all.substring(all.indexOf(this.redirect)+this.redirect.length());
        section=section.substring(0,section.indexOf("\")"));
       
        boolean start=false;
        for(Element e:body.getChildren())
        {
          if(start)
          {
            if(e.getName().equals("p"))
            {
              s.addBagOfWords(e.getValue(),e.getValue().split(" "),this.name);
              for(Element ge:e.getChildren())
              {
                //Retrieve all the <a> for extracting the inGloss relation
                if(ge.getName().equals("a"))
                {
                  String nurl=ge.getAttributeValue("href");
                  s.addRelation("inGloss", new Relation("inGloss", nurl.replace("/wiki/", ""), ""));           
                }
              }
            }
            else
            {
              break;
            }

          }
          else
          {
            if(e.getName().startsWith("h"))
            {
              for(Element es:e.getChildren())
              {
                if(es.getName().equals("span")&&section.equals(es.getAttributeValue("id")))
                {               
                  start=true;
                }
              }
            }
          }
         
        }
      }
      Element navbox=null;
      for(Element e:xml.getRootElement().getDescendants(new ElementFilter("table")))
      {
        if(e.getAttributeValue("class")!=null&&e.getAttributeValue("class").equals("navbox"))
        {
            navbox=e;
            break;
        }
      }
      //Add inNavBox relations
      if(navbox!=null)
      {
        for(Element e:navbox.getDescendants(new ElementFilter("a")))
        {
          String nurl=e.getAttributeValue("href");
          if(nurl!=null&&!this.isNotAnArticle(nurl))
          {
            s.addRelation("inNavBox", new Relation("inNavBox", nurl.replace("/wiki/", ""), ""));
          }
        }
      }
      //Add in CatLinks relations
      Element catlinks=null;
      String aux="catlinks";
      for(Element e:body.getParent().getDescendants(new ElementFilter("div")))
      {
        if(aux.equals(e.getAttributeValue("id")))
        {
          catlinks=e;
          break;
        }
      }
      if(catlinks!=null)
      {
        for(Element e:catlinks.getDescendants(new ElementFilter("a")))
        {
          String nurl=e.getAttributeValue("href");
          if(nurl!=null&&!this.isNotAnArticle(nurl))
          {
            s.addRelation("inCatLinks", new Relation("inCatLinks", nurl.replace("/wiki/", ""), ""));
          }
        }
      }
      Util.writeObject(f, s);
    }
View Full Code Here

                }
              }
            }
            if(wnsn.equals("")&&l!=null)
            {
              Sense sense=this.getSense(href);
              ban=false;
              i=0;
              for(Sense sx:l.getSenses())
              {
                i++;
                if(sense.itContainsTheSameSamples(sx))
                {
                  ban=true;
                  break;
                }
              }
View Full Code Here

    BufferedReader in=new BufferedReader(input);
    String line=in.readLine();
    while(line!=null)
    {
      String tokens[]=line.split("\\|");
      Sense ps=getSense(tokens[0]);
      if(tokens.length>2)
      {
        ps.addBagOfWords(tokens[1], tokens[2].split(" "),this.name);
        this.wordCount+=(double)(tokens[2].split(" ").length);
      }
      else
        ps.addBagOfWords(tokens[1], new String[]{""},this.name);
      line=in.readLine();
    }
    in.close();
  }
View Full Code Here

    {
      for(ArrayList<Relation> rels:sense.getRelations().values())
      {
        for(Relation rel:rels)
        {
          Sense s=this.getSense(rel.getSid()+"_"+rel.getPos());
          ArrayList<Sense> ss=new ArrayList<Sense>();
          int j=-1;
          while(ss.size()==0)
          {
             j++;
               ss=this.getSenses(s.getSynonyms().get(j));
          }
          int i;
          for(i=0;i<ss.size();i++)
          {
            if(ss.get(i).equals(s))
              break;
          }
          rel.setSid(s.getSynonyms().get(j)+"@"+String.valueOf(i));
        }
      }
    }
    ArrayList<String> c=new ArrayList<String>();
    ArrayList<Sense> newlist=new ArrayList<Sense>(this.senses.size());
    ArrayList<KeyString> equivalents=new ArrayList<KeyString>(this.senses.size());
    c.add("all");
    for(Sense sense:this.senses)
    {
      ArrayList<Sense> ss=new ArrayList<Sense>();
      int j=-1;
      while(ss.size()==0)
      {
        j++;
        ss=this.getSenses(sense.getSynonyms().get(j));
      }
     
      int i;
      for(i=0;i<ss.size();i++)
      {
        if(ss.get(i).equals(sense))
          break;
      }
     
      Sense s=new Sense(sense,c);
      s.setSid(sense.getSynonyms().get(j)+"@"+String.valueOf(i));
      newlist.add(s);
      equivalents.add(new KeyString(sense.getSid(),s.getSid()));
    }
    Collections.sort(newlist);
    Collections.sort(equivalents);
    this.senses=null;
    this.senses=newlist;
View Full Code Here

      synonyms.add(tokens[i].replaceAll(Pattern.quote("(")+"[a-zA-Z]+"+Pattern.quote(")"), "")+postag);
      i+=2;
    }
   
   
    Sense ps=new Sense(tokens[0],getPOS(pos),synonyms);
    //Adding relations
    limit=Integer.parseInt(tokens[i]);
    i++;
    for(int z=0;z<limit;z++)
    {
      String WNType=tokens[i];
      String type="";
      if(WNType.equals("!"))
        type="Antonym";
      if(WNType.equals("@"))
        type="Hypernym";
      if(WNType.equals("@i"))
        type="Instance Hypernym";
      if(WNType.equals("~"))
        type="Hyponym";
      if(WNType.equals("~i"))
        type="Instance Hyponym";
      if(WNType.equals("#m"))
        type="Member Holonym";
      if(WNType.equals("#s"))
        type="Substance Holonym";
      if(WNType.equals("#p"))
        type="Part Holonym";
      if(WNType.equals("%m"))
        type="Member Meronym";
      if(WNType.equals("%s"))
        type="Substance meronym";
      if(WNType.equals("%p"))
          type="Part meronym";
      if(WNType.equals("="))
        type="Attribute";
      if(WNType.equals("+"))
        type="Derivationally related form";
      if(WNType.equals(";c"))
        type="Domain of synset";
      if(WNType.equals("-c"))
        type="Member of this domain";
      if(WNType.equals(";r"))
        type="Region of synset";
      if(WNType.equals("-r"))
        type="Member of this Region";
      if(WNType.equals(";u"))
        type="Usage of synset";
      if(WNType.equals("-u"))
        type="Member of this Usage";
      if(WNType.equals("*"))
        type="Entailment";
      if(WNType.equals(">"))
        type="Cause";
      if(WNType.equals("^"))
        type="Also see";
      if(WNType.equals("$"))
        type="Verb group";
      if(WNType.equals("&"))
        type="Similar to";
      if(WNType.equals("<"))
        type="Participle of verb";
      if(WNType.equals("\\"))
        type="Pertainym";

       ps.addRelation(tokens[i], new Relation(type,tokens[i+1],tokens[i+2]));
      i+=4;
    }
    return ps;
  }
View Full Code Here

    //System.out.print(target.getIndex());
    ArrayList<String> rels=new ArrayList<String>(1);
    rels.add("all");
    for(int i=0;i<target.getSenses().size();i++)
    {
      Sense sense=target.getSenses().get(i);
      ArrayList<String>dwords=new ArrayList<String>();
      Graph g=this.graphs.loadGraph(sense, dict);
      double w=0.0;
      for(AmbiguousWord word:window)
      {
View Full Code Here

    while(index<this.nodes.size())
    {
      //System.out.print(String.valueOf(index)+"/"+String.valueOf(this.nodes.size()));
      //expand the graph until deep=5
      GraphNode g=this.nodes.get(index);
      Sense s=senses.get(index);
      if(g.getDeep()<4)
      {
        for(Relation r:s.getPlainRelations("all"))
        {
          Sense ns=dict.getSense(r.getSid());
          int idx=this.nodes.indexOf(new GraphNode(ns,0));
          if(idx<0)
          {
            GraphNode ng=g.addNode(ns,r.getType());
            this.nodes.add(ng);
View Full Code Here

      System.out.println("\n"+s.getSid());
      for(ArrayList<Relation> rels:s.getRelations().values())
      {
        for(Relation rel:rels)
        {
          Sense relsense=this.dict.getSense(rel.getSid());
          if(relsense!=null)
          {
            System.out.print("!");
            for(int i=0;i<relsense.getSamples().size();i++)
            {         
              s.addBagOfWords(relsense.getSamples().get(i), relsense.getParsedSamples().get(i),relsense.getSources().get(i));
              s.getBagOfWords().addAll(relsense.getSynonyms());
            }           
          }
          else
          {
            System.out.println(rel.getSid());
View Full Code Here

TOP

Related Classes of gannuNLP.data.Sense

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.