Package gannuNLP.dictionaries

Examples of gannuNLP.dictionaries.DataBroker


      f=new File("./data/"+dict.getName()+"/");
      f.mkdirs();
      f=new File("./data/"+dict.getName()+"/"+dict.getName()+".sta");
      if(f.exists())//then try to upload a corpus
      {
        DataBroker db=new DataBroker(args[0],args[1]);
        db.load("Glosses;");       
        System.out.println("Dictionary uploaded!");
        System.out.println("Loading samples from SemCor files!");
        f=new File(args[2]);
        if(f.exists())
        {
View Full Code Here


    DocumentBuilder builder = fact.newDocumentBuilder();
  Document testset=builder.parse(XMLfile);
  Element root=testset.getDocumentElement();
  //Initialize the DataBroker
  Element data=(Element)root.getElementsByTagName("dict").item(0);
  DataBroker dict=new DataBroker(data.getAttribute("connector"),data.getAttribute("version"));
  dict.setPath(data.getAttribute("path"));
  dict.getSource().setPath(data.getAttribute("path"));
 
  String sources=data.getAttribute("sources");
  dict.load(sources);
  NodeList bowMods=root.getElementsByTagName("bowmodifier");
  for(int i=0;i<bowMods.getLength();i++)
  {
    Element bowMod=(Element) bowMods.item(i);   
    BoWModifier mod=(BoWModifier) Class.forName(bowMod.getAttribute("class")).newInstance();
    if(bowMod.getAttribute("config")!=null)
    {
      mod.addParameters(bowMod.getAttribute("config"));
    }   
    mod.setDict(dict);
    mod.init();
    dict.addModifier(mod);
  }
  //Load the test sets
  System.out.println("Loading test sets");
  NodeList docs=root.getElementsByTagName("testset");
  ArrayList<String> testsetsnames=new ArrayList<String>(docs.getLength());
  ArrayList<String> sensefilters=new ArrayList<String>(docs.getLength());
  ArrayList<ArrayList<Input>> testsets=new ArrayList<ArrayList<Input>>(docs.getLength());
  ArrayList<String> prefixes=new ArrayList<String>(docs.getLength());
  ArrayList<Input> ins=new ArrayList<Input>();
  for(int i=0;i<docs.getLength();i++)
  {
    //Load the sense filter
      SenseFilter filter=null;
      Element doc=(Element)docs.item(i);
      String sfilter=doc.getAttribute("senses");
      if(sfilter!=null)
      {
        sensefilters.add(sfilter);
        if(sfilter.startsWith("+"))
          filter=new FirstSenses("N:"+sfilter.substring(1));
        if(sfilter.startsWith("*"))
          filter=new NthSenseOnly("N:"+sfilter.substring(1));
        if(sfilter.startsWith("-"))
          filter=new RemoveNthSense("N:"+sfilter.substring(1));
      }
      boolean noTag=false;
      if(doc.getAttribute("includeNoTags")!=null)
        noTag=Boolean.parseBoolean(doc.getAttribute("includeNoTags"));
      prefixes.add(doc.getAttribute("output"));
      testsetsnames.add(doc.getAttribute("path"));
      //Load the tests
      ArrayList<File> files=Util.getAllSGFFiles(new File(doc.getAttribute("path")));
    ArrayList<Input> inputs=new ArrayList<Input>(files.size());
    int x=1;
    for(File file:files)
    {
      System.out.println("Loading "+file.getName()+" "+String.valueOf(x)+"/"+String.valueOf(files.size()));
      x++;
      File tmp=new File("./data/inputs/"+dict.getCompleteName().replace(">", "@@@@@@")+"/"+file.getName()+"_"+noTag);
      Input in;
      if(tmp.exists())
      {
        in=(Input)Util.loadObject(tmp);
        inputs.add(in);
        ins.add(in);
        for(AmbiguousWord word:in.getAmbiguousWords())
        {
          word.setDict(dict);
        }
      }
      else
      {
        File d=new File("./data/inputs/"+dict.getCompleteName().replace(">", "@@@@@@")+"/");
        if(!d.exists())
          d.mkdirs();
        in=new Input(file,filter,dict,noTag,true);
        inputs.add(in);
        ins.add(in);
View Full Code Here

      System.out.println("java -cp \"gannu.jar\" gannuWSD.Raw2SGF textFile targetDictionaryClass version disambiguateFlag");
    }
    else
    {
      File target= new File(args[0]);     
      DataBroker data=new DataBroker(args[1],args[2]);
      data.load("Glosses");
     
      ArrayList<File> files=Util.getAllFiles(target);
      for(File f:files)
      {
        System.out.println("Processing file "+f.getName());
View Full Code Here

        if(args[i].equals("fix"))
          SemCorCleaner.fix=true;
      }
     
      ArrayList<File> files=Util.getAllFiles(new File(args[2]));
      DataBroker dic=new DataBroker(args[0],args[1]);
      dic.setPath(".");
      dic.load("Glosses");
      int x=1;
      for(File file:files)
      {
        File fout=new File(file.getCanonicalFile()+".sgf");
        if(!file.getName().endsWith(".sgf")&&(!fout.exists()||rewrite))
        {
          System.out.println("Repairing file: "+file.getName()+ " "+String.valueOf(x)+"/"+String.valueOf(files.size()));
          x++;
          FileReader f=new FileReader(file);
          BufferedReader in=new BufferedReader(f);
          FileWriter fo=new FileWriter(file.getCanonicalFile()+".sgf");
          BufferedWriter out=new BufferedWriter(fo);
          String line=in.readLine();
          while(line!=null)
          {
            String sout="";
            line=line.replace("&", "&amp;");
            String tokens[]=line.split("=");
            for(int i=0;i<tokens.length;i++)
            {
              if(i==0)
              {
                sout+=tokens[i];
             
              else
              {
                int j=tokens[i].indexOf(">");
                if(j<0)
                  j=tokens[i].indexOf(" ");
                sout+="=\""+tokens[i].substring(0, j)+"\""+tokens[i].substring(j);
              }
            }
            out.write(sout+"\n");
            line=in.readLine();         
          }
          in.close();
          f.close();
          out.close();
          fo.close();
        }
      }
      System.out.println("Repairing bad lemmas!");
      File target=new File(args[2]);
      if(!target.isDirectory())
        target=new File(args[2]+".sgf");
      files=Util.getAllSGFFiles(target);
      x=1;
      for(File file:files)
      {
        System.out.println("-----------Checking file: "+file.getName()+ " "+String.valueOf(x)+"/"+String.valueOf(files.size())+"--------------");
        x++;
        try
        {
          SAXBuilder builder=new SAXBuilder();
          Document xml=(Document)builder.build(file);
          ArrayList<Element> words=new ArrayList<Element>();
          for(Element word:xml.getDescendants(new ElementFilter("wf")))
            words.add(word);
          for(int w=0;w<words.size();w++)
          {
            Element word=words.get(w);
            if(word.getAttribute("ot")!=null)
            {
              word.setAttribute("cmd", "ignore");
            }
            if((word.getAttribute("cmd").getValue().equals("done"))||(tag&&(word.getAttribute("cmd").getValue().equals("tag"))))
            {
              String lemma=word.getValue();
              String pos=word.getAttribute("pos").getValue().substring(0,1);
              String lem;
              Lemma l=null;
              if(word.getAttribute("cmd").getValue().equals("done"))
                { 
                if(word.getAttribute("lemma")!=null)
                {
                  lemma=word.getAttribute("lemma").getValue();
                    pos=word.getAttribute("pos").getValue().substring(0,1);
                    lem=lemma+"_"+pos;
                    l=dic.getLemma(lem);                 
                }
                else
                {
                  lemma="";
                  pos="X";
                }
                }
              lem=lemma+"_"+pos;
 
             
              if(l==null)//lemma may not be in normal form
              {
               
                SemCorCleaner.displayWindow(words, w);
                SemCorCleaner.readLemma(dic, lemma, word, pos);
                l=dic.getLemma(word.getAttributeValue("lemma")+"_"+word.getAttributeValue("pos").substring(0,1));
              }
              if(l!=null)
              {
                if(!SemCorCleaner.isValidWNSN(word.getAttributeValue("wnsn"), l.getSenses().size()))
                {
View Full Code Here

    {
      System.out.println("java -cp \"gannu.jar\" gannuWSD.GetLemma dictionaryClass version lemma [URL of web dictionary]");
    }
    else
    {
      DataBroker data=new DataBroker(args[0],args[1]);     
      if(data.isWeb())
      {
        data.setBaseURL(args[3]);
        if(args[0].contains("Wiki"))
        {
          WikiCleaner w=new WikiCleaner();
          w.setDict(data);
          data.addModifier(w);
        }
      }
      data.load("all");
      Lemma l=data.getLemma(args[2]);
      if(l!=null)
      {
        System.out.println("Have "+l.getSenses().size()+" senses");
        System.out.println("Frequency: "+String.valueOf(l.getFrequency()));
        System.out.println("IDF: "+data.getIDF(l));
        for(Sense s:l.getSenses())
        {
          System.out.println(s.getSid());
          System.out.println(s.getSynonyms());       
          for(int i=0;i<s.getSamples().size();i++)
          {
            System.out.println("From "+s.getSources().get(i)+": "+s.getSamples().get(i).trim());           
          }
          System.out.println("=================================");
          System.out.println("Press any key to continue!");
          System.in.read();
        }
      }
      else
      {
        ArrayList<Sense> senses=data.getSenses(args[2]);
        if(senses.size()>0)
        {
          System.out.println("Have "+senses.size()+" senses");
          for(Sense s:senses)
          {
View Full Code Here

TOP

Related Classes of gannuNLP.dictionaries.DataBroker

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.