// Package gannuWSD.testing
//
// Source code of gannuWSD.testing.XLSWriter

package gannuWSD.testing;

import gannuNLP.data.AmbiguousWord;
import gannuNLP.data.Input;
import gannuNLP.dictionaries.Dictionary;
import gannuNLP.dictionaries.WordNet;
import gannuUtil.KeyString;
import gannuUtil.Util;
import gannuWSD.skipfilters.SkipFilter;
import gannuWSD.windowfilters.WindowFilter;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map.Entry;


import jxl.Workbook;
import jxl.write.Label;
import jxl.write.Number;
import jxl.write.NumberFormat;
import jxl.write.WritableCellFormat;
import jxl.write.WritableFont;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;

/**
* Class for writing the test result in XLS format.
* @author Francisco Viveros-Jiménez
*
*/
public final class XLSWriter {
  /**
   * Cell format for plain numeric cells, built from the pattern {@code "###.##"}.
   */
  static WritableCellFormat number = new WritableCellFormat(new NumberFormat("###.##"));
  /**
   * Cell format for highlighted numeric cells: Arial, 11 pt, bold, with the same
   * {@code "###.##"} pattern as the plain format.
   * NOTE(review): the fourth WritableFont argument is {@code true}; in JExcelApi that
   * position looks like the italic flag, so these cells are probably bold italic - confirm.
   */
  static WritableCellFormat boldnumber= new WritableCellFormat(new WritableFont(WritableFont.ARIAL, 11, WritableFont.BOLD, true),new NumberFormat("###.##"));
  /**
   * Number of IDF buckets allocated by the IDF summary sheets.
   * The sheets compute a bucket as floor(idf/0.5), so the buckets cover
   * the IDF values [0,0.5,...,idfIntervals/2).
   */
  static int idfIntervals=50;
  /**
   * Maximum polysemy allowed.
   * NOTE(review): not referenced in the part of the file reviewed here.
   */
  static int maxSenseNumbers=2000;
  /**
   * Calculates all the necessary IDF values and stores them in the KeyString list.
   * @param target List for storing the generated IDF values.
   * @param tests Target test results.
   * @throws Exception
   */
  static void retrieveIDFs(ArrayList<KeyString> target,ArrayList<Test> tests)throws Exception
  {
    System.out.println("Calculating IDF values please wait some minutes...");
    for(Input in:tests.get(0).getDocs())
    {
      for(AmbiguousWord word:in.getAmbiguousWords())
      {
        if(word.getSenses().size()>0)
        {
          KeyString aux=new KeyString(word.getLemma(),String.valueOf(word.getIDF()));
          if(!target.contains(aux))
          {
            target.add(aux);
          }
        }
      }
    }
    Dictionary dict=tests.get(0).getDictionary();
    for(Test test:tests)
    {
      for(Summary s:test.getSummaries())
      {
        for(Decision d:s.getDecisions())
        {
          for(int sense=0;sense<d.getSenseCount();sense++)
          {
            for(String word:d.getDecisionWords(sense))
            {
              KeyString aux=new KeyString(word);
              if(!target.contains(aux))
              {
                target.add(new KeyString(word,String.valueOf(dict.getIDF(word))));
              }
            }
          }
        }
      }
    }
  }
  /**
   * Method for creating an R script for performing statistical analysis.
   * @param f Target file.
   * @param tests Target test results.
   * @throws Exception
   */
  public static void generateRScript(File f, ArrayList<Test> tests)throws Exception
  {
    System.out.println("Creating R script ");
    f.createNewFile();
    FileWriter fw=new FileWriter(f);
    BufferedWriter out=new BufferedWriter(fw);
    int i=1;
    for(Test test:tests)
    {
      String name="test"+String.valueOf(i);
     
      int dd=0;
      for(ArrayList<Decision> ds:test.getAnswers())
      {
        for(int d=0;d<ds.size();d++)
        {
          if((d==(0))&&(dd==(0)))
            out.write(name+"<-c("+String.valueOf(ds.get(d).getScore())+");\n");
          else
            out.write(name+"<-c("+name+","+String.valueOf(ds.get(d).getScore())+");\n");
        }
        dd++;
      }
      i++;
    }
    for(i=1;i<=tests.size();i++)
    {
      for(int j=1;j<=tests.size();j++)
      {
        out.write("data<-wilcox.test(test"+String.valueOf(i)+",test"+String.valueOf(j)+");\n");
        if(i==1&&j==1)
          out.write("results<-c(data$p.value);\n");
        else
          out.write("results<-c(results,data$p.value);\n");
      }
    }
    out.write("m=matrix(data=results,nrow="+tests.size()+",ncol="+tests.size()+");\n m \n");
    out.close();
    fw.close();
  }
  /**
   * Writes an XLS file.
   * @param file File to be created.
   * @param tests The tests to be included in the file.
   * @param detailed Tells if a decisions detail sheet will be added to the document.
   * @throws Exception
   */
  public static void writeXLS(File file, ArrayList<Test> tests, String summary,String detail,long time) throws Exception
  {
    ArrayList<KeyString> idflist=new ArrayList<KeyString>();
   
    System.out.println("Saving "+file.getName());
    WritableWorkbook workbook = Workbook.createWorkbook(file);
    WritableSheet header = workbook.createSheet("Experimental setup", 0);
    addHeader(header,tests);
    if(summary.contains("all")||summary.contains("performance"))
    {
      System.out.println("Generating performance summary sheet");
      WritableSheet sheet = workbook.createSheet("Performance summary", workbook.getNumberOfSheets());
      addPerformanceSheet(sheet,tests,time);
    }
    if(summary.contains("all")||summary.contains("disambiguation"))
    {
      System.out.println("Generating disambiguation summary sheet");
      WritableSheet  sheet = workbook.createSheet("Words leading to disambiguation", workbook.getNumberOfSheets());
      addDisambiguationSheet(sheet,tests);
    }
    if(summary.contains("all")||summary.contains("words"))
    {
      System.out.println("Problem summary sheet");
      WritableSheet docs = workbook.createSheet("Disambiguated words", workbook.getNumberOfSheets());
      addProblemSheet(docs,tests);
    }
    if(summary.contains("all")||summary.contains("overlap"))
    {
      System.out.println("Generating overlap summary sheet");
      WritableSheet sheet = workbook.createSheet("Overlap by IDF summary", workbook.getNumberOfSheets());
      if(idflist.size()==0)
        XLSWriter.retrieveIDFs(idflist, tests);
      addOverlapSheet(sheet, tests,idflist);
    }
    if(summary.contains("all")||summary.contains("sense"))
    {
      System.out.println("Generating sense stats sheet");
      WritableSheet sheet = workbook.createSheet("Algorithm behavior", workbook.getNumberOfSheets());
      addMiscSheet(sheet, tests);
    }
    if(detail.contains("all")||detail.contains("performance"))
    {
      System.out.println("Generating performance detail sheet");
      WritableSheet sheet = workbook.createSheet("Performance detail", workbook.getNumberOfSheets());
      addPerformanceDetailSheet(sheet,tests);
    }
    if(detail.contains("all")||detail.contains("disambiguation"))
    {
      System.out.println("Generating disambiguation detail sheet");
      WritableSheet  sheet = workbook.createSheet("Disambiguation detail", workbook.getNumberOfSheets());
      if(idflist.size()==0)
        XLSWriter.retrieveIDFs(idflist, tests);
      addDisambiguationDetailSheet(sheet,tests,idflist);
    }
    if(detail.contains("all")||detail.contains("words"))
    {
      System.out.println("Generating problem detail sheet");
      WritableSheet docs = workbook.createSheet("Problem detail", workbook.getNumberOfSheets());
      if(idflist.size()==0)
        XLSWriter.retrieveIDFs(idflist, tests);
      addProblemDetailSheet(docs,tests,idflist);
    }
    if(detail.contains("overlap"))
    {
      System.out.println("Generating overlap detail sheet");
      WritableSheet sheet = workbook.createSheet("Overlap detail", workbook.getNumberOfSheets());
      addOverlapDetailSheet(sheet,tests);
    }
   
   
    if(detail.contains("all")||detail.contains("sense"))
    {
      System.out.println("Generating sense detail sheet");
      WritableSheet sheet = workbook.createSheet("Disambiguation by sense number", workbook.getNumberOfSheets());
      addSenseSheet(sheet,tests);
    }   
    workbook.write();
    workbook.close();
    System.out.println(file.getName()+" saved");
  }
  /**
   * Sheet describing the file contents.
   * @param sheet The sheet to be filled.
   * @param tests Tests included in this file.
   * @throws Exception
   */
  private static void addHeader(WritableSheet sheet, ArrayList<Test> tests)throws Exception
  {
    Test sample=tests.get(0);
    sheet.addCell(new Label(0, 0, "CICWSD output file. Created by: Viveros-Jimenez F., Gelbukh A. & Sidorov G."));
    sheet.addCell(new Label(0, 2, "Knowledge Source:"));
    sheet.addCell(new Label(1, 2, sample.getKNSources()));
    sheet.addCell(new Label(4, 2, "Dictionary:"));
    sheet.addCell(new Label(5, 2, sample.getDictionary().toString()));
   
    sheet.addCell(new Label(1, 3, "* Retrieved Senses:"));
    sheet.addCell(new Label(2, 3, sample.getRetrievedSenses()));
    sheet.addCell(new Label(0, 5, "Tests:"));
    int offset=6;
    int i=1;
    for(Test test:tests)
    {
      sheet.addCell(new Label(1, offset, "Test "+i));
      offset++;
      sheet.addCell(new Label(2, offset, "* WSD method:"));
      sheet.addCell(new Label(3, offset, test.getAlgorithm().toString()));
      offset++;
      sheet.addCell(new Label(2, offset, "* Back-off method:"));
      if(test.getBackoff()!=null)
        sheet.addCell(new Label(3, offset, test.getBackoff().toString()));
      else
        sheet.addCell(new Label(3, offset, "none"));
      offset++;
      sheet.addCell(new Label(2, offset, "* Tie solving method:"));
      if(test.getTie()!=null)
        sheet.addCell(new Label(3, offset, test.getTie().toString()));
      else
        sheet.addCell(new Label(3, offset, "none"));
      offset++;
     
     
      sheet.addCell(new Label(2, offset, "* Skip Filters for WSD:"));
      offset++;
     
      for(SkipFilter condition:test.getAlgorithm().getSkipFilters())
      {
        sheet.addCell(new Label(3, offset, condition.toString()));
        offset++;
      }
      if(test.getAlgorithm().getSkipFilters().size()==0)
      {
        sheet.addCell(new Label(3, offset, "none"));
        offset++;
      }
      offset++;
      if(test.getTie()!=null)
      {
        offset++;
        sheet.addCell(new Label(2, offset, "* Skip Filters for tie:"));
        offset++;
       
        for(SkipFilter condition:test.getTie().getSkipFilters())
        {
          sheet.addCell(new Label(3, offset, condition.toString()));
          offset++;
        }
        if(test.getTie().getSkipFilters().size()==0)
        {
          sheet.addCell(new Label(3, offset, "none"));
          offset++;
        }
        offset++;       
      }
      offset++;
      if(test.getBackoff()!=null)
      {
        offset++;
        sheet.addCell(new Label(2, offset, "* Skip Filters for backoff:"));
        offset++;
       
        for(SkipFilter condition:test.getBackoff().getSkipFilters())
        {
          sheet.addCell(new Label(3, offset, condition.toString()));
          offset++;
        }
        if(test.getBackoff().getSkipFilters().size()==0)
        {
          sheet.addCell(new Label(3, offset, "none"));
          offset++;
        }
        offset++;       
      }
     
     
      offset++;
      sheet.addCell(new Label(2, offset, "* Window Selection Filters for WSD:"));
      offset++;
     
      for(WindowFilter condition:test.getAlgorithm().getWindowFilters())
      {
        sheet.addCell(new Label(3, offset, condition.toString()));
        offset++;
      }
      if(test.getAlgorithm().getWindowFilters().size()==0)
      {
        sheet.addCell(new Label(3, offset, "none"));
        offset++;
      }
      offset++;
      if(test.getTie()!=null)
      {
        offset++;
        sheet.addCell(new Label(2, offset, "* Window Selection Filters for tie:"));
        offset++;
       
        for(WindowFilter condition:test.getTie().getWindowFilters())
        {
          sheet.addCell(new Label(3, offset, condition.toString()));
          offset++;
        }
        if(test.getTie().getWindowFilters().size()==0)
        {
          sheet.addCell(new Label(3, offset, "none"));
          offset++;
        }
        offset++;
       
      }
      offset++;
      if(test.getBackoff()!=null)
      {
        offset++;
        sheet.addCell(new Label(2, offset, "* Window Selection Filters for backoff:"));
        offset++;
       
        for(WindowFilter condition:test.getBackoff().getWindowFilters())
        {
          sheet.addCell(new Label(3, offset, condition.toString()));
          offset++;
        }
        if(test.getBackoff().getWindowFilters().size()==0)
        {
          sheet.addCell(new Label(3, offset, "none"));
          offset++;
        }
        offset++;
      }
      i++;
    }
   
  }
 
  /**
   * Writes a overlap detail sheet.
   * This sheet contains a more detailed explanation of how decisions were made.
   * @param sheet The sheet to be created.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addOverlapDetailSheet(WritableSheet sheet, ArrayList<Test> tests)throws Exception
  {
  
    
    int goffset=0;
    int t=1;
    for(Test test:tests)
    {
      int offset=4;
      sheet.addCell(new Label(goffset, offset, "Tests "+t));
      offset++; 
      t++;
      for(int i=0;i<test.getDocs().size();i++)
      {
        Input doc=test.getDocs().get(i);
        ArrayList<Decision> decisions=test.getAnswers().get(i);
        sheet.addCell(new Label(goffset, offset, "Document:"));
        sheet.addCell(new Label(goffset+1, offset, doc.toString()));
        offset++;
       
        for(Decision decision:decisions)
        {
          if(decision.isAttempted())
          {
            sheet.addCell(new Label(goffset+1,offset,"Lemma"));
            sheet.addCell(new Label(goffset+2,offset,"Polysemy"));
            sheet.addCell(new Label(goffset+3,offset,"Correct Answers"));
            sheet.addCell(new Label(goffset+4,offset,"Approach Answer"));
            sheet.addCell(new Label(goffset+5,offset,"Window"));
            offset++;
            sheet.addCell(new Label(goffset+1,offset,decision.getTarget().getLemma()));
            sheet.addCell(new Number(goffset+2,offset,decision.getTarget().getSenses().size()));
            sheet.addCell(new Label(goffset+3,offset,Arrays.toString(decision.getTarget().getCorrectSenseNumbers())));
            sheet.addCell(new Label(goffset+4,offset,decision.getAnswersString()));
            sheet.addCell(new Label(goffset+5,offset,decision.getWindowPrint()));
            offset++;
            sheet.addCell(new Label(goffset+1,offset,"Sense"));
            sheet.addCell(new Label(goffset+2,offset,"Weight"));
            sheet.addCell(new Label(goffset+3,offset,"Words"));
            offset++;
            int j=0;
            for(double w:decision.getWeights())
            {
              if(w>0.0)
              {
                sheet.addCell(new Number(goffset+1,offset,j+1));
                sheet.addCell(new Number(goffset+2,offset,w));
                sheet.addCell(new Label(goffset+3,offset,decision.getDecisionWords(j).toString()));
               
                offset++;
              }
              j++;
            }
           
            offset++;
            offset++;
          }
        
        }
       
       
        offset++;
        i++;
      }
     
      offset++;
      goffset+=6;
    }
  }
  /**
   * Writes a sheet containing detailed success rate on each attempted lemma.
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addDisambiguationDetailSheet(WritableSheet sheet, ArrayList<Test> tests, ArrayList<KeyString> idfList) throws Exception
  {
    int goffset=4;
    int t=1;
    for(Test test:tests)
    {
      HashMap<String,int[]> globalLemmas=new HashMap<String,int []>()
      int i=0;
      int docs=test.getSummaries().size();
      for(Summary summary:test.getSummaries())
      {
        for(Entry<String,int[]> entry:summary.getDecisionLemmas().entrySet())
        {
         
          if(globalLemmas.containsKey(entry.getKey()))
          {
            int []count=globalLemmas.get(entry.getKey());
            int []aux=entry.getValue();
            count[i]+=aux[0];
            count[docs]+=aux[0];
            count[docs+1]+=aux[1];
          }
          else
          {
            int []count=new int[docs+2];
            for(int j=0;j<(docs+1);j++)
              count[j]=0;
            int []aux=entry.getValue();
            count[i]+=aux[0];
            count[docs]+=aux[0];
            count[docs+1]+=aux[1];
            globalLemmas.put(entry.getKey(), count);
          }
        }
        i++;
      }
      //write the header parameters of the test
      int offset=0;
      sheet.addCell(new Label(0, goffset, "Test "+t));
      t++;
      goffset++;
      sheet.addCell(new Label(0, goffset, "Lemma"));
       
      for(Input doc:test.getDocs())
      {
        sheet.addCell(new Label(1+offset, goffset, doc.toString()));
        offset++;
      }
      sheet.addCell(new Label(1+offset, goffset, "Overall attempts"));
      sheet.addCell(new Label(2+offset, goffset, "Overall correct answers"));
      sheet.addCell(new Label(3+offset, goffset, "IDF"));
      goffset++;
      for(Entry<String,int[]> entry:globalLemmas.entrySet())
      {
        int[] counts=entry.getValue();
        sheet.addCell(new Label(0,goffset,entry.getKey()));
        for(int z=0;z<counts.length;z++)
        {
          if(z<counts.length)
            sheet.addCell(new Number(1+z, goffset, counts[z],number));
          else
            sheet.addCell(new Number(1+z, goffset, counts[z],boldnumber));
        }
        sheet.addCell(new Number(1+counts.length, goffset,XLSWriter.getIDF(idfList, entry.getKey()) ,boldnumber));
        goffset++;
      }
      goffset++;
      goffset++;
      goffset++;
    }
   
  }
  /**
   * Search for the IDF value of a target word in a target IDF list.
   * @param idfList Target list containing lemmas and its corresponding IDF values.
   * @param word Target word.
   * @return Corresponding IDF value or 0.0 in case of being unavailable.
   */
  private static double getIDF(ArrayList<KeyString> idfList,String word)
  {
    if(idfList.indexOf(new KeyString(word))<0)
      return 0.0;
    else 
      return Double.parseDouble(idfList.get(idfList.indexOf(new KeyString(word))).getString());
  }
  /**
   * Writes a sheet containing the success rate obtained in different IDF clusters of words. The IDF values ranges are [1,1.5,2.0,...,15.0].
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addDisambiguationSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
  {
    int idfcount[][][];
    int doccount=tests.get(0).getDocs().size();
    idfcount=new int[doccount+1][XLSWriter.idfIntervals][2];

    int goffset=4;
    int t=1;
   
    for(Test test:tests)
    {
      for(int d=0;d<=doccount;d++)
      {
        for(int i=0;i<XLSWriter.idfIntervals;i++)
        {
          idfcount[d][i][0]=0;
          idfcount[d][i][1]=0;
        }
      }
      int d=0;
      for(Summary summary:test.getSummaries())
      {
        for(Decision D:summary.getDecisions())
        {
          if(D.isAttempted())
          {
            double idf=D.getTarget().getIDF();
            int i=((int)Math.floor(idf/0.5));
            idfcount[d][i][0]++;
            idfcount[doccount][i][0]++;
            if(D.isCorrectSenseAddressed())
            {
              idfcount[d][i][1]++;
              idfcount[doccount][i][1]++;
            }
          }
           
        }
        d++;
      }
      //write the header parameters of the test
     
     
      sheet.addCell(new Label(0, goffset, "Test "+t));
      t++;
      for(int type=0;type<2;type++)
      {
        int offset=0;
        goffset++;
        if(type==0)
          sheet.addCell(new Label(0, goffset, "Attempts"));
        else
          sheet.addCell(new Label(0, goffset, "Correct"));
        int i=1;
        for(double idf=1.0;idf<15.1;idf+=0.5)
        {
          sheet.addCell(new Label(offset+i, goffset, "IDF = "+String.valueOf(idf)));
          i++;
        }
        goffset++;
        i=0
        for(Input doc:test.getDocs())
        {
          sheet.addCell(new Label(0, goffset+i, doc.toString()));
          i++;
        }
        sheet.addCell(new Label(0, goffset+i, "Overall:"));
        offset++;
       
        for(d=0;d<=doccount;d++)
        {
          for(i=0;i<29;i++)
          {
            sheet.addCell(new Number(offset+i, goffset+d, idfcount[d][i][type]));
          }
        }
        goffset+=doccount;
        goffset++;
        goffset++;
        goffset++;

      }
    }
   
  }
  /**
   * Writes a sheet containing the success rate obtained in different IDF clusters of words. The IDF values ranges are [1,1.5,2.0,...,15.0].
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  //TODO
  private static void addOverlapSheet(WritableSheet sheet, ArrayList<Test> tests, ArrayList<KeyString> idfList) throws Exception
  {
    int idfcount[][][];
    int doccount=tests.get(0).getDocs().size();
    idfcount=new int[doccount+1][XLSWriter.idfIntervals][2];
 
    int goffset=4;
    int t=1;
   
    for(Test test:tests)
    {
      for(int d=0;d<=doccount;d++)
      {
        for(int i=0;i<XLSWriter.idfIntervals;i++)
        {
          idfcount[d][i][0]=0;
          idfcount[d][i][1]=0;
        }
      }
      int d=0;
      for(Summary summary:test.getSummaries())
      {
        for(Decision ds:summary.getDecisions())
        {
          for(int s=0;s<ds.getSenseCount();s++)
          {
            for(String word:ds.getDecisionWords(s))
            {
              double idf=XLSWriter.getIDF(idfList, word);
              int i=((int)Math.floor(idf/0.5));
              idfcount[d][i][0]+=1.0;
              idfcount[doccount][i][0]+=1.0;
              if(ds.isCorrect(s))
              {
                idfcount[d][i][1]+=1.0;
                idfcount[doccount][i][1]+=1.0;
              }
            }
          }
        }
        d++;
      }
      //write the header parameters of the test
     
     
      sheet.addCell(new Label(0, goffset, "Test "+t));
      t++;
      for(int type=0;type<2;type++)
      {
        int offset=0;
        goffset++;
        if(type==0)
          sheet.addCell(new Label(0, goffset, "Attempts"));
        else
          sheet.addCell(new Label(0, goffset, "Correct"));
        int i=1;
        for(double idf=1.0;idf<15.1;idf+=0.5)
        {
          sheet.addCell(new Label(offset+i, goffset, "IDF = "+String.valueOf(idf)));
          i++;
        }
        goffset++;
        i=0
        for(Input doc:test.getDocs())
        {
          sheet.addCell(new Label(0, goffset+i, doc.toString()));
          i++;
        }
        sheet.addCell(new Label(0, goffset+i, "Overall:"));
        offset++;
       
        for(d=0;d<=doccount;d++)
        {
          for(i=0;i<29;i++)
          {
            sheet.addCell(new Number(offset+i, goffset+d, idfcount[d][i][type]));
          }
        }
        goffset+=doccount;
        goffset++;
        goffset++;
        goffset++;

      }
    }
   
  }
  /**
   * Generates a summary of the frequencies of IDF clusters in each target text.
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addProblemSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
  {
    int idfcount[][];
    int doccount=tests.get(0).getDocs().size();
    idfcount=new int[doccount+1][XLSWriter.idfIntervals];
    for(int d=0;d<=doccount;d++)
    {
      for(int i=0;i<XLSWriter.idfIntervals;i++)
      {
        idfcount[d][i]=0;
      }
    }
    int goffset=4;
    int d=0;
    for(Input input:tests.get(0).getDocs())
    {     
      for(AmbiguousWord word:input.getAmbiguousWords())
      {
        double idf=word.getIDF();
        int i=((int)Math.floor(idf/0.5));
        idfcount[d][i]++;
        idfcount[doccount][i]++;
      }
      d++;
    }
    int offset=0;
    goffset++;
    int i=1;
    for(double idf=1.0;idf<15.1;idf+=0.5)
    {
      sheet.addCell(new Label(offset+i, goffset, "IDF = "+String.valueOf(idf)));
      i++;
    }
    goffset++;
    i=0
    for(Input doc:tests.get(0).getDocs())
    {
      sheet.addCell(new Label(0, goffset+i, doc.toString()));
      i++;
    }
    sheet.addCell(new Label(0, goffset+i, "Overall:"));
    offset++;
    for(d=0;d<=doccount;d++)
    {
      for(i=0;i<29;i++)
      {
        sheet.addCell(new Number(offset+i, goffset+d, idfcount[d][i]));
      }
    }
    goffset+=doccount;
    goffset++;
    goffset++;
    goffset++;
  }
 
  /**
   * Generates a summary of the lemmas on the test-bed
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addProblemDetailSheet(WritableSheet sheet, ArrayList<Test> tests, ArrayList<KeyString> idfList) throws Exception
  {
    //Extract the decisions detail
    //Header

   
    HashMap<String,int[]> globalLemmas=new HashMap<String,int []>();
    int goffset=3;
    int i=0;
    int docs=tests.get(0).getDocs().size();
    for(Input doc:tests.get(0).getDocs())
    {
        for(Entry<String,ArrayList<Integer>> entry:doc.getIndex().entrySet())
        {
         
          if(globalLemmas.containsKey(entry.getKey()))
          {
            int []count=globalLemmas.get(entry.getKey());
            count[i]+=entry.getValue().size();
            count[docs]+=entry.getValue().size();
          }
          else
          {
            int []count=new int[docs+1];
            for(int j=0;j<(docs+1);j++)
              count[j]=0;
            count[i]+=entry.getValue().size();
            count[docs]+=entry.getValue().size();
            globalLemmas.put(entry.getKey(), count);
          }
        }
        i++;
    }
    sheet.addCell(new Label(0, goffset, "Lemma"));
    int offset=0;
    for(Input doc:tests.get(0).getDocs())
    {
      sheet.addCell(new Label(1+offset, goffset, doc.toString()));
      offset++;
    }
    sheet.addCell(new Label(1+offset, goffset, "Overall appareances"));
    sheet.addCell(new Label(2+offset, goffset, "IDF"));
    goffset++;
    for(Entry<String,int[]> entry:globalLemmas.entrySet())
    {
      int[] counts=entry.getValue();
      sheet.addCell(new Label(0,goffset,entry.getKey()));
      int z;
      for(z=0;z<counts.length;z++)
      {
          if(z==(counts.length-1))
            sheet.addCell(new Number(1+z, goffset, counts[z],boldnumber));
          else
            sheet.addCell(new Number(1+z, goffset, counts[z],number));
         
      }
      sheet.addCell(new Number(1+z,goffset,XLSWriter.getIDF(idfList,entry.getKey()),boldnumber));
      goffset++;
    }
   
  }
 
  /**
   * Writes a summary of the performance of the algorithm.
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addPerformanceSheet(WritableSheet sheet, ArrayList<Test> tests,long time) throws Exception
  {
    int offset=3;
    int t=1;
    for(Test test:tests)
    {
      sheet.addCell(new Label(0, offset, "Test "+t));
      offset++;
      t++;
      sheet.addCell(new Label(0, offset, "Document"));
      sheet.addCell(new Label(1, offset, "Precision"));
      sheet.addCell(new Label(2, offset, "Recall"));
      sheet.addCell(new Label(3, offset, "Coverage"));
      sheet.addCell(new Label(4, offset, "F1-measure"));
      sheet.addCell(new Label(5, offset, "Time"));
      offset++;
      ArrayList<Summary> global=new ArrayList<Summary>();
      int d=0;
      for(Input doc:test.getDocs())
      {
        sheet.addCell(new Label(0, offset, doc.toString()));
        Summary summary=test.getSummaries().get(d);
        double[]p=Measures.Precision(summary);
        double[]r=Measures.Recall(summary);
        double[]c=Measures.Coverage(summary);
        double[]f=Measures.Fmeasure(summary);
        sheet.addCell(new Number(1, offset, p[4],boldnumber));
        sheet.addCell(new Number(2, offset, r[4],boldnumber));
        sheet.addCell(new Number(3, offset, c[4],boldnumber));
        sheet.addCell(new Number(4, offset, f[4],boldnumber));
        global.add(summary);
        d++;
        offset++;
      }
      sheet.addCell(new Label(0,offset,"Overall:"));
      double[]p=Measures.Precision(global);
      double[]r=Measures.Recall(global);
      double[]c=Measures.Coverage(global);
      double[]f=Measures.Fmeasure(global);
      sheet.addCell(new Number(1, offset, p[4],boldnumber));
      sheet.addCell(new Number(2, offset, r[4],boldnumber));
      sheet.addCell(new Number(3, offset, c[4],boldnumber));
      sheet.addCell(new Number(4, offset, f[4],boldnumber));
      sheet.addCell(new Number(5, offset, time,boldnumber));
      offset++;
      offset++;
      offset++;
    }
  }
  /**
   * Writes a sheet containing the detailed performance results for each open-class type.
   * @param sheet The sheet to write to.
   * @param tests Data for creating the sheet.
   * @throws Exception
   */
  private static void addPerformanceDetailSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
  {
    int offset=3;
    int t=1;
    for(Test test:tests)
    {
      sheet.addCell(new Label(0, offset, "Test "+t));
      t++;
     
      sheet.addCell(new Label(2, offset, "Nouns"));
      sheet.addCell(new Label(6, offset, "Verbs"));
      sheet.addCell(new Label(10, offset, "Adjectives"));
      sheet.addCell(new Label(14, offset, "Adverbs"));
      offset++;
      sheet.addCell(new Label(0, offset, "Document"));
      for(int i=0;i<4;i++)
      {
        sheet.addCell(new Label(i*4+1, offset, "Precision"));
        sheet.addCell(new Label(i*4+2, offset, "Recall"));
        sheet.addCell(new Label(i*4+3, offset, "Coverage"));
        sheet.addCell(new Label(i*4+4, offset, "F1-measure"));
      }
      offset++;
      ArrayList<Summary> global=new ArrayList<Summary>();
      int d=0;
      for(Input doc:test.getDocs())
      {
        sheet.addCell(new Label(0, offset, doc.toString()));
        Summary summary=test.getSummaries().get(d);
        double[]p=Measures.Precision(summary);
        double[]r=Measures.Recall(summary);
        double[]c=Measures.Coverage(summary);
        double[]f=Measures.Fmeasure(summary);
        for(int j=0;j<4;j++)
        {
          if(j==4)
          {
            sheet.addCell(new Number(j*4+1, offset, p[j],boldnumber));
            sheet.addCell(new Number(j*4+2, offset, r[j],boldnumber));
            sheet.addCell(new Number(j*4+3, offset, c[j],boldnumber));
            sheet.addCell(new Number(j*4+4, offset, f[j],boldnumber));
          }
          else
          {
            sheet.addCell(new Number(j*4+1, offset, p[j],number));
            sheet.addCell(new Number(j*4+2, offset, r[j],number));
            sheet.addCell(new Number(j*4+3, offset, c[j],number));
            sheet.addCell(new Number(j*4+4, offset, f[j],number));
          }
        }
        global.add(summary);
        d++;
        offset++;
      }
      sheet.addCell(new Label(0,offset,"Overall:"));
      double[]p=Measures.Precision(global);
      double[]r=Measures.Recall(global);
      double[]c=Measures.Coverage(global);
      double[]f=Measures.Fmeasure(global);
      for(int j=0;j<4;j++)
      {
        sheet.addCell(new Number(j*4+1, offset, p[j],boldnumber));
        sheet.addCell(new Number(j*4+2, offset, r[j],boldnumber));
        sheet.addCell(new Number(j*4+3, offset, c[j],boldnumber));
        sheet.addCell(new Number(j*4+4, offset, f[j],boldnumber));
      }
      offset++;
      offset++;
      offset++;
    }
  }
/**
* Adds a sheet with some interesting summaries.
* @param sheet The sheet to be filled.
* @param tests The tests to be included on this file.
* @throws Exception
*/
  private static void addMiscSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
  {
   
    //Document summary
    int offset=3;
    int t=1;
    for(Test test:tests)
    {
      sheet.addCell(new Label(0, offset, "Test "+t));
      t++;
      sheet.addCell(new Label(1, offset, "Average words used"));
      sheet.addCell(new Label(2, offset, "Average senses addressed"));
      sheet.addCell(new Label(3, offset, "Probability of addresing the correct sense"));
      sheet.addCell(new Label(4, offset, "Average polisemy"));
      sheet.addCell(new Label(5, offset, "Average score"));
      offset++;
      int i=0;
      double total=0.0d;
      ArrayList<ArrayList<Decision>> ds=new ArrayList<ArrayList<Decision>>(test.getAnswers().size());
     
      for(ArrayList<Decision> decisions:test.getAnswers())
      {
        ArrayList<Decision> d=new ArrayList<Decision>(decisions.size());
        for(Decision decision:decisions)
        {
          if(decision.isAttempted())
          {
            d.add(decision);
          }
        }
        ds.add(d);
      }
     
      for(ArrayList<Decision> decisions:ds)
      {
        total+=(double)decisions.size();
      }
      double gwords=0.0d;
      double gsenses=0.0d;
      double gcorrect=0.0d;
      double gpolisemy=0.0d;
      double gscore=0.0d;
     
      for(ArrayList<Decision> decisions:ds)
      {
        double words=0.0d;
        double senses=0.0d;
        double correct=0.0d;
        double polisemy=0.0d;
        double score=0.0d;
        sheet.addCell(new Label(0,offset,test.getDocs().get(i).toString()));
        for(Decision decision:decisions)
        {
          words+=((double)Util.removeDuplicates(
              decision.getDecisionWords(decision.getAnswers()[0])
              ).size())/((double)decisions.size());
          gwords+=((double)Util.removeDuplicates(
              decision.getDecisionWords(decision.getAnswers()[0])
              ).size())/total;
          senses+=(double)decision.getSensesAddressedCount()/((double)decisions.size());
          gsenses+=(double)decision.getSensesAddressedCount()/total;
          if(decision.isAttempted())
          {
            score+=decision.getWeights()[decision.getAnswers()[0]]/((double)decisions.size());
            gscore+=decision.getWeights()[decision.getAnswers()[0]]/total;
          }
          if(decision.isCorrectSenseAddressed())
          {
            correct+=1.0d/((double)decisions.size());
            gcorrect+=1.0d/total;
          }
          polisemy+=(double)decision.getSenseCount()/((double)decisions.size());
          gpolisemy+=(double)decision.getSenseCount()/total;
        }
        sheet.addCell(new Number(1, offset, words,number));
        sheet.addCell(new Number(2, offset, senses,number));
        sheet.addCell(new Number(3, offset, correct,number));
        sheet.addCell(new Number(4, offset, polisemy,number));
        sheet.addCell(new Number(5, offset, score,number));
        offset++;
        i++;
      }
      sheet.addCell(new Label(0,offset,"overall"));
      sheet.addCell(new Number(1, offset, gwords,boldnumber));
      sheet.addCell(new Number(2, offset, gsenses,boldnumber));
      sheet.addCell(new Number(3, offset, gcorrect,boldnumber));
      sheet.addCell(new Number(4, offset, gpolisemy,boldnumber));
      sheet.addCell(new Number(5, offset, gscore,boldnumber));
      offset++;
      offset++;
    }
  }

  /**
   * Adds a sheet with the detailed performance observed in different sense numbers.
   * This sheet was created for confirming that the answers of WSDAlgorithms follow a Zipfian like distribution.
   * @param sheet The sheet to be filled.
   * @param tests The tests to be included on this file.
   * @throws Exception
   */
    private static void addSenseSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
    {
     
      //Document summary
      int offset=0;
      int t=1;
      for(Test test:tests)
      {       
        int doc=0;
        offset++;
        int foffset=offset;
        sheet.addCell(new Label(0, offset, "Test "+t));
        t++;
        double given[]=new double[XLSWriter.maxSenseNumbers];
        double correct[]=new double[XLSWriter.maxSenseNumbers];
        double ans[]=new double[XLSWriter.maxSenseNumbers];
        double add[]=new double[XLSWriter.maxSenseNumbers];
        double addc[]=new double[XLSWriter.maxSenseNumbers];
        double pos[][]=new double[4][XLSWriter.maxSenseNumbers];
        double posc[][]=new double[4][XLSWriter.maxSenseNumbers];
        for(int i=0;i<XLSWriter.maxSenseNumbers;i++)
        {
          given[i]=0.0;
          correct[i]=0.0;
          ans[i]=0.0;
          add[i]=0.0;
          addc[i]=0.0;
          for(int j=0;j<4;j++)
          {
            pos[j][i]=0.0;
            posc[j][i]=0.0;
          }
        }

        for(ArrayList<Decision> decisions:test.getAnswers())
        {
          offset++;
          sheet.addCell(new Label(1, offset, test.getDocs().get(doc).toString()));
          doc++;
          offset++;
          sheet.addCell(new Label(0, offset, "Sense number"));
          sheet.addCell(new Label(1, offset, "Given answers"));
          sheet.addCell(new Label(2, offset, "Correct answers"));
          sheet.addCell(new Label(3, offset, "Total answers"));
          sheet.addCell(new Label(4, offset, "Total addresed"));
          sheet.addCell(new Label(5, offset, "Total addresed correctly"));
          sheet.addCell(new Label(6, offset, "P"));
          sheet.addCell(new Label(7, offset, "R"));
          sheet.addCell(new Label(8, offset, "C"));
          sheet.addCell(new Label(9, offset, "OverFit"));
          sheet.addCell(new Label(10, offset, "Addressing precision"));
          offset++;
          double lgiven[]=new double[XLSWriter.maxSenseNumbers];
          double lcorrect[]=new double[XLSWriter.maxSenseNumbers];
          double lans[]=new double[XLSWriter.maxSenseNumbers];
          double ladd[]=new double[XLSWriter.maxSenseNumbers];
          double laddc[]=new double[XLSWriter.maxSenseNumbers];
          for(int i=0;i<XLSWriter.maxSenseNumbers;i++)
          {
            lgiven[i]=0.0;
            lcorrect[i]=0.0;
            lans[i]=0.0;
            ladd[i]=0.0;
            laddc[i]=0.0;
          }
          for(Decision d:decisions)
          {
            for(int a:d.getAnswers())
            {
              lgiven[a]+=1.0;
              given[a]+=1.0;
              if(d.isCorrect(a))
              {
                lcorrect[a]+=1.0;
                correct[a]+=1.0;
                if(d.getTarget().getPos().equals(""))
                  posc[0][a]+=1.0;
                else
                  posc[WordNet.getPOS(d.getTarget().getPos())][a]+=1.0;
              }
              if(d.getTarget().getPos().equals(""))
                pos[0][a]+=1.0;
              else
                pos[WordNet.getPOS(d.getTarget().getPos())][a]+=1.0;
            }
            for(int a:d.getTarget().getCorrectSenseNumbers())
            {
              lans[a]+=1.0;
              ans[a]+=1.0;
            }
            for(int a=0;a<d.getWeights().length;a++)
            {
              if(d.getWeights()[a]>0.0)
              {
                ladd[a]+=1.0;
                add[a]+=1.0;
                if(d.isCorrect(a))
                {
                  laddc[a]+=1.0;
                  addc[a]+=1.0;
                }
              }
            }
          }
          //Print
          for(int i=0;i<10;i++)
          {
            sheet.addCell(new Label(0, offset, String.valueOf(i+1)));
            sheet.addCell(new Number(1, offset, lgiven[i]));
            sheet.addCell(new Number(2, offset, lcorrect[i]));
            sheet.addCell(new Number(3, offset, lans[i]));
            sheet.addCell(new Number(4, offset, ladd[i]));
            sheet.addCell(new Number(5, offset, laddc[i]));
            sheet.addCell(new Number(6, offset, 100.0*lcorrect[i]/lgiven[i]));
            sheet.addCell(new Number(7, offset, 100.0*lcorrect[i]/lans[i]));
            sheet.addCell(new Number(8, offset, 100.0*lgiven[i]/lans[i]));
            sheet.addCell(new Number(9, offset, 100.0*ladd[i]/lans[i]-100.0));
            sheet.addCell(new Number(10, offset, 100.0*laddc[i]/ladd[i]));
            offset++;
          }
        }
        sheet.addCell(new Label(12, foffset, "Overall"));
        foffset++;
        sheet.addCell(new Label(12, foffset, "Sense number"));
        sheet.addCell(new Label(13, foffset, "Given answers"));
        sheet.addCell(new Label(14, foffset, "Correct answers"));
        sheet.addCell(new Label(15, foffset, "Total answers"));
        sheet.addCell(new Label(16, foffset, "Total addresed"));
        sheet.addCell(new Label(17, foffset, "Total addresed correctly"));
        sheet.addCell(new Label(18, foffset, "P"));
        sheet.addCell(new Label(19, foffset, "R"));
        sheet.addCell(new Label(20, foffset, "C"));
        sheet.addCell(new Label(21, foffset, "OverFit"));
        sheet.addCell(new Label(22, foffset, "Addressing precision"));
        sheet.addCell(new Label(23, foffset, "Noun P"));
        sheet.addCell(new Label(24, foffset, "Verb P"));
        sheet.addCell(new Label(25, foffset, "Adjective P"));
        sheet.addCell(new Label(26, foffset, "Adverb P"));
        foffset++;
        for(int i=0;i<10;i++)
        {
          sheet.addCell(new Label(12, foffset, String.valueOf(i+1)));
          sheet.addCell(new Number(13, foffset, given[i]));
          sheet.addCell(new Number(14, foffset, correct[i]));
          sheet.addCell(new Number(15, foffset, ans[i]));
          sheet.addCell(new Number(16, foffset, add[i]));
          sheet.addCell(new Number(17, foffset, addc[i]));
          sheet.addCell(new Number(18, foffset, 100.0*correct[i]/given[i]));
          sheet.addCell(new Number(19, foffset, 100.0*correct[i]/ans[i]));
          sheet.addCell(new Number(20, foffset, 100.0*given[i]/ans[i]));
          sheet.addCell(new Number(21, foffset, 100.0*add[i]/ans[i]-100.0));
          sheet.addCell(new Number(22, foffset, 100.0*addc[i]/add[i]));
          for(int j=0;j<4;j++)
            sheet.addCell(new Number(23+j, foffset, 100.0*posc[j][i]/pos[j][i]));
          foffset++;
        }
      }
    }

 
TOP

Related Classes of gannuWSD.testing.XLSWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.