package gannuWSD.testing;
import gannuNLP.data.AmbiguousWord;
import gannuNLP.data.Input;
import gannuNLP.dictionaries.Dictionary;
import gannuNLP.dictionaries.WordNet;
import gannuUtil.KeyString;
import gannuUtil.Util;
import gannuWSD.skipfilters.SkipFilter;
import gannuWSD.windowfilters.WindowFilter;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map.Entry;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.Number;
import jxl.write.NumberFormat;
import jxl.write.WritableCellFormat;
import jxl.write.WritableFont;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
/**
* Class for writing the test result in XLS format.
* @author Francisco Viveros-Jiménez
*
*/
public final class XLSWriter {
/**
* Cell format for plain numeric cells; values are rendered with the pattern "###.##".
*/
static WritableCellFormat number = new WritableCellFormat(new NumberFormat("###.##"));
/**
* Cell format for emphasized numeric cells (overall rows and totals): Arial 11 bold with the pattern "###.##".
* NOTE(review): the trailing {@code true} is presumably the italic flag of the JXL font constructor - confirm against the JXL API.
*/
static WritableCellFormat boldnumber= new WritableCellFormat(new WritableFont(WritableFont.ARIAL, 11, WritableFont.BOLD, true),new NumberFormat("###.##"));
/**
* Number of IDF buckets allocated by the IDF summary sheets.
* Each bucket is 0.5 wide: an IDF value lands in bucket floor(idf/0.5), so the
* counters cover the values [0,0.5,...,idfIntervals/2).
*/
static int idfIntervals=50;
/**
* Maximum polysemy allowed: the sense sheet sizes its per-sense counter arrays with this value,
* so every sense index must stay below it.
*/
static int maxSenseNumbers=2000;
/**
* Gathers the IDF value of every word that the report sheets may look up.
* <p>
* Two passes are made. The first adds each ambiguous word (having at least one sense) found in the
* documents of the first test, paired with its own IDF. The second walks every decision of every
* test and adds each word supporting any sense, paired with the IDF reported by the dictionary of
* the first test. A word is only added when {@code target} does not already contain an equal
* {@link KeyString}.
* @param target List receiving the generated (word, IDF as text) pairs.
* @param tests Target test results; the first one supplies the documents and the dictionary.
* @throws Exception
*/
static void retrieveIDFs(ArrayList<KeyString> target,ArrayList<Test> tests)throws Exception
{
	System.out.println("Calculating IDF values please wait some minutes...");
	// Pass 1: lemmas of the ambiguous words of the first test's documents.
	for(Input document:tests.get(0).getDocs())
	{
		for(AmbiguousWord ambiguous:document.getAmbiguousWords())
		{
			if(ambiguous.getSenses().size()<=0)
			{
				continue;
			}
			KeyString candidate=new KeyString(ambiguous.getLemma(),String.valueOf(ambiguous.getIDF()));
			if(!target.contains(candidate))
			{
				target.add(candidate);
			}
		}
	}
	// Pass 2: every word that contributed to some sense in some decision.
	Dictionary dictionary=tests.get(0).getDictionary();
	for(Test test:tests)
	{
		for(Summary summary:test.getSummaries())
		{
			for(Decision decision:summary.getDecisions())
			{
				for(int sense=0;sense<decision.getSenseCount();sense++)
				{
					for(String word:decision.getDecisionWords(sense))
					{
						if(target.contains(new KeyString(word)))
						{
							continue;
						}
						// The dictionary is only queried for words that are still missing.
						target.add(new KeyString(word,String.valueOf(dictionary.getIDF(word))));
					}
				}
			}
		}
	}
}
/**
* Creates an R script for performing statistical analysis of the test results.
* <p>
* For every test a vector {@code test<k>} is built holding the score of each of its decisions
* (one R statement per decision). Afterwards a Wilcoxon test is emitted for every ordered pair of
* tests and the resulting p-values are arranged in a square matrix named {@code m}.
* @param f Target file; it is created when missing and overwritten otherwise.
* @param tests Target test results.
* @throws Exception
*/
public static void generateRScript(File f, ArrayList<Test> tests)throws Exception
{
	System.out.println("Creating R script ");
	f.createNewFile();
	BufferedWriter out=new BufferedWriter(new FileWriter(f));
	try
	{
		int testNumber=1;
		for(Test test:tests)
		{
			String name="test"+String.valueOf(testNumber);
			// The vector must be created by the first score actually written, even when the
			// leading documents have no decisions; appending to an undefined R variable fails.
			boolean firstScore=true;
			for(ArrayList<Decision> decisions:test.getAnswers())
			{
				for(Decision decision:decisions)
				{
					String score=String.valueOf(decision.getScore());
					if(firstScore)
					{
						out.write(name+"<-c("+score+");\n");
						firstScore=false;
					}
					else
					{
						out.write(name+"<-c("+name+","+score+");\n");
					}
				}
			}
			testNumber++;
		}
		int total=tests.size();
		for(int i=1;i<=total;i++)
		{
			for(int j=1;j<=total;j++)
			{
				out.write("data<-wilcox.test(test"+String.valueOf(i)+",test"+String.valueOf(j)+");\n");
				if(i==1&&j==1)
					out.write("results<-c(data$p.value);\n");
				else
					out.write("results<-c(results,data$p.value);\n");
			}
		}
		out.write("m=matrix(data=results,nrow="+total+",ncol="+total+");\n m \n");
	}
	finally
	{
		// Closing the buffered writer also closes the wrapped FileWriter, even after a failure.
		out.close();
	}
}
/**
* Writes an XLS file.
* <p>
* The workbook always starts with the "Experimental setup" sheet; the remaining sheets are
* selected through {@code summary} and {@code detail}. Both are matched by substring, so several
* keywords can be combined in one text.
* @param file File to be created.
* @param tests The tests to be included in the file.
* @param summary Summary sheets to generate. Recognized keywords: "all", "performance",
* "disambiguation", "words", "overlap" and "sense".
* @param detail Detail sheets to generate. Recognized keywords: "all", "performance",
* "disambiguation", "words", "overlap" and "sense". The overlap detail sheet is only produced
* when "overlap" is named explicitly; "all" does not include it.
* @param time Value written in the "Time" column of the performance summary sheet.
* @throws Exception
*/
public static void writeXLS(File file, ArrayList<Test> tests, String summary,String detail,long time) throws Exception
{
	ArrayList<KeyString> idflist=new ArrayList<KeyString>();
	System.out.println("Saving "+file.getName());
	WritableWorkbook workbook = Workbook.createWorkbook(file);
	boolean written=false;
	try
	{
		WritableSheet header = workbook.createSheet("Experimental setup", 0);
		addHeader(header,tests);
		if(summary.contains("all")||summary.contains("performance"))
		{
			System.out.println("Generating performance summary sheet");
			WritableSheet sheet = workbook.createSheet("Performance summary", workbook.getNumberOfSheets());
			addPerformanceSheet(sheet,tests,time);
		}
		if(summary.contains("all")||summary.contains("disambiguation"))
		{
			System.out.println("Generating disambiguation summary sheet");
			WritableSheet sheet = workbook.createSheet("Words leading to disambiguation", workbook.getNumberOfSheets());
			addDisambiguationSheet(sheet,tests);
		}
		if(summary.contains("all")||summary.contains("words"))
		{
			System.out.println("Problem summary sheet");
			WritableSheet docs = workbook.createSheet("Disambiguated words", workbook.getNumberOfSheets());
			addProblemSheet(docs,tests);
		}
		if(summary.contains("all")||summary.contains("overlap"))
		{
			System.out.println("Generating overlap summary sheet");
			WritableSheet sheet = workbook.createSheet("Overlap by IDF summary", workbook.getNumberOfSheets());
			// The IDF list is expensive to build, so it is computed lazily and shared.
			if(idflist.size()==0)
				XLSWriter.retrieveIDFs(idflist, tests);
			addOverlapSheet(sheet, tests,idflist);
		}
		if(summary.contains("all")||summary.contains("sense"))
		{
			System.out.println("Generating sense stats sheet");
			WritableSheet sheet = workbook.createSheet("Algorithm behavior", workbook.getNumberOfSheets());
			addMiscSheet(sheet, tests);
		}
		if(detail.contains("all")||detail.contains("performance"))
		{
			System.out.println("Generating performance detail sheet");
			WritableSheet sheet = workbook.createSheet("Performance detail", workbook.getNumberOfSheets());
			addPerformanceDetailSheet(sheet,tests);
		}
		if(detail.contains("all")||detail.contains("disambiguation"))
		{
			System.out.println("Generating disambiguation detail sheet");
			WritableSheet sheet = workbook.createSheet("Disambiguation detail", workbook.getNumberOfSheets());
			if(idflist.size()==0)
				XLSWriter.retrieveIDFs(idflist, tests);
			addDisambiguationDetailSheet(sheet,tests,idflist);
		}
		if(detail.contains("all")||detail.contains("words"))
		{
			System.out.println("Generating problem detail sheet");
			WritableSheet docs = workbook.createSheet("Problem detail", workbook.getNumberOfSheets());
			if(idflist.size()==0)
				XLSWriter.retrieveIDFs(idflist, tests);
			addProblemDetailSheet(docs,tests,idflist);
		}
		if(detail.contains("overlap"))
		{
			System.out.println("Generating overlap detail sheet");
			WritableSheet sheet = workbook.createSheet("Overlap detail", workbook.getNumberOfSheets());
			addOverlapDetailSheet(sheet,tests);
		}
		if(detail.contains("all")||detail.contains("sense"))
		{
			System.out.println("Generating sense detail sheet");
			WritableSheet sheet = workbook.createSheet("Disambiguation by sense number", workbook.getNumberOfSheets());
			addSenseSheet(sheet,tests);
		}
		workbook.write();
		written=true;
	}
	finally
	{
		if(written)
		{
			workbook.close();
		}
		else
		{
			// A sheet builder or the write failed: still release the file handle, but do not
			// let a secondary failure while closing hide the original exception.
			try
			{
				workbook.close();
			}
			catch(Exception ignored)
			{
				// intentionally ignored, the original failure is the one being propagated
			}
		}
	}
	System.out.println(file.getName()+" saved");
}
/**
* Fills the sheet describing the file contents.
* <p>
* The top rows show the knowledge source, dictionary and retrieved senses of the first test.
* From row 6 on, one section per test lists its WSD method, back-off method, tie solving method
* and the skip and window selection filters of each of them; "none" is printed for a missing
* method or an empty filter list.
* @param sheet The sheet to be filled.
* @param tests Tests included in this file.
* @throws Exception
*/
private static void addHeader(WritableSheet sheet, ArrayList<Test> tests)throws Exception
{
	final String none="none";
	Test first=tests.get(0);
	sheet.addCell(new Label(0, 0, "CICWSD output file. Created by: Viveros-Jimenez F., Gelbukh A. & Sidorov G."));
	sheet.addCell(new Label(0, 2, "Knowledge Source:"));
	sheet.addCell(new Label(1, 2, first.getKNSources()));
	sheet.addCell(new Label(4, 2, "Dictionary:"));
	sheet.addCell(new Label(5, 2, first.getDictionary().toString()));
	sheet.addCell(new Label(1, 3, "* Retrieved Senses:"));
	sheet.addCell(new Label(2, 3, first.getRetrievedSenses()));
	sheet.addCell(new Label(0, 5, "Tests:"));
	int row=6;
	int testNumber=1;
	for(Test test:tests)
	{
		sheet.addCell(new Label(1, row, "Test "+testNumber));
		row++;
		// Methods taking part in the test.
		sheet.addCell(new Label(2, row, "* WSD method:"));
		sheet.addCell(new Label(3, row, test.getAlgorithm().toString()));
		row++;
		sheet.addCell(new Label(2, row, "* Back-off method:"));
		sheet.addCell(new Label(3, row, test.getBackoff()!=null?test.getBackoff().toString():none));
		row++;
		sheet.addCell(new Label(2, row, "* Tie solving method:"));
		sheet.addCell(new Label(3, row, test.getTie()!=null?test.getTie().toString():none));
		row++;
		// Skip filters of each method.
		sheet.addCell(new Label(2, row, "* Skip Filters for WSD:"));
		row++;
		for(SkipFilter filter:test.getAlgorithm().getSkipFilters())
		{
			sheet.addCell(new Label(3, row++, filter.toString()));
		}
		if(test.getAlgorithm().getSkipFilters().size()==0)
		{
			sheet.addCell(new Label(3, row++, none));
		}
		row++;
		if(test.getTie()!=null)
		{
			row++;
			sheet.addCell(new Label(2, row, "* Skip Filters for tie:"));
			row++;
			for(SkipFilter filter:test.getTie().getSkipFilters())
			{
				sheet.addCell(new Label(3, row++, filter.toString()));
			}
			if(test.getTie().getSkipFilters().size()==0)
			{
				sheet.addCell(new Label(3, row++, none));
			}
			row++;
		}
		row++;
		if(test.getBackoff()!=null)
		{
			row++;
			sheet.addCell(new Label(2, row, "* Skip Filters for backoff:"));
			row++;
			for(SkipFilter filter:test.getBackoff().getSkipFilters())
			{
				sheet.addCell(new Label(3, row++, filter.toString()));
			}
			if(test.getBackoff().getSkipFilters().size()==0)
			{
				sheet.addCell(new Label(3, row++, none));
			}
			row++;
		}
		row++;
		// Window selection filters of each method.
		sheet.addCell(new Label(2, row, "* Window Selection Filters for WSD:"));
		row++;
		for(WindowFilter filter:test.getAlgorithm().getWindowFilters())
		{
			sheet.addCell(new Label(3, row++, filter.toString()));
		}
		if(test.getAlgorithm().getWindowFilters().size()==0)
		{
			sheet.addCell(new Label(3, row++, none));
		}
		row++;
		if(test.getTie()!=null)
		{
			row++;
			sheet.addCell(new Label(2, row, "* Window Selection Filters for tie:"));
			row++;
			for(WindowFilter filter:test.getTie().getWindowFilters())
			{
				sheet.addCell(new Label(3, row++, filter.toString()));
			}
			if(test.getTie().getWindowFilters().size()==0)
			{
				sheet.addCell(new Label(3, row++, none));
			}
			row++;
		}
		row++;
		if(test.getBackoff()!=null)
		{
			row++;
			sheet.addCell(new Label(2, row, "* Window Selection Filters for backoff:"));
			row++;
			for(WindowFilter filter:test.getBackoff().getWindowFilters())
			{
				sheet.addCell(new Label(3, row++, filter.toString()));
			}
			if(test.getBackoff().getWindowFilters().size()==0)
			{
				sheet.addCell(new Label(3, row++, none));
			}
			row++;
		}
		testNumber++;
	}
}
/**
* Writes an overlap detail sheet.
* This sheet contains a more detailed explanation of how decisions were made: for every attempted
* decision of every document it lists the lemma, its polysemy, the correct answers, the answer of
* the approach and the window, followed by one row per sense having a positive weight with the
* words that supported it.
* <p>
* Each test occupies its own band of six columns, all bands starting at row 4.
* @param sheet The sheet to be created.
* @param tests Data for creating the sheet.
* @throws Exception
*/
private static void addOverlapDetailSheet(WritableSheet sheet, ArrayList<Test> tests)throws Exception
{
	int goffset=0;
	int t=1;
	for(Test test:tests)
	{
		int offset=4;
		sheet.addCell(new Label(goffset, offset, "Tests "+t));
		offset++;
		t++;
		// The loop header is the only place advancing i; an extra increment in the body would
		// silently skip every second document.
		for(int i=0;i<test.getDocs().size();i++)
		{
			Input doc=test.getDocs().get(i);
			ArrayList<Decision> decisions=test.getAnswers().get(i);
			sheet.addCell(new Label(goffset, offset, "Document:"));
			sheet.addCell(new Label(goffset+1, offset, doc.toString()));
			offset++;
			for(Decision decision:decisions)
			{
				if(decision.isAttempted())
				{
					sheet.addCell(new Label(goffset+1,offset,"Lemma"));
					sheet.addCell(new Label(goffset+2,offset,"Polysemy"));
					sheet.addCell(new Label(goffset+3,offset,"Correct Answers"));
					sheet.addCell(new Label(goffset+4,offset,"Approach Answer"));
					sheet.addCell(new Label(goffset+5,offset,"Window"));
					offset++;
					sheet.addCell(new Label(goffset+1,offset,decision.getTarget().getLemma()));
					sheet.addCell(new Number(goffset+2,offset,decision.getTarget().getSenses().size()));
					sheet.addCell(new Label(goffset+3,offset,Arrays.toString(decision.getTarget().getCorrectSenseNumbers())));
					sheet.addCell(new Label(goffset+4,offset,decision.getAnswersString()));
					sheet.addCell(new Label(goffset+5,offset,decision.getWindowPrint()));
					offset++;
					sheet.addCell(new Label(goffset+1,offset,"Sense"));
					sheet.addCell(new Label(goffset+2,offset,"Weight"));
					sheet.addCell(new Label(goffset+3,offset,"Words"));
					offset++;
					int j=0;
					for(double w:decision.getWeights())
					{
						// Only senses that received some weight are listed (sense numbers are 1-based).
						if(w>0.0)
						{
							sheet.addCell(new Number(goffset+1,offset,j+1));
							sheet.addCell(new Number(goffset+2,offset,w));
							sheet.addCell(new Label(goffset+3,offset,decision.getDecisionWords(j).toString()));
							offset++;
						}
						j++;
					}
					offset+=2;
				}
			}
			offset++;
		}
		goffset+=6;
	}
}
/**
* Writes a sheet containing detailed success rate on each attempted lemma.
* <p>
* For every test a table is written with one row per lemma and one column per document holding
* the attempts on that lemma, followed by the overall attempts, the overall correct answers and
* the IDF of the lemma. The overall and IDF columns use the bold number format.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet.
* @param idfList List containing lemmas and their corresponding IDF values.
* @throws Exception
*/
private static void addDisambiguationDetailSheet(WritableSheet sheet, ArrayList<Test> tests, ArrayList<KeyString> idfList) throws Exception
{
	int goffset=4;
	int t=1;
	for(Test test:tests)
	{
		// Per lemma: [attempts in doc 0 .. doc n-1, overall attempts, overall correct answers].
		HashMap<String,int[]> globalLemmas=new HashMap<String,int []>();
		int i=0;
		int docs=test.getSummaries().size();
		for(Summary summary:test.getSummaries())
		{
			for(Entry<String,int[]> entry:summary.getDecisionLemmas().entrySet())
			{
				int[] count=globalLemmas.get(entry.getKey());
				if(count==null)
				{
					// New arrays are zero-filled by the language, no explicit reset is needed.
					count=new int[docs+2];
					globalLemmas.put(entry.getKey(), count);
				}
				int[] aux=entry.getValue();
				count[i]+=aux[0];
				count[docs]+=aux[0];
				count[docs+1]+=aux[1];
			}
			i++;
		}
		//write the header parameters of the test
		int offset=0;
		sheet.addCell(new Label(0, goffset, "Test "+t));
		t++;
		goffset++;
		sheet.addCell(new Label(0, goffset, "Lemma"));
		for(Input doc:test.getDocs())
		{
			sheet.addCell(new Label(1+offset, goffset, doc.toString()));
			offset++;
		}
		sheet.addCell(new Label(1+offset, goffset, "Overall attempts"));
		sheet.addCell(new Label(2+offset, goffset, "Overall correct answers"));
		sheet.addCell(new Label(3+offset, goffset, "IDF"));
		goffset++;
		for(Entry<String,int[]> entry:globalLemmas.entrySet())
		{
			int[] counts=entry.getValue();
			sheet.addCell(new Label(0,goffset,entry.getKey()));
			for(int z=0;z<counts.length;z++)
			{
				// Per-document columns are plain, the two trailing overall columns are bold.
				WritableCellFormat format=(z<docs)?number:boldnumber;
				sheet.addCell(new Number(1+z, goffset, counts[z],format));
			}
			sheet.addCell(new Number(1+counts.length, goffset,XLSWriter.getIDF(idfList, entry.getKey()) ,boldnumber));
			goffset++;
		}
		goffset+=3;
	}
}
/**
* Searches for the IDF value of a target word in a target IDF list.
* The list is scanned once; the text stored with the matching entry is parsed as a double.
* @param idfList Target list containing lemmas and their corresponding IDF values.
* @param word Target word.
* @return Corresponding IDF value or 0.0 in case of being unavailable.
*/
private static double getIDF(ArrayList<KeyString> idfList,String word)
{
	int position=idfList.indexOf(new KeyString(word));
	if(position<0)
	{
		return 0.0;
	}
	return Double.parseDouble(idfList.get(position).getString());
}
/**
* Writes a sheet with the attempts and the successes obtained in different IDF clusters of target words.
* <p>
* For every test two tables are written ("Attempts" and "Correct"), each with one row per
* document plus an "Overall:" row. An attempted decision is counted in the bucket
* floor(idf/0.5) of its target word; it is also counted as correct when the correct sense was
* addressed. The header shows 29 labels, from "IDF = 1.0" to "IDF = 15.0".
* <p>
* NOTE(review): the printed data columns are buckets 0 to 28 (IDF 0 to 14.5) while the labels
* start at 1.0, so labels and buckets look shifted by two columns - confirm the intended layout.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet; the document count is taken from the first test.
* @throws Exception
*/
private static void addDisambiguationSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
{
	final String[] captions={"Attempts","Correct"};
	final int docTotal=tests.get(0).getDocs().size();
	int row=4;
	int testNumber=1;
	for(Test test:tests)
	{
		// Fresh zero-filled counters per test: [document or overall][bucket][0=attempts,1=correct].
		int[][][] tally=new int[docTotal+1][XLSWriter.idfIntervals][2];
		int docIndex=0;
		for(Summary summary:test.getSummaries())
		{
			for(Decision decision:summary.getDecisions())
			{
				if(!decision.isAttempted())
				{
					continue;
				}
				int bucket=(int)Math.floor(decision.getTarget().getIDF()/0.5);
				tally[docIndex][bucket][0]++;
				tally[docTotal][bucket][0]++;
				if(decision.isCorrectSenseAddressed())
				{
					tally[docIndex][bucket][1]++;
					tally[docTotal][bucket][1]++;
				}
			}
			docIndex++;
		}
		//write the header parameters of the test
		sheet.addCell(new Label(0, row, "Test "+testNumber));
		testNumber++;
		for(int type=0;type<captions.length;type++)
		{
			row++;
			sheet.addCell(new Label(0, row, captions[type]));
			for(int k=0;k<29;k++)
			{
				sheet.addCell(new Label(1+k, row, "IDF = "+String.valueOf(1.0+0.5*k)));
			}
			row++;
			int line=0;
			for(Input doc:test.getDocs())
			{
				sheet.addCell(new Label(0, row+line, doc.toString()));
				line++;
			}
			sheet.addCell(new Label(0, row+line, "Overall:"));
			for(int r=0;r<=docTotal;r++)
			{
				for(int k=0;k<29;k++)
				{
					sheet.addCell(new Number(1+k, row+r, tally[r][k][type]));
				}
			}
			// Skip the data rows plus the blank separation before the next table.
			row+=docTotal+3;
		}
	}
}
/**
* Writes a sheet summarizing, by IDF cluster, the words that supported the senses of each decision.
* <p>
* Every word returned for every sense of every decision is counted in the bucket floor(idf/0.5),
* using the IDF found in {@code idfList} (0.0 when the word is not listed). The "Attempts" table
* counts all those words; the "Correct" table counts only the words supporting a correct sense.
* Each table has one row per document plus an "Overall:" row and 29 header labels, from
* "IDF = 1.0" to "IDF = 15.0".
* <p>
* NOTE(review): the printed data columns are buckets 0 to 28 (IDF 0 to 14.5) while the labels
* start at 1.0, so labels and buckets look shifted by two columns - confirm the intended layout.
* NOTE(review): the original carried a bare TODO marker on this method; what remained to be done
* is not recorded.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet; the document count is taken from the first test.
* @param idfList List containing words and their corresponding IDF values.
* @throws Exception
*/
private static void addOverlapSheet(WritableSheet sheet, ArrayList<Test> tests, ArrayList<KeyString> idfList) throws Exception
{
	int idfcount[][][];
	int doccount=tests.get(0).getDocs().size();
	idfcount=new int[doccount+1][XLSWriter.idfIntervals][2];
	int goffset=4;
	int t=1;
	for(Test test:tests)
	{
		// The counters are shared by all tests, so they are reset before each one.
		for(int d=0;d<=doccount;d++)
		{
			for(int i=0;i<XLSWriter.idfIntervals;i++)
			{
				idfcount[d][i][0]=0;
				idfcount[d][i][1]=0;
			}
		}
		int d=0;
		for(Summary summary:test.getSummaries())
		{
			for(Decision ds:summary.getDecisions())
			{
				for(int s=0;s<ds.getSenseCount();s++)
				{
					for(String word:ds.getDecisionWords(s))
					{
						double idf=XLSWriter.getIDF(idfList, word);
						int i=((int)Math.floor(idf/0.5));
						// Integer counters: plain increments instead of adding a double literal.
						idfcount[d][i][0]++;
						idfcount[doccount][i][0]++;
						if(ds.isCorrect(s))
						{
							idfcount[d][i][1]++;
							idfcount[doccount][i][1]++;
						}
					}
				}
			}
			d++;
		}
		//write the header parameters of the test
		sheet.addCell(new Label(0, goffset, "Test "+t));
		t++;
		for(int type=0;type<2;type++)
		{
			int offset=0;
			goffset++;
			if(type==0)
				sheet.addCell(new Label(0, goffset, "Attempts"));
			else
				sheet.addCell(new Label(0, goffset, "Correct"));
			int i=1;
			for(double idf=1.0;idf<15.1;idf+=0.5)
			{
				sheet.addCell(new Label(offset+i, goffset, "IDF = "+String.valueOf(idf)));
				i++;
			}
			goffset++;
			i=0;
			for(Input doc:test.getDocs())
			{
				sheet.addCell(new Label(0, goffset+i, doc.toString()));
				i++;
			}
			sheet.addCell(new Label(0, goffset+i, "Overall:"));
			offset++;
			for(d=0;d<=doccount;d++)
			{
				for(i=0;i<29;i++)
				{
					sheet.addCell(new Number(offset+i, goffset+d, idfcount[d][i][type]));
				}
			}
			// Skip the data rows plus the blank separation before the next table.
			goffset+=doccount+3;
		}
	}
}
/**
* Generates a summary of the frequencies of IDF clusters in each target text.
* <p>
* Only the documents of the first test are inspected. Every ambiguous word is counted in the
* bucket floor(idf/0.5) of its document and of the "Overall:" row. The header (row 5) shows 29
* labels, from "IDF = 1.0" to "IDF = 15.0"; the data starts at row 6.
* <p>
* NOTE(review): the printed data columns are buckets 0 to 28 (IDF 0 to 14.5) while the labels
* start at 1.0, so labels and buckets look shifted by two columns - confirm the intended layout.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet.
* @throws Exception
*/
private static void addProblemSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
{
	final int headerRow=5;
	final int firstDataRow=6;
	int docTotal=tests.get(0).getDocs().size();
	// Zero-filled on allocation: [document or overall][bucket].
	int[][] tally=new int[docTotal+1][XLSWriter.idfIntervals];
	int docIndex=0;
	for(Input input:tests.get(0).getDocs())
	{
		for(AmbiguousWord word:input.getAmbiguousWords())
		{
			int bucket=(int)Math.floor(word.getIDF()/0.5);
			tally[docIndex][bucket]++;
			tally[docTotal][bucket]++;
		}
		docIndex++;
	}
	for(int k=0;k<29;k++)
	{
		sheet.addCell(new Label(1+k, headerRow, "IDF = "+String.valueOf(1.0+0.5*k)));
	}
	int line=0;
	for(Input doc:tests.get(0).getDocs())
	{
		sheet.addCell(new Label(0, firstDataRow+line, doc.toString()));
		line++;
	}
	sheet.addCell(new Label(0, firstDataRow+line, "Overall:"));
	for(int r=0;r<=docTotal;r++)
	{
		for(int k=0;k<29;k++)
		{
			sheet.addCell(new Number(1+k, firstDataRow+r, tally[r][k]));
		}
	}
}
/**
* Generates a summary of the lemmas on the test-bed.
* <p>
* Using the index of each document of the first test, one row per lemma is written with its
* number of occurrences in every document, the overall number of occurrences (bold) and its IDF
* (bold). The header is placed at row 3.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet.
* @param idfList List containing lemmas and their corresponding IDF values.
* @throws Exception
*/
private static void addProblemDetailSheet(WritableSheet sheet, ArrayList<Test> tests, ArrayList<KeyString> idfList) throws Exception
{
	// Per lemma: [occurrences in doc 0 .. doc n-1, overall occurrences].
	HashMap<String,int[]> perLemma=new HashMap<String,int []>();
	int docTotal=tests.get(0).getDocs().size();
	int docIndex=0;
	for(Input doc:tests.get(0).getDocs())
	{
		for(Entry<String,ArrayList<Integer>> entry:doc.getIndex().entrySet())
		{
			int[] tally=perLemma.get(entry.getKey());
			if(tally==null)
			{
				tally=new int[docTotal+1];
				perLemma.put(entry.getKey(), tally);
			}
			int occurrences=entry.getValue().size();
			tally[docIndex]+=occurrences;
			tally[docTotal]+=occurrences;
		}
		docIndex++;
	}
	//Header
	int row=3;
	sheet.addCell(new Label(0, row, "Lemma"));
	int column=1;
	for(Input doc:tests.get(0).getDocs())
	{
		sheet.addCell(new Label(column, row, doc.toString()));
		column++;
	}
	sheet.addCell(new Label(column, row, "Overall appareances"));
	sheet.addCell(new Label(column+1, row, "IDF"));
	row++;
	for(Entry<String,int[]> entry:perLemma.entrySet())
	{
		int[] tally=entry.getValue();
		sheet.addCell(new Label(0,row,entry.getKey()));
		int last=tally.length-1;
		for(int z=0;z<tally.length;z++)
		{
			// Only the trailing overall column is emphasized.
			WritableCellFormat format=(z==last)?boldnumber:number;
			sheet.addCell(new Number(1+z, row, tally[z],format));
		}
		sheet.addCell(new Number(1+tally.length,row,XLSWriter.getIDF(idfList,entry.getKey()),boldnumber));
		row++;
	}
}
/**
* Writes a summary of the performance of the algorithm.
* <p>
* For every test one row per document is written with the precision, recall, coverage and
* F1-measure found at index 4 of the arrays returned by {@link Measures}, followed by an
* "Overall:" row computed over all the summaries of the test. The time is only written in the
* overall row.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet.
* @param time Value written in the "Time" column of each overall row.
* @throws Exception
*/
private static void addPerformanceSheet(WritableSheet sheet, ArrayList<Test> tests,long time) throws Exception
{
	final String[] titles={"Document","Precision","Recall","Coverage","F1-measure","Time"};
	int row=3;
	int testNumber=1;
	for(Test test:tests)
	{
		sheet.addCell(new Label(0, row, "Test "+testNumber));
		testNumber++;
		row++;
		for(int col=0;col<titles.length;col++)
		{
			sheet.addCell(new Label(col, row, titles[col]));
		}
		row++;
		ArrayList<Summary> collected=new ArrayList<Summary>();
		int index=0;
		for(Input doc:test.getDocs())
		{
			sheet.addCell(new Label(0, row, doc.toString()));
			Summary summary=test.getSummaries().get(index);
			double[][] measures={
				Measures.Precision(summary),
				Measures.Recall(summary),
				Measures.Coverage(summary),
				Measures.Fmeasure(summary)
			};
			for(int m=0;m<measures.length;m++)
			{
				sheet.addCell(new Number(1+m, row, measures[m][4],boldnumber));
			}
			collected.add(summary);
			index++;
			row++;
		}
		sheet.addCell(new Label(0,row,"Overall:"));
		double[][] overall={
			Measures.Precision(collected),
			Measures.Recall(collected),
			Measures.Coverage(collected),
			Measures.Fmeasure(collected)
		};
		for(int m=0;m<overall.length;m++)
		{
			sheet.addCell(new Number(1+m, row, overall[m][4],boldnumber));
		}
		sheet.addCell(new Number(5, row, time,boldnumber));
		// Overall row plus two blank rows before the next test.
		row+=3;
	}
}
/**
* Writes a sheet containing the detailed performance results for each open-class type.
* <p>
* For every test the precision, recall, coverage and F1-measure are written for nouns, verbs,
* adjectives and adverbs (indexes 0 to 3 of the arrays returned by {@link Measures}), one row
* per document in the plain number format, followed by a bold "Overall:" row computed over all
* the summaries of the test.
* @param sheet The sheet to write to.
* @param tests Data for creating the sheet.
* @throws Exception
*/
private static void addPerformanceDetailSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
{
	int offset=3;
	int t=1;
	for(Test test:tests)
	{
		sheet.addCell(new Label(0, offset, "Test "+t));
		t++;
		sheet.addCell(new Label(2, offset, "Nouns"));
		sheet.addCell(new Label(6, offset, "Verbs"));
		sheet.addCell(new Label(10, offset, "Adjectives"));
		sheet.addCell(new Label(14, offset, "Adverbs"));
		offset++;
		sheet.addCell(new Label(0, offset, "Document"));
		for(int i=0;i<4;i++)
		{
			sheet.addCell(new Label(i*4+1, offset, "Precision"));
			sheet.addCell(new Label(i*4+2, offset, "Recall"));
			sheet.addCell(new Label(i*4+3, offset, "Coverage"));
			sheet.addCell(new Label(i*4+4, offset, "F1-measure"));
		}
		offset++;
		ArrayList<Summary> global=new ArrayList<Summary>();
		int d=0;
		for(Input doc:test.getDocs())
		{
			sheet.addCell(new Label(0, offset, doc.toString()));
			Summary summary=test.getSummaries().get(d);
			double[]p=Measures.Precision(summary);
			double[]r=Measures.Recall(summary);
			double[]c=Measures.Coverage(summary);
			double[]f=Measures.Fmeasure(summary);
			// Four columns per open-class type; per-document values are never emphasized.
			for(int j=0;j<4;j++)
			{
				sheet.addCell(new Number(j*4+1, offset, p[j],number));
				sheet.addCell(new Number(j*4+2, offset, r[j],number));
				sheet.addCell(new Number(j*4+3, offset, c[j],number));
				sheet.addCell(new Number(j*4+4, offset, f[j],number));
			}
			global.add(summary);
			d++;
			offset++;
		}
		sheet.addCell(new Label(0,offset,"Overall:"));
		double[]p=Measures.Precision(global);
		double[]r=Measures.Recall(global);
		double[]c=Measures.Coverage(global);
		double[]f=Measures.Fmeasure(global);
		for(int j=0;j<4;j++)
		{
			sheet.addCell(new Number(j*4+1, offset, p[j],boldnumber));
			sheet.addCell(new Number(j*4+2, offset, r[j],boldnumber));
			sheet.addCell(new Number(j*4+3, offset, c[j],boldnumber));
			sheet.addCell(new Number(j*4+4, offset, f[j],boldnumber));
		}
		// Overall row plus two blank rows before the next test.
		offset+=3;
	}
}
/**
* Adds a sheet with some interesting summaries.
* <p>
* Only attempted decisions are considered. For every document of every test the sheet shows the
* average number of distinct words supporting the first answer, the average number of senses
* addressed, the fraction of decisions in which the correct sense was addressed, the average
* polysemy and the average weight of the first answer. An "overall" row repeats the same
* averages over all the attempted decisions of the test.
* @param sheet The sheet to be filled.
* @param tests The tests to be included on this file.
* @throws Exception
*/
private static void addMiscSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
{
	//Document summary
	int offset=3;
	int t=1;
	for(Test test:tests)
	{
		sheet.addCell(new Label(0, offset, "Test "+t));
		t++;
		sheet.addCell(new Label(1, offset, "Average words used"));
		sheet.addCell(new Label(2, offset, "Average senses addressed"));
		sheet.addCell(new Label(3, offset, "Probability of addresing the correct sense"));
		sheet.addCell(new Label(4, offset, "Average polisemy"));
		sheet.addCell(new Label(5, offset, "Average score"));
		offset++;
		int i=0;
		// Keep the attempted decisions of each document and count them all.
		double total=0.0d;
		ArrayList<ArrayList<Decision>> ds=new ArrayList<ArrayList<Decision>>(test.getAnswers().size());
		for(ArrayList<Decision> decisions:test.getAnswers())
		{
			ArrayList<Decision> d=new ArrayList<Decision>(decisions.size());
			for(Decision decision:decisions)
			{
				if(decision.isAttempted())
				{
					d.add(decision);
				}
			}
			ds.add(d);
			total+=(double)d.size();
		}
		double gwords=0.0d;
		double gsenses=0.0d;
		double gcorrect=0.0d;
		double gpolisemy=0.0d;
		double gscore=0.0d;
		for(ArrayList<Decision> decisions:ds)
		{
			double words=0.0d;
			double senses=0.0d;
			double correct=0.0d;
			double polisemy=0.0d;
			double score=0.0d;
			double docSize=(double)decisions.size();
			sheet.addCell(new Label(0,offset,test.getDocs().get(i).toString()));
			for(Decision decision:decisions)
			{
				// Each quantity is computed once and shared by the document and overall averages.
				int answer=decision.getAnswers()[0];
				double wordsUsed=(double)Util.removeDuplicates(decision.getDecisionWords(answer)).size();
				words+=wordsUsed/docSize;
				gwords+=wordsUsed/total;
				double addressed=(double)decision.getSensesAddressedCount();
				senses+=addressed/docSize;
				gsenses+=addressed/total;
				// Kept as a guard even though the list was already filtered by isAttempted().
				if(decision.isAttempted())
				{
					double weight=decision.getWeights()[answer];
					score+=weight/docSize;
					gscore+=weight/total;
				}
				if(decision.isCorrectSenseAddressed())
				{
					correct+=1.0d/docSize;
					gcorrect+=1.0d/total;
				}
				double senseCount=(double)decision.getSenseCount();
				polisemy+=senseCount/docSize;
				gpolisemy+=senseCount/total;
			}
			sheet.addCell(new Number(1, offset, words,number));
			sheet.addCell(new Number(2, offset, senses,number));
			sheet.addCell(new Number(3, offset, correct,number));
			sheet.addCell(new Number(4, offset, polisemy,number));
			sheet.addCell(new Number(5, offset, score,number));
			offset++;
			i++;
		}
		sheet.addCell(new Label(0,offset,"overall"));
		sheet.addCell(new Number(1, offset, gwords,boldnumber));
		sheet.addCell(new Number(2, offset, gsenses,boldnumber));
		sheet.addCell(new Number(3, offset, gcorrect,boldnumber));
		sheet.addCell(new Number(4, offset, gpolisemy,boldnumber));
		sheet.addCell(new Number(5, offset, gscore,boldnumber));
		offset+=2;
	}
}
/**
* Adds a sheet with the detailed performance observed in different sense numbers.
* This sheet was created for confirming that the answers of WSDAlgorithms follow a Zipfian like distribution.
* <p>
* For every document of every test a table with the first 10 sense numbers is written showing:
* given answers, correct answers, total answers (occurrences among the correct sense numbers),
* senses addressed (positive weight) and senses addressed correctly, followed by
* P=100*correct/given, R=100*correct/total, C=100*given/total, OverFit=100*addressed/total-100
* and the addressing precision 100*addressedCorrectly/addressed. An "Overall" table per test is
* placed from column 12 on and adds the precision per part of speech. The ratios are plain
* double divisions, so a zero denominator is written as it results from the division.
* @param sheet The sheet to be filled.
* @param tests The tests to be included on this file.
* @throws Exception
*/
private static void addSenseSheet(WritableSheet sheet, ArrayList<Test> tests) throws Exception
{
	final String[] titles={"Sense number","Given answers","Correct answers","Total answers",
		"Total addresed","Total addresed correctly","P","R","C","OverFit","Addressing precision"};
	final String[] posTitles={"Noun P","Verb P","Adjective P","Adverb P"};
	final int limit=XLSWriter.maxSenseNumbers;
	//Document summary
	int row=0;
	int testNumber=1;
	for(Test test:tests)
	{
		int docIndex=0;
		row++;
		int overallRow=row;
		sheet.addCell(new Label(0, row, "Test "+testNumber));
		testNumber++;
		// Counters accumulated over the whole test (arrays are zero-filled on allocation).
		double[] given=new double[limit];
		double[] correct=new double[limit];
		double[] ans=new double[limit];
		double[] add=new double[limit];
		double[] addc=new double[limit];
		double[][] pos=new double[4][limit];
		double[][] posc=new double[4][limit];
		for(ArrayList<Decision> decisions:test.getAnswers())
		{
			row++;
			sheet.addCell(new Label(1, row, test.getDocs().get(docIndex).toString()));
			docIndex++;
			row++;
			for(int col=0;col<titles.length;col++)
			{
				sheet.addCell(new Label(col, row, titles[col]));
			}
			row++;
			// Counters restricted to the current document.
			double[] lgiven=new double[limit];
			double[] lcorrect=new double[limit];
			double[] lans=new double[limit];
			double[] ladd=new double[limit];
			double[] laddc=new double[limit];
			for(Decision decision:decisions)
			{
				for(int answer:decision.getAnswers())
				{
					lgiven[answer]+=1.0;
					given[answer]+=1.0;
					boolean hit=decision.isCorrect(answer);
					// Targets without part of speech are accounted in the first category.
					int category=decision.getTarget().getPos().equals("")
						?0
						:WordNet.getPOS(decision.getTarget().getPos());
					if(hit)
					{
						lcorrect[answer]+=1.0;
						correct[answer]+=1.0;
						posc[category][answer]+=1.0;
					}
					pos[category][answer]+=1.0;
				}
				for(int expected:decision.getTarget().getCorrectSenseNumbers())
				{
					lans[expected]+=1.0;
					ans[expected]+=1.0;
				}
				for(int sense=0;sense<decision.getWeights().length;sense++)
				{
					if(decision.getWeights()[sense]>0.0)
					{
						ladd[sense]+=1.0;
						add[sense]+=1.0;
						if(decision.isCorrect(sense))
						{
							laddc[sense]+=1.0;
							addc[sense]+=1.0;
						}
					}
				}
			}
			//Print
			for(int s=0;s<10;s++)
			{
				sheet.addCell(new Label(0, row, String.valueOf(s+1)));
				sheet.addCell(new Number(1, row, lgiven[s]));
				sheet.addCell(new Number(2, row, lcorrect[s]));
				sheet.addCell(new Number(3, row, lans[s]));
				sheet.addCell(new Number(4, row, ladd[s]));
				sheet.addCell(new Number(5, row, laddc[s]));
				sheet.addCell(new Number(6, row, 100.0*lcorrect[s]/lgiven[s]));
				sheet.addCell(new Number(7, row, 100.0*lcorrect[s]/lans[s]));
				sheet.addCell(new Number(8, row, 100.0*lgiven[s]/lans[s]));
				sheet.addCell(new Number(9, row, 100.0*ladd[s]/lans[s]-100.0));
				sheet.addCell(new Number(10, row, 100.0*laddc[s]/ladd[s]));
				row++;
			}
		}
		// Overall table of the test, placed to the right of the per-document tables.
		sheet.addCell(new Label(12, overallRow, "Overall"));
		overallRow++;
		for(int col=0;col<titles.length;col++)
		{
			sheet.addCell(new Label(12+col, overallRow, titles[col]));
		}
		for(int col=0;col<posTitles.length;col++)
		{
			sheet.addCell(new Label(23+col, overallRow, posTitles[col]));
		}
		overallRow++;
		for(int s=0;s<10;s++)
		{
			sheet.addCell(new Label(12, overallRow, String.valueOf(s+1)));
			sheet.addCell(new Number(13, overallRow, given[s]));
			sheet.addCell(new Number(14, overallRow, correct[s]));
			sheet.addCell(new Number(15, overallRow, ans[s]));
			sheet.addCell(new Number(16, overallRow, add[s]));
			sheet.addCell(new Number(17, overallRow, addc[s]));
			sheet.addCell(new Number(18, overallRow, 100.0*correct[s]/given[s]));
			sheet.addCell(new Number(19, overallRow, 100.0*correct[s]/ans[s]));
			sheet.addCell(new Number(20, overallRow, 100.0*given[s]/ans[s]));
			sheet.addCell(new Number(21, overallRow, 100.0*add[s]/ans[s]-100.0));
			sheet.addCell(new Number(22, overallRow, 100.0*addc[s]/add[s]));
			for(int j=0;j<4;j++)
			{
				sheet.addCell(new Number(23+j, overallRow, 100.0*posc[j][s]/pos[j][s]));
			}
			overallRow++;
		}
	}
}
}