package edu.harvard.wcfia.yoshikoder;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import org.xml.sax.SAXException;
import edu.harvard.wcfia.yoshikoder.dictionary.YKDictionary;
import edu.harvard.wcfia.yoshikoder.document.DocumentList;
import edu.harvard.wcfia.yoshikoder.document.DocumentListImpl;
import edu.harvard.wcfia.yoshikoder.document.YKDocument;
import edu.harvard.wcfia.yoshikoder.document.YKDocumentFactory;
import edu.harvard.wcfia.yoshikoder.util.FileUtil;
import edu.harvard.wcfia.yoshikoder.util.ImportUtil;
/**
 * Command line entry point: loads a dictionary from a project file, collects
 * the <code>.txt</code> files of a directory and prints the analysis result
 * as CSV on standard output. Progress and error messages go to standard error.
 *
 * <p>NOTE(review): {@link #run()} currently builds the document list but
 * returns <code>null</code>; the actual report still has to be ported to the
 * new reporting framework (see the commented-out code below it).
 */
public class YKCommandLine {

    /** Dictionary loaded from the first command line argument. */
    protected YKDictionary dictionary;

    /** The <code>.txt</code> files found in the directory given as second argument. */
    protected File[] files;

    /**
     * Parses the arguments, runs the analysis and prints the result as CSV to
     * standard output. Any failure is reported on standard error instead of
     * being propagated to the caller.
     *
     * @param args dictionary file followed by a document directory
     */
    public YKCommandLine(String[] args){
        try {
            parseCommandline(args);
            Object[][] results = run();
            if (results == null){
                // run() yields no table yet; say so instead of failing with
                // a NullPointerException whose message is just "null"
                System.err.println("No results were produced");
            } else {
                System.out.println(csv(results));
            }
        } catch (Exception e){
            String message = e.getMessage();
            // some exceptions carry no message; fall back to their description
            System.err.println((message != null) ? message : e.toString());
        }
    }

    /**
     * Renders a table as comma separated values, one line per row.
     * String cells are wrapped in double quotes, with embedded double quotes
     * doubled; every other cell is written using its string form.
     *
     * @param results the table; <code>null</code> is treated as an empty table
     *                and a <code>null</code> row as an empty row
     * @return the CSV text, each row terminated by a newline
     */
    protected String csv(Object [][] results){
        StringBuffer sb = new StringBuffer();
        if (results == null){
            return sb.toString();
        }
        for (int ii=0; ii<results.length; ii++){
            Object[] row = results[ii];
            int width = (row == null) ? 0 : row.length;
            for (int jj=0; jj<width; jj++){
                if (jj > 0){
                    sb.append(',');
                }
                Object o = row[jj];
                if (o instanceof String){
                    sb.append(quoteCsv((String)o));
                } else {
                    sb.append(o);
                }
            }
            // always terminate the row, even when it has no cells
            sb.append('\n');
        }
        return sb.toString();
    }

    /**
     * Renders a table as an HTML document containing a single bordered table.
     * The first row is written as header cells, all others as data cells.
     * Cell text is escaped so that markup characters cannot break the page.
     *
     * @param results the table; <code>null</code> is treated as an empty table
     *                and a <code>null</code> row as an empty row
     * @return the HTML text
     */
    protected String html(Object [][] results){
        StringBuffer sb = new StringBuffer();
        sb.append("<html><body><table border=\"1\">\n");
        int rows = (results == null) ? 0 : results.length;
        for (int ii=0; ii<rows; ii++){
            Object[] row = results[ii];
            int width = (row == null) ? 0 : row.length;
            String tag = (ii == 0) ? "th" : "td";
            sb.append("<tr>");
            for (int jj=0; jj<width; jj++){
                sb.append('<').append(tag).append('>');
                sb.append(escapeHtml(String.valueOf(row[jj])));
                sb.append("</").append(tag).append('>');
            }
            sb.append("</tr>\n");
        }
        sb.append("</table></body></html>");
        return sb.toString();
    }

    /** Wraps a string in double quotes, doubling any embedded double quote. */
    private static String quoteCsv(String value){
        StringBuffer sb = new StringBuffer(value.length() + 2);
        sb.append('"');
        for (int ii=0; ii<value.length(); ii++){
            char c = value.charAt(ii);
            if (c == '"'){
                sb.append('"');
            }
            sb.append(c);
        }
        sb.append('"');
        return sb.toString();
    }

    /** Replaces the characters &amp;, &lt;, &gt; and &quot; by their HTML entities. */
    private static String escapeHtml(String value){
        StringBuffer sb = new StringBuffer(value.length());
        for (int ii=0; ii<value.length(); ii++){
            char c = value.charAt(ii);
            switch (c){
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Creates a document for every file in {@link #files} and adds it to a
     * document list.
     *
     * @return currently always <code>null</code>: the report generation has
     *         not been ported to the new reporting framework yet
     */
    protected Object[][] run(){
        // do the analysis
        DocumentList list = new DocumentListImpl();
        for (int ii=0; ii<files.length; ii++){
            YKDocument doc = YKDocumentFactory.createYKDocument(files[ii]);
            list.add(doc);
        }
        return null;
    }

    // XXX use new reporting framework
    /*
    Object[][] table = null;
    try {
    Map results = new HashMap(); // doc -> report
    DictionaryReport rep = dictionary.makeDictionaryReport(list);
    List docs = rep.getDocuments();
    for (Iterator iter = docs.iterator(); iter.hasNext();) {
    YKDocument ykdoc = (YKDocument) iter.next();
    EntryStatisticsMap m = rep.getSubReport(ykdoc).getEntryStatisticsMap();
    results.put(ykdoc, m);
    }
    // get paths from first entrymap
    Iterator it = results.keySet().iterator();
    YKDocument ykd = (YKDocument)it.next();
    EntryStatisticsMap esm = (EntryStatisticsMap)results.get(ykd);
    List paths = esm.getCategoryEntries();
    Collections.sort(paths);
    table = new Object[paths.size()+1][list.size()+2];
    // table header
    table[0][0] = dictionary.getName() + " category";
    table[0][1] = "Score";
    int counter = 2;
    for (Iterator diter = docs.iterator(); diter.hasNext();) {
    YKDocument doc = (YKDocument) diter.next();
    table[0][counter] = doc.getTitle();
    counter++;
    }
    counter = 1;
    for (Iterator iter = paths.iterator(); iter.hasNext();) {
    String path = (String) iter.next();
    //sb.append(path);
    table[counter][0] = path;
    Double scored = esm.getScore(path);
    String sc = (scored==null) ? "None" : scored.toString();
    table[counter][1] = sc;
    // loop over documents
    int count = 2;
    for (Iterator iterator = docs.iterator(); iterator.hasNext();) {
    YKDocument d = (YKDocument) iterator.next();
    EntryStatisticsMap map = (EntryStatisticsMap)results.get(d);
    Integer i = map.getCount(path);
    table[counter][count] = i;
    count++;
    }
    counter++;
    }
    } catch (Exception ioe){
    ioe.printStackTrace();
    System.err.println("Could not make a reports: " + ioe.getMessage());
    }
    return table;
    }
    */

    /**
     * Reads the dictionary from the file named by the first argument and
     * collects the <code>.txt</code> files of the directory named by the
     * second argument into {@link #files}. The selected files and the
     * dictionary name are listed on standard error.
     *
     * @param args dictionary file followed by a document directory
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException if the directory content cannot be listed
     * @throws Exception if the dictionary file or the directory does not
     *         exist, the second argument is not a directory, or the
     *         dictionary cannot be read or parsed
     */
    protected void parseCommandline(String[] args) throws Exception{
        if (args == null || args.length < 2){
            throw new IllegalArgumentException(
                "Expected two arguments: a dictionary file and a directory of documents");
        }

        File dictionaryFile = new File(args[0]);
        if (!dictionaryFile.exists()){
            throw new Exception(dictionaryFile.getName() + " does not exist");
        }
        try {
            YKProject proj = ImportUtil.importYKProject(dictionaryFile);
            dictionary = proj.getDictionary();
        } catch (SAXException sax){
            // keep the original failure as the cause for diagnosis
            throw new Exception("Could not parse the dictionary in "
                + dictionaryFile.getName(), sax);
        } catch (Exception ex){
            throw new Exception("Could not read " + dictionaryFile.getName(), ex);
        }

        File dir = new File(args[1]);
        if (!dir.exists()){
            throw new Exception("Directory " + dir.getName() + " does not exist");
        }
        if (!dir.isDirectory()){
            throw new Exception(dir.getName() + " must be a directory");
        }
        FileFilter filter =
            new FileFilter(){
                public boolean accept(File arg0) {
                    return arg0.getName().endsWith(".txt");
                }
            };
        File[] found = dir.listFiles(filter);
        if (found == null){
            // listFiles returns null when an I/O error occurs
            throw new IOException("Could not list the files in " + dir.getName());
        }
        files = found;

        System.err.println("Analysing files:");
        for (int ii = 0; ii < files.length; ii++) {
            File file = files[ii];
            System.err.println("\t" + file.getName());
        }
        System.err.println("using dictionary " + dictionary.getName());
    }

    /**
     * Reads the whole content of a file as UTF-8 text.
     *
     * @param f the file to read
     * @return the file content
     * @throws IOException if the file cannot be read
     */
    protected String parseFile(File f) throws IOException {
        String s = FileUtil.slurp(f, "UTF-8");
        return s;
    }

    /**
     * Runs the command line tool. The first argument is assumed to be a
     * dictionary file and the second a directory; output goes to stdout.
     *
     * @param args dictionary file followed by a document directory
     */
    public static void main(String[] args) {
        new YKCommandLine(args);
    }
}