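/**
* Entry point. Looks up the "scrapbook.xml" files under a hard-coded directory and,
* for each one, builds a bag of lower-cased token values from the tokenised document
* and a set of "PROTECT:"-prefixed features from the annotations matched by the SAF query.
* When at least one annotation is matched, an Event and a BagEvent are created, labelled
* "TRUE" if any matched annotation has subtype "REACT" and "FALSE" otherwise.
*
* @param args command-line arguments
* @throws Exception propagated from the file, XML and tokenisation steps
*/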
public static void main(String[] args) throws Exception {
Stemmer stemmer = new Stemmer(new PorterStemmer());
List<File> files = FileTools.getFilesFromDirectoryByName(new File("/home/ptc24/newows/reactnewpubmed"), "scrapbook.xml");
List<Event> events = new ArrayList<Event>();
List<BagEvent> eventBags = new ArrayList<BagEvent>();
for(File f : files) {
ScrapBook sb = new ScrapBook(f.getParentFile());
Document doc = (Document)sb.getDoc().copy();
Nodes nodes = doc.query("//cmlPile");
for(int i=0;i<nodes.size();i++) nodes.get(i).detach();
Document sourceDoc = (Document)doc.copy();
nodes = sourceDoc.query("//ne");
for(int i=0;i<nodes.size();i++) {
XOMTools.removeElementPreservingText((Element)nodes.get(i));
}
Document safDoc = InlineToSAF.extractSAFs(doc, sourceDoc, "foo");
ProcessingDocument procDoc = ProcessingDocumentFactory.getInstance().makeTokenisedDocument(sourceDoc, false, false, false);
//NameRecogniser nr = new NameRecogniser();
//nr.halfProcess(sourceDoc);
//nr.makeTokenisers(false);
Set<String> tokenSet = new HashSet<String>();
Bag<String> tokenBag = new Bag<String>();
for(TokenSequence t : procDoc.getTokenSequences()) {
//System.out.println(t.getSourceString());
for(Token token : t.getTokens()) {
//tokenSet.add("stem=" + stemmer.getStem(token.getValue().toLowerCase()));
//tokenSet.add(token.getValue().toLowerCase());
tokenBag.add(token.getValue().toLowerCase());
}
}
//for(String t : tokenBag.getList()) {
// System.out.println(t + "\t" + tokenBag.getCount(t));
//}
//File safFile = new File(f.getParentFile(), "saf.xml");
//Document safDoc = new Builder().build(safFile);
Nodes n = safDoc.query("/saf/annot[slot[@name='type']['PRW']]");
Set<String> wpss = new HashSet<String>();
boolean hasReact = false;
boolean hasPotentialReact = n.size() > 0;
for(int i=0;i<n.size();i++) {
Element annot = (Element)n.get(i);
String s = SafTools.getSlotValue(annot, "surface").toLowerCase();
String subtype = SafTools.getSlotValue(annot, "subtype");
if("REACT".equals(subtype)) hasReact = true;
String wps = s+"_"+subtype;
wpss.add(wps);
//tokenSet.remove(s);
//tokenSet.remove(stemmer.getStem(s));
tokenSet.add("PROTECT:" + s);
tokenSet.add("PROTECT:stem=" + stemmer.getStem(s));
}
if(hasPotentialReact) {
Event e = new Event(hasReact ? "TRUE" : "FALSE", tokenSet.toArray(new String[0]));
events.add(e);
BagEvent be = new BagEvent(hasReact ? "TRUE" : "FALSE", tokenBag);