// I'm sure there's a nice analytic way of doing this. Ah well...
public static void main(String[] args) {
List<Double> positiveExamples = new ArrayList<Double>();
List<Double> negativeExamples = new ArrayList<Double>();
ExtractTrainingData etd1 = ExtractTrainingData.getInstance();
List<File> sbFiles = new ArrayList<File>();
sbFiles.addAll(FileTools.getFilesFromDirectoryByName(new File("/home/ptc24/newows/goodrsc"), "scrapbook.xml"));
ExtractTrainingData etd2 = new ExtractTrainingData(sbFiles);
Set<String> chem = new HashSet<String>(etd2.chemicalWords);
//chem.removeAll(etd1.chemicalWords);
for(String w : chem) {
if(!NGramBuilder.getInstance().chemSet.contains(NGram.parseWord(w))) {
double score = NGram.getInstance().testWord(w);