// Numeric threshold constant, fixed at -999.
// NOTE(review): presumably a cutoff compared against document-correlation
// values when writing output; a value this low would likely let everything
// through — confirm against where it is used.
private static final double THRESHOLD = -999;
public static void main(String[] args) throws IOException {
String folder = "..";
Hapax hapax = Hapax.newCorpus()
.useTFIDF()
.useCamelCaseScanner()
.addFiles(folder, ".java")
.build();
Matrix corr = hapax.getIndex().documentCorrelation();
PrintWriter f = new PrintWriter("data.xml");
f.println("<?xml version=\"1.0\"?>");
f.println("<!DOCTYPE ggobidata SYSTEM \"ggobi.dtd\">");
f.println("<ggobidata count=\"2\">");
f.println("<data name=\"points\">");
f.println("<variables count=\"1\">");
f.println(" <realvariable name=\"x\" nickname=\"x\" />");
f.println("</variables>");
f.printf("<records count=\"%d\">\n", hapax.getIndex().documentCount());
int n = 0;
for (String doc: hapax.getIndex().documents()) {
f.printf("<record id=\"%d\" label=\"%s\"> 0 </record>\n",
++n,
new File(doc).getName());
}
f.print("</records>\n</data>\n\n");