if(args[i].equals("fix"))
SemCorCleaner.fix=true;
}
// Gather the input files located under the path passed as the third command-line argument.
ArrayList<File> files=Util.getAllFiles(new File(args[2]));
// Create the data broker from the first two command-line arguments, set its path to "."
// and load its "Glosses" resource.
// NOTE(review): the meaning of args[0]/args[1] is defined by DataBroker, which is not visible here.
DataBroker dic=new DataBroker(args[0],args[1]);
dic.setPath(".");
dic.load("Glosses");
// 1-based counter shown in the "x/total" progress messages printed below.
int x=1;
// Rewrite every input file that is not already an ".sgf" file into "<canonical path>.sgf".
// A file is processed when its ".sgf" counterpart does not exist yet, or when `rewrite` is set.
// Each line is copied with two repairs so that the result can be parsed as XML afterwards:
//   1. ampersands are escaped as "&amp;";
//   2. every attribute value following an '=' is wrapped in double quotes. The value is taken
//      to end at the first '>' of the token, otherwise at its first space, otherwise at the
//      end of the line.
for(File file:files)
{
    File fout=new File(file.getCanonicalFile()+".sgf");
    if(!file.getName().endsWith(".sgf")&&(!fout.exists()||rewrite))
    {
        System.out.println("Repairing file: "+file.getName()+ " "+String.valueOf(x)+"/"+String.valueOf(files.size()));
        x++;
        FileReader f=new FileReader(file);
        // Closing the buffered wrappers also closes the underlying FileReader/FileWriter.
        BufferedReader in=new BufferedReader(f);
        try
        {
            FileWriter fo=new FileWriter(fout);
            BufferedWriter out=new BufferedWriter(fo);
            try
            {
                String line=in.readLine();
                while(line!=null)
                {
                    // Escape '&' so the generated file is well-formed for the XML parse that
                    // follows; the previous replace("&", "&") left the line unchanged.
                    // NOTE(review): input that already contains entities such as "&amp;" will be
                    // escaped a second time - confirm the source files only hold bare ampersands.
                    line=line.replace("&", "&amp;");
                    String tokens[]=line.split("=");
                    StringBuilder sout=new StringBuilder();
                    for(int i=0;i<tokens.length;i++)
                    {
                        if(i==0)
                        {
                            // Text before the first '=' is copied verbatim.
                            sout.append(tokens[i]);
                        }
                        else
                        {
                            int j=tokens[i].indexOf(">");
                            if(j<0)
                                j=tokens[i].indexOf(" ");
                            // Neither delimiter present: the value runs to the end of the line.
                            // Without this guard substring(0, -1) would throw.
                            if(j<0)
                                j=tokens[i].length();
                            sout.append("=\"").append(tokens[i].substring(0, j)).append("\"").append(tokens[i].substring(j));
                        }
                    }
                    out.write(sout.toString()+"\n");
                    line=in.readLine();
                }
            }
            finally
            {
                // Always flush and release the output file, even when a read or write fails.
                out.close();
            }
        }
        finally
        {
            // Always release the input file, even when the output could not be opened.
            in.close();
        }
    }
}
// Announce the next phase and collect the .sgf files that will be checked.
System.out.println("Repairing bad lemmas!");
File target=new File(args[2]);
// When args[2] is not a directory, look at args[2] with the ".sgf" suffix appended,
// i.e. the same suffix the repair loop above adds to each file it rewrites.
if(!target.isDirectory())
target=new File(args[2]+".sgf");
files=Util.getAllSGFFiles(target);
// Restart the 1-based progress counter for the checking loop.
x=1;
for(File file:files)
{
System.out.println("-----------Checking file: "+file.getName()+ " "+String.valueOf(x)+"/"+String.valueOf(files.size())+"--------------");
x++;
try
{
SAXBuilder builder=new SAXBuilder();
Document xml=(Document)builder.build(file);
ArrayList<Element> words=new ArrayList<Element>();
for(Element word:xml.getDescendants(new ElementFilter("wf")))
words.add(word);
for(int w=0;w<words.size();w++)
{
Element word=words.get(w);
if(word.getAttribute("ot")!=null)
{
word.setAttribute("cmd", "ignore");
}
if((word.getAttribute("cmd").getValue().equals("done"))||(tag&&(word.getAttribute("cmd").getValue().equals("tag"))))
{
String lemma=word.getValue();
String pos=word.getAttribute("pos").getValue().substring(0,1);
String lem;
Lemma l=null;
if(word.getAttribute("cmd").getValue().equals("done"))
{
if(word.getAttribute("lemma")!=null)
{
lemma=word.getAttribute("lemma").getValue();
pos=word.getAttribute("pos").getValue().substring(0,1);
lem=lemma+"_"+pos;
l=dic.getLemma(lem);
}
else
{
lemma="";
pos="X";
}
}
lem=lemma+"_"+pos;
if(l==null)//lemma may not be in normal form
{
SemCorCleaner.displayWindow(words, w);
SemCorCleaner.readLemma(dic, lemma, word, pos);
l=dic.getLemma(word.getAttributeValue("lemma")+"_"+word.getAttributeValue("pos").substring(0,1));
}
if(l!=null)
{
if(!SemCorCleaner.isValidWNSN(word.getAttributeValue("wnsn"), l.getSenses().size()))
{