// ---- Positional command-line arguments args[3]..args[11] ----
// NOTE(review): eCollectionPath and pwsimPairsPath are used further down but are not
// defined in this section; presumably they come from the lower-numbered args -- confirm.
String fCollectionPath = args[3];
String eDir = args[4];
String fDir = args[5];
// Retrieval environment rooted at eDir; queried below for the df-by-term and index-terms data paths.
RetrievalEnvironment eEnv = new RetrievalEnvironment(eDir, FileSystem.get(conf));
String vocabDir = args[6];
String eLang = args[7];
String fLang = args[8];
String classifierFile = args[9];
// Float.parseFloat / Integer.parseInt throw NumberFormatException on malformed input.
float classifierThreshold = Float.parseFloat(args[10]);
int classifierId = Integer.parseInt(args[11]);
// Job name encodes the language pair, the classifier threshold (labelled "F1=") and the classifier id.
conf.setJobName("FindParallelSentences_" + fLang +"-" + eLang +"_F1="+classifierThreshold+"["+classifierId+"]");

// ---- Per-language resource paths, all resolved under vocabDir ----
// Naming patterns used below:
//   <lang>-sent.bin, <lang>-token.bin
//   vocab.<langA>-<langB>.<lang>
//   ttable.<langA>-<langB>
String eSentDetect = vocabDir+"/"+eLang+"-sent.bin";
String eTokenizer = vocabDir+"/"+eLang+"-token.bin";
String eVocabSrc = vocabDir+"/vocab."+eLang+"-"+fLang+"."+eLang;
String eVocabTrg = vocabDir+"/vocab."+fLang+"-"+eLang+"."+eLang;
String fSentDetect = vocabDir+"/"+fLang+"-sent.bin";
String fTokenizer = vocabDir+"/"+fLang+"-token.bin";
String fVocabSrc = vocabDir+"/vocab."+fLang+"-"+eLang+"."+fLang;
String fVocabTrg = vocabDir+"/vocab."+eLang+"-"+fLang+"."+fLang;
String f2e_ttableFile = vocabDir+"/ttable."+fLang+"-"+eLang;
String e2f_ttableFile = vocabDir+"/ttable."+eLang+"-"+fLang;

// Reducer count is fixed here; it is only stored in the configuration under "NumReducers" in this section.
int numReducers = 50;

// ---- Publish parameters through the job configuration ----
conf.set("eDir", eDir);
conf.set("fDir", fDir);
conf.set("eLang", eLang);
conf.set("fLang", fLang);
conf.setInt("NumReducers", numReducers);
conf.setFloat("ClassifierThreshold", classifierThreshold);
conf.setInt("ClassifierId", classifierId);

// ---- Register side files with the distributed cache ----
// The numbers in the log messages correspond to the 0-based order in which files are added.
// NOTE(review): consumers presumably look the files up by these positions, so do not
// reorder, insert or remove addCacheFile calls without checking the reading side -- confirm.
// new URI(...) throws URISyntaxException if a path is not a valid URI.
sLogger.info("caching files...");
// e-language files (cache positions 0-4)
sLogger.info("caching files...0,1,2,3,4");
DistributedCache.addCacheFile(new URI(eEnv.getDfByTermData()), conf);
DistributedCache.addCacheFile(new URI(eSentDetect), conf);
DistributedCache.addCacheFile(new URI(eTokenizer), conf);
DistributedCache.addCacheFile(new URI(eVocabSrc), conf);
DistributedCache.addCacheFile(new URI(eVocabTrg), conf);
// f-language files (cache positions 5-9); the df file is taken from a fixed name under fDir
sLogger.info("caching files...5,6,7,8,9");
DistributedCache.addCacheFile(new URI(fDir+"/transDf.dat"), conf);
DistributedCache.addCacheFile(new URI(fSentDetect), conf);
DistributedCache.addCacheFile(new URI(fTokenizer), conf);
DistributedCache.addCacheFile(new URI(fVocabSrc), conf);
DistributedCache.addCacheFile(new URI(fVocabTrg), conf);
// cross-language files (cache positions 10-14): both ttable files, the e-side index terms,
// the classifier file and the pwsim pairs path
sLogger.info("caching files...10,11,12,13,14");
DistributedCache.addCacheFile(new URI(f2e_ttableFile), conf);
DistributedCache.addCacheFile(new URI(e2f_ttableFile), conf);
DistributedCache.addCacheFile(new URI(eEnv.getIndexTermsData()), conf);
DistributedCache.addCacheFile(new URI(classifierFile), conf);
DistributedCache.addCacheFile(new URI(pwsimPairsPath), conf);

// ---- Job input: both the e and f collection paths are added as inputs ----
FileInputFormat.addInputPaths(conf, eCollectionPath);
FileInputFormat.addInputPaths(conf, fCollectionPath);