// Regular input format: one sentence per line, one sentence per document.
private void readSentences(int sentsPerDoc, String eReadFile, String fReadFile, String eLang, String fLang,
String fToken, String eToken, String fStopwordsFile, String eStopwordsFile) throws IOException,
ClassNotFoundException, InstantiationException, IllegalAccessException {
Tokenizer eTokenizer = TokenizerFactory.createTokenizer(eLang, eToken, true, eStopwordsFile, eStopwordsFile + ".stemmed", null);
Tokenizer fTokenizer = TokenizerFactory.createTokenizer(fLang, fToken, true, fStopwordsFile, fStopwordsFile + ".stemmed", null);
float sumFLengs = 0, sumELengs = 0;
try {
BufferedReader dis1 = new BufferedReader(new InputStreamReader(new FileInputStream(new File(eReadFile)), "UTF-8"));
BufferedReader dis2 = new BufferedReader(new InputStreamReader(new FileInputStream(new File(fReadFile)), "UTF-8"));
HMapSIW fDoc = new HMapSIW();
HMapSIW eDoc = new HMapSIW();
String eLine = null, fLine = null;
int cntEDocs = 0, cntFDocs = 0, lastDocLenE = 0, lastDocLenF = 0, numSents = 0;
while ((eLine = dis1.readLine()) != null) {
fLine = dis2.readLine().trim();
eLine = eLine.trim();
String[] tokens = fTokenizer.processContent(fLine);
lastDocLenF += tokens.length;
for (String token : tokens) {
if (!fDoc.containsKey(token)) { // if this is first time we saw token in this sentence
dfD.increment(token);