try {
// Open both input files and wrap them in UTF-8 text readers:
// eFile is read through dis1, fFile through dis2.
fis1 = new FileInputStream(eFile);
fis2 = new FileInputStream(fFile);
dis1 = new BufferedReader(new InputStreamReader(fis1, "UTF-8"));
dis2 = new BufferedReader(new InputStreamReader(fis2, "UTF-8"));
// Per-sentence term-count maps; the read loop replaces them with fresh instances
// after each line pair has been stored.
HMapSIW fSent = new HMapSIW();
HMapSIW eSent = new HMapSIW();
// Current line read from eFile / fFile.
String eLine = null, fLine = null;
// cntE/cntF count the line pairs processed so far; lastSentLenE/lastSentLenF hold the
// token count of the most recently processed line on each side.
int cntE = 0, cntF = 0, lastSentLenE = 0, lastSentLenF = 0;
// Read the two files in lockstep: line i of eFile is paired with line i of fFile.
// For every pair, both sides are tokenized, document frequencies are updated
// (dfD for the f side, dfE for the e side), and the per-sentence term counts,
// token lengths and trimmed text are appended to the corresponding collections.
while ((eLine = dis1.readLine()) != null) {
    fLine = dis2.readLine();
    if (fLine == null) {
        // fFile ended before eFile. Without this check the mismatch surfaced as a bare
        // NullPointerException from trim(); fail with a message that names the problem.
        throw new IllegalStateException("Line count mismatch: " + fFile
            + " has fewer lines than " + eFile + " (ended after " + cntF + " lines)");
    }
    fLine = fLine.trim();
    eLine = eLine.trim();

    // f side: fall back to single-space splitting when no tokenizer is configured.
    String[] tokens;
    if (fTokenizer == null) {
        tokens = fLine.split(" ");
    } else {
        tokens = fTokenizer.processContent(fLine);
    }
    lastSentLenF = tokens.length;
    for (String token : tokens) {
        if (!fSent.containsKey(token)) { // first occurrence of this token in this sentence
            dfD.increment(token);
        }
        fSent.increment(token);
    }

    // e side: always tokenized with eTokenizer (no null fallback here).
    tokens = eTokenizer.processContent(eLine);
    lastSentLenE = tokens.length;
    for (String token : tokens) {
        if (!eSent.containsKey(token)) { // first occurrence of this token in this sentence
            dfE.increment(token);
        }
        eSent.increment(token);
    }

    // Accumulate length totals and store everything gathered for this line pair.
    sumFLengs += lastSentLenF;
    sumELengs += lastSentLenE;
    enSentLengths.add(lastSentLenE);
    deSentLengths.add(lastSentLenF);
    eSentTfs.add(eSent);
    fSentTfs.add(fSent);
    eSents.add(eLine);
    fSents.add(fLine);
    cntE++;
    cntF++;

    // Start the next pair with empty term-count maps; the stored ones must not be reused.
    fSent = new HMapSIW();
    eSent = new HMapSIW();
}
// dispose all the resources after using them.
// NOTE(review): only fis1/dis1 are closed in the lines visible here, and these calls sit
// inside the try, so they are skipped if anything above throws — confirm fis2/dis2 are
// closed further down and consider a finally block or try-with-resources.
fis1.close();
dis1.close();