IntsRef scratchInts = new IntsRef();
StringBuilder sb = new StringBuilder();
File unsorted = File.createTempFile("unsorted", "dat", tempDir);
ByteSequencesWriter writer = new ByteSequencesWriter(unsorted);
boolean success = false;
try {
// Read every dictionary stream line by line and write each entry to `writer`
// as UTF-8 bytes. Characters are decoded from the stream using `decoder`.
for (InputStream dictionary : dictionaries) {
BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
// The value read here is never used: it is overwritten by the first
// readLine() in the loop condition below, so the first line is skipped.
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
while ((line = lines.readLine()) != null) {
// NOTE(review): unescapeEntry is defined outside this view; presumably it
// resolves escape sequences in the raw entry -- confirm against its definition.
line = unescapeEntry(line);
if (needsInputCleaning) {
// Split at the LAST occurrence of FLAG_SEPARATOR: only the text before it
// is passed to cleanInput; the separator and everything after it are kept as-is.
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
// No separator found: the whole line is cleaned and written.
CharSequence cleansed = cleanInput(line, sb);
writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
} else {
String text = line.substring(0, flagSep);
CharSequence cleansed = cleanInput(text, sb);
// cleanInput may hand back something other than the shared buffer `sb`
// (identity check). In that case copy its result into `sb` so the
// separator-and-flags suffix can be appended to it below.
// NOTE(review): assumes that when cleanInput returns `sb`, the buffer holds
// only the cleaned text for this line -- verify in cleanInput.
if (cleansed != sb) {
sb.setLength(0);
sb.append(cleansed);
}
// Re-attach the untouched tail, starting at the separator itself.
sb.append(line.substring(flagSep));
writer.write(sb.toString().getBytes(StandardCharsets.UTF_8));
}
} else {
// No cleaning requested: write the unescaped line unchanged.
writer.write(line.getBytes(StandardCharsets.UTF_8));
}
}
}
success = true;
} finally {