* @param file The file to read
* @return the result of the operation.
* @throws Exception if the file cannot be opened or read.
*/
private static ReadFileResult readFileWithConversionToUtf8(File file) throws Exception {
    // Overview:
    //   1. Read the raw bytes of the file once.
    //   2. If the bytes decode as strictly valid UTF-8, return them as-is and
    //      flag the result so that no conversion is performed.
    //   3. Otherwise ask Utf8Util for candidate charsets and return the first
    //      conversion that succeeds.
    //   4. If no candidate works, return a result with an error and null contents.
    ReadFileResult result = new ReadFileResult();

    // Read the file a single time and close the stream explicitly, even if
    // reading fails, so that no file handle is leaked.
    byte[] bytes;
    FileInputStream in = new FileInputStream(file);
    try {
        bytes = IOUtils.toByteArray(in);
    }
    finally {
        in.close();
    }

    try {
        // Strict decoding: with CodingErrorAction.REPORT a malformed or
        // unmappable byte sequence raises CharacterCodingException. A lenient
        // reader would silently substitute U+FFFD and make every file look
        // like valid UTF-8, so the conversion path would never be taken.
        String str = java.nio.charset.Charset.forName("UTF-8").newDecoder()
                .onMalformedInput(java.nio.charset.CodingErrorAction.REPORT)
                .onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPORT)
                .decode(java.nio.ByteBuffer.wrap(bytes))
                .toString();
        result.setLogInfo("OK: file can be read as UTF-8 (no conversion necessary)");
        // The error is set on purpose; per its message it signals that a
        // conversion is unnecessary (presumably checked by the caller).
        result.setError("setting error to avoid unnecesary conversion");
        result.setContents(str);
        return result;
    }
    catch (CharacterCodingException ex) {
        // Not valid UTF-8: fall through to charset detection below.
        result.setLogInfo("OK: file cannot be read as UTF-8 directly.");
    }

    Collection<String> charsets = Utf8Util.isUtf8(bytes);
    if ( charsets == null ) {
        // charsets == null means the bytes are good UTF-8, so this
        // should NOT happen after the strict decoding above failed.
        // Kept as a defensive fallback.
        result.addLogInfo("OK: already in UTF-8.");
        result.setError("setting error to avoid unnecesary conversion");
        result.setContents(new String(bytes, "UTF-8"));
        return result;
    }

    result.addLogInfo("Charset of the file may be one of: " +charsets+ "\n");
    result.addLogInfo("Attempting conversiones..\n");
    for ( String charsetName : charsets ) {
        try {
            String outputStr = _asString(bytes, charsetName);
            result.addLogInfo("Conversion from " +charsetName+ ": OK.\n");
            result.setContents(outputStr);
            return result;
        }
        catch (CharacterCodingException ignored) {
            // This candidate could not decode the bytes;
            // continue with the other possible charsets...
        }
    }

    // Every candidate charset failed: report the error and return no contents.
    result.setError("None of the conversions from the possible detected charsets "
            + " was successful: " +charsets
    );
    result.setContents(null);
    return result;
}