/**
 * Reads a corpus laid out as one sub-directory per class, where every file
 * inside a class directory is one document.
 * <p>
 * Class directories are processed in sorted name order and are assigned
 * consecutive indices starting at 0. Entries of {@code directory} that are
 * not directories are ignored. Files inside each class directory are read in
 * sorted name order so the result is deterministic across platforms.
 * <p>
 * The lines of a document are joined with {@code '\n'} so that words at line
 * boundaries stay separated. A file that cannot be read is reported via
 * {@link Throwable#printStackTrace()} and skipped; it contributes neither a
 * document nor a label, so documents and labels always stay aligned.
 *
 * @param directory the root directory containing one sub-directory per
 *          class.
 * @return a tuple of: the document texts, a vector holding the class index
 *         of each document (same order as the documents), and the array
 *         mapping a class index to its directory name.
 * @throws IllegalArgumentException if {@code directory} cannot be listed
 *           (it is not a directory or an I/O error occurred).
 */
public static Tuple3<List<String>, DenseDoubleVector, String[]> readTwentyNewsgroups(
    File directory) {
  String[] entries = directory.list();
  // File#list returns null instead of throwing when the path is not a
  // directory or cannot be read.
  if (entries == null) {
    throw new IllegalArgumentException("Not a readable directory: "
        + directory);
  }
  Arrays.sort(entries);

  // Only sub-directories denote classes; stray files at the top level would
  // otherwise yield a null listing further down.
  List<String> classNames = new ArrayList<>();
  for (String entry : entries) {
    if (new File(directory, entry).isDirectory()) {
      classNames.add(entry);
    }
  }
  String[] nameMapping = classNames.toArray(new String[classNames.size()]);

  List<String> docList = new ArrayList<>();
  TDoubleArrayList prediction = new TDoubleArrayList();
  for (int classIndex = 0; classIndex < nameMapping.length; classIndex++) {
    File classDir = new File(directory, nameMapping[classIndex]);
    String[] fileList = classDir.list();
    if (fileList == null) {
      // The class directory became unreadable; it keeps its index in the
      // name mapping but contributes no documents.
      continue;
    }
    // File#list gives no ordering guarantee, sort for reproducible output.
    Arrays.sort(fileList);
    for (String fileDoc : fileList) {
      // NOTE(review): FileReader decodes with the platform default charset.
      try (BufferedReader br = new BufferedReader(new FileReader(new File(
          classDir, fileDoc)))) {
        StringBuilder document = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
          // readLine strips the terminator; re-add a separator so the last
          // word of a line does not merge with the first word of the next.
          document.append(line).append('\n');
        }
        docList.add(document.toString());
        prediction.add(classIndex);
      } catch (IOException e) {
        // Best effort: report the unreadable file and continue with the
        // remaining ones.
        e.printStackTrace();
      }
    }
  }
  return new Tuple3<>(docList, new DenseDoubleVector(prediction.toArray()),
      nameMapping);
}