return dbl;
}//TESTED
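/**
 * Reads the SequenceFile output of a custom map/reduce job from HDFS and converts each
 * key/value record into BSON, returning the records as a BasicDBList.
 * @param cmr the custom map/reduce job whose output directory is read
 * @param nLimit maximum number of records to return (inferred from the parameter name)
 * @param fields optional comma-separated field spec, eg "field" or "field:subfield", restricting which fields are returned
 */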
public static BasicDBList getBsonFromSequenceFile(CustomMapReduceJobPojo cmr, int nLimit, String fields) throws SAXException, IOException, ParserConfigurationException {
BasicDBList dbl = new BasicDBList();
PropertiesManager props = new PropertiesManager();
Configuration conf = getConfiguration(props);
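// Resolve the HDFS directory that holds this job's SequenceFile output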
Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
@SuppressWarnings({ "unchecked", "rawtypes" })
SequenceFileDirIterable<? extends Writable, ? extends Writable> seqFileDir =
new SequenceFileDirIterable(pathDir, PathType.LIST, PathFilters.logsCRCFilter(), conf);
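// Raw-type constructor (hence the SuppressWarnings above); the path filter skips Hadoop bookkeeping artifacts such as _logs directories and .crc files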
// Very basic field spec: only top-level fields, one level of nesting, and field removal are supported
HashSet<String> fieldLookup = null;
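// 'fields' is a comma-separated list; each entry may be "field" or "field:subfield" - only the top-level name is recorded in fieldLookup here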
if (null != fields) {
fieldLookup = new HashSet<String>();
String[] fieldArray = fields.split(",");
for (String field: fieldArray) {
String[] fieldDecomp = field.split(":");
fieldLookup.add(fieldDecomp[0]);
}
}//TOTEST
int nRecords = 0;
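// Iterate over the records in the job output, converting each key/value pair into a BasicDBObject (nRecords is presumably checked against nLimit as records are accumulated)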
for (Pair<? extends Writable, ? extends Writable> record: seqFileDir) {
BasicDBObject element = new BasicDBObject();
// KEY
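// Convert the key to a string (for primitive Writables) or to a BSON object (for BSONWritable)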
Writable key = record.getFirst();
if (key instanceof org.apache.hadoop.io.Text) {
org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text)key;
element.put("key", writable.toString());
}
else if (key instanceof org.apache.hadoop.io.DoubleWritable) {
org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable)key;
element.put("key", Double.toString(writable.get()));
}
else if (key instanceof org.apache.hadoop.io.IntWritable) {
org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable)key;
element.put("key", Integer.toString(writable.get()));
}
else if (key instanceof org.apache.hadoop.io.LongWritable) {
org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable)key;
element.put("key", Long.toString(writable.get()));
}
else if (key instanceof BSONWritable) {
element.put("key", MongoDbUtil.convert((BSONWritable)key));
}
// VALUE
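// Convert the value in the same way; Mahout vector/cluster types are additionally expanded into BSON structures below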
Writable value = record.getSecond();
if (value instanceof org.apache.hadoop.io.Text) {
org.apache.hadoop.io.Text writable = (org.apache.hadoop.io.Text)value;
element.put("value", writable.toString());
}
else if (value instanceof org.apache.hadoop.io.DoubleWritable) {
org.apache.hadoop.io.DoubleWritable writable = (org.apache.hadoop.io.DoubleWritable)value;
element.put("value", Double.toString(writable.get()));
}
else if (value instanceof org.apache.hadoop.io.IntWritable) {
org.apache.hadoop.io.IntWritable writable = (org.apache.hadoop.io.IntWritable)value;
element.put("value", Integer.toString(writable.get()));
}
else if (value instanceof org.apache.hadoop.io.LongWritable) {
org.apache.hadoop.io.LongWritable writable = (org.apache.hadoop.io.LongWritable)value;
element.put("value", Long.toString(writable.get()));
}
else if (value instanceof BSONWritable) {
element.put("value", MongoDbUtil.convert((BSONWritable)value));
}
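// Mahout-specific value types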
else if (value instanceof org.apache.mahout.math.VectorWritable) {
Vector vec = ((org.apache.mahout.math.VectorWritable)value).get();
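// listFromMahoutVector is assumed to flatten the Mahout vector into a BasicDBList (and may add vector metadata such as a name to 'element')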
BasicDBList dbl2 = listFromMahoutVector(vec, "value", element);
element.put("value", dbl2);
}
else if (value instanceof org.apache.mahout.clustering.classify.WeightedVectorWritable) {
org.apache.mahout.clustering.classify.WeightedVectorWritable vecW = (org.apache.mahout.clustering.classify.WeightedVectorWritable)value;
element.put("valueWeight", vecW.getWeight());
BasicDBList dbl2 = listFromMahoutVector(vecW.getVector(), "value", element);
element.put("value", dbl2);
}
else if (value instanceof org.apache.mahout.clustering.iterator.ClusterWritable) {
Cluster cluster = ((org.apache.mahout.clustering.iterator.ClusterWritable)value).getValue();
BasicDBObject clusterVal = new BasicDBObject();