package no.priv.garshol.duke.datasources;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.FileInputStream;
import java.util.*;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonFactory;
import no.priv.garshol.duke.Record;
import no.priv.garshol.duke.DukeException;
import no.priv.garshol.duke.RecordIterator;
/**
 * DataSource which reads a sequence of JSON objects as records. Each
 * top-level JSON object becomes one record; every field whose name
 * matches a configured column contributes its value(s) to that record.
 *
 * <p>The iterator only recognizes a record when the next top-level
 * token is the start of an object, so the input is expected to be a
 * sequence of objects rather than one enclosing array.
 *
 * <p>WARNING: this datasource isn't nesting-aware yet: if two fields
 * share the same name while being at different nesting levels, they
 * will be treated the same way.
 */
public class JsonDataSource extends ColumnarDataSource {
  // A configured JsonFactory is reusable, so one shared instance
  // replaces the per-call instances.
  private static final JsonFactory JSON_FACTORY = new JsonFactory();

  private String file;

  public JsonDataSource() {
    super();
  }

  /**
   * Returns the name of the file records are read from by
   * {@link #getRecords()}.
   */
  public String getInputFile() {
    return file;
  }

  /**
   * Sets the name of the file records are read from by
   * {@link #getRecords()}.
   */
  public void setInputFile(String file) {
    this.file = file;
  }

  /**
   * Returns an iterator over the records found in the given JSON text.
   *
   * @param s the JSON text to parse
   * @throws DukeException if the parser cannot be created
   */
  public RecordIterator getRecordsFromString(String s) {
    try {
      return new JsonIterator(JSON_FACTORY.createParser(new StringReader(s)));
    } catch (IOException e) {
      throw new DukeException(e);
    }
  }

  /**
   * Returns an iterator over the records found in the given stream.
   *
   * @param is the stream to read JSON from
   * @throws DukeException if the parser cannot be created
   */
  public RecordIterator getRecordsFromStream(InputStream is) {
    try {
      return new JsonIterator(JSON_FACTORY.createParser(is));
    } catch (IOException e) {
      throw new DukeException(e);
    }
  }

  /**
   * Returns an iterator over the records found in the configured
   * input file.
   *
   * @throws DukeException if the file cannot be opened or the parser
   *         cannot be created
   */
  public RecordIterator getRecords() {
    InputStream in;
    try {
      in = new FileInputStream(file);
    } catch (IOException e) {
      throw new DukeException("Error reading JSON from " + file, e);
    }

    try {
      return getRecordsFromStream(in);
    } catch (RuntimeException e) {
      // No iterator was handed out, so nothing else will ever close
      // the file: release it here before propagating the failure.
      try {
        in.close();
      } catch (IOException ignored) {
        // the original failure is the one worth reporting
      }
      throw e;
    }
  }

  public String getSourceName() {
    return "JSON";
  }

  /**
   * Iterator turning each top-level JSON object produced by the parser
   * into one record.
   */
  protected class JsonIterator extends RecordIterator {
    private final JsonParser parser;
    private final RecordBuilder builder;
    // column name -> all columns configured under that name
    private final Map<String, Set<Column>> cols =
      new HashMap<String, Set<Column>>();
    // Token read ahead by hasNext() and not yet consumed by next();
    // null when nothing has been read ahead.
    private JsonToken tokenFromHasNext;

    public JsonIterator(JsonParser jp) {
      this.parser = jp;
      this.builder = new RecordBuilder(JsonDataSource.this);
      for (Column col : getColumns()) {
        Set<Column> sameName = cols.get(col.getName());
        if (sameName == null) {
          sameName = new HashSet<Column>();
          cols.put(col.getName(), sameName);
        }
        sameName.add(col);
      }
    }

    /**
     * Tells whether the next token is the start of a JSON object.
     * Safe to call repeatedly: the token is read ahead at most once
     * and kept until next() consumes it.
     *
     * @throws DukeException if the underlying input cannot be read or
     *         parsed
     */
    @Override
    public boolean hasNext() {
      try {
        if (tokenFromHasNext == null)
          tokenFromHasNext = parser.nextToken();
      } catch (IOException e) {
        // report the failure instead of pretending the input ended
        throw new DukeException(e);
      }
      return tokenFromHasNext == JsonToken.START_OBJECT;
    }

    /**
     * Reads tokens up to the end of the current top-level object and
     * returns the record built from the matching fields.
     *
     * @throws NoSuchElementException if the input ends before a
     *         complete object has been read
     * @throws DukeException if the underlying input cannot be read or
     *         parsed
     */
    @Override
    public Record next() {
      builder.newRecord();
      Set<Column> currentCols = null;
      try {
        JsonToken currentToken;
        // -1 means "outside any object"; 0 is the record's own object
        int nestingLevel = -1;
        do {
          if (tokenFromHasNext == null) {
            currentToken = parser.nextToken();
          } else {
            currentToken = tokenFromHasNext;
            tokenFromHasNext = null;
          }

          if (currentToken == null)
            // the parser is exhausted; without this check the loop
            // would never terminate
            throw new NoSuchElementException(
              "JSON input ended before a complete record was read");

          if (currentToken == JsonToken.START_OBJECT) {
            nestingLevel++;
          } else if (currentToken == JsonToken.END_OBJECT) {
            nestingLevel--;
          } else if (currentToken == JsonToken.FIELD_NAME) {
            currentCols = cols.get(parser.getCurrentName());
          } else if (currentToken == JsonToken.START_ARRAY ||
                     currentToken == JsonToken.END_ARRAY ||
                     currentToken == JsonToken.VALUE_NULL) {
            // Array markers carry no value of their own. A JSON null
            // means "no value"; its token text is the string "null",
            // which must not be stored as data.
          } else if (currentCols != null) {
            String text = parser.getText();
            for (Column currentCol : currentCols)
              builder.addValue(currentCol, text);
          }
        } while (currentToken != JsonToken.END_OBJECT || nestingLevel != -1);
      } catch (IOException e) {
        throw new DukeException(e);
      }
      return builder.getRecord();
    }

    /**
     * Closes the underlying parser so its input is released even when
     * iteration stops before the end of the data.
     *
     * <p>NOTE(review): meant to override RecordIterator's close hook
     * if it declares one; RecordIterator is not visible here, so
     * there is deliberately no @Override -- confirm.
     *
     * @throws DukeException if closing the parser fails
     */
    public void close() {
      try {
        parser.close();
      } catch (IOException e) {
        throw new DukeException(e);
      }
    }
  }
}