public Table readTable(InputStream is) throws DataIOException {
// determine input stream capabilities
// if we can't reset the stream, we read in all the bytes
// and make our own local stream
ByteArrayList buf = null;
if ( is.markSupported() ) {
// mark the stream to our reset point
is.mark(Integer.MAX_VALUE);
} else {
// load in the entirety of the input stream
try {
buf = IOLib.readAsBytes(is);
} catch ( IOException ioe ) {
throw new DataIOException(ioe);
}
// create our own input stream
is = buf.getAsInputStream();
}
final TypeInferencer di = new TypeInferencer(m_pfactory);
final ArrayList headers = getColumnNames();
final int[] dim = new int[] { 0, 0 };
TableReadListener scanner = new TableReadListener() {
    // line number of the most recent data line counted as a row
    int lastCountedLine = -1;

    /**
     * First-pass callback. Feeds each data value to the type
     * inferencer, collects header names from line 1 when a header
     * row is expected, and tracks the table dimensions in
     * <code>dim</code> (dim[0] = number of data lines seen,
     * dim[1] = largest column number seen).
     *
     * @param line the line number of the value (line 1 is treated
     * as the header line when m_hasHeader is set)
     * @param col the column number of the value; col-1 is used as
     * the inferencer's column index, so this appears to be 1-based
     * @param value the raw text of the value
     */
    public void readValue(int line, int col, String value)
        throws DataParseException
    {
        boolean isDataLine = !m_hasHeader || line > 1;
        if ( isDataLine ) {
            // let the inferencer sample this value for its column
            di.sample(col-1, value);

            // a change in line number means a new row has started
            if ( lastCountedLine != line ) {
                dim[0] += 1;
                lastCountedLine = line;
            }
        } else if ( line == 1 ) {
            // a header is expected here, so line 1 holds column names
            headers.add(value);
        }

        // widen the column count if this value lies beyond it
        if ( dim[1] < col ) {
            dim[1] = col;
        }
    }
};
// do a scan of the stream, collecting length and type data
try {
read(is, scanner);
} catch ( IOException ioe ) {
throw new DataIOException(ioe);
} catch ( DataParseException de ) {
// can't happen
}
// create the table
int nrows = dim[0];
int ncols = dim[1];
final Table table = new Table(nrows, ncols);
// create the table columns
for ( int i=0; i < ncols; ++i ) {
String header;
if ( m_hasHeader || i < headers.size() ) {
header = (String)headers.get(i);
} else {
header = getDefaultHeader(i);
}
table.addColumn(header, di.getType(i));
table.getColumn(i).setParser(di.getParser(i));
}
// reset dim array, will hold row/col indices
dim[0] = dim[1] = -1;
TableReadListener parser = new TableReadListener() {
    // line number of the most recent line that advanced the row index
    int lastSeenLine = -1;

    /**
     * Second-pass callback. Parses each non-header value with the
     * parser chosen for its column and stores the result in the
     * table. While reading, <code>dim</code> holds the current
     * zero-based row (dim[0]) and column (dim[1]) indices.
     *
     * @param line the line number of the value (line 1 is skipped
     * when m_hasHeader is set)
     * @param col the column number of the value; col-1 is used as
     * the table column index
     * @param value the raw text of the value
     * @throws DataParseException if the column's parser rejects the
     * value
     */
    public void readValue(int line, int col, String value)
        throws DataParseException
    {
        // header values are not table data
        boolean isHeaderValue = m_hasHeader && line == 1;
        if ( isHeaderValue ) {
            return;
        }

        // a change in line number moves on to the next table row
        if ( lastSeenLine != line ) {
            dim[0] += 1;
            lastSeenLine = line;
        }
        final int column = col-1;
        dim[1] = column;

        // XXX NOTE-2005.08.29-jheer
        // Column values are filled through the generic Object-based
        // routines for now. That boxes primitive types, which costs
        // some performance and may trigger avoidable garbage
        // collections. If this proves to be a problem down the road,
        // more nuance can be added later.
        table.set(dim[0], column, di.getParser(column).parse(value));
    }
};
// read the data into the table
try {
// prepare the input stream
if ( is.markSupported() ) {
is.reset();
} else {
is = buf.getAsInputStream();
}
// read the data
read(is, parser);
} catch ( IOException ioe ) {
throw new DataIOException(ioe);