/***************************************************************************
* Copyright (C) 2012 by H-Store Project *
* Brown University *
* Massachusetts Institute of Technology *
* Yale University *
* *
* http://hstore.cs.brown.edu/ *
* *
* Permission is hereby granted, free of charge, to any person obtaining *
* a copy of this software and associated documentation files (the *
* "Software"), to deal in the Software without restriction, including *
* without limitation the rights to use, copy, modify, merge, publish, *
* distribute, sublicense, and/or sell copies of the Software, and to *
* permit persons to whom the Software is furnished to do so, subject to *
* the following conditions: *
* *
* The above copyright notice and this permission notice shall be *
* included in all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, *
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF *
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. *
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR *
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
* OTHER DEALINGS IN THE SOFTWARE. *
***************************************************************************/
/**
*
*/
package edu.brown.utils;
import java.io.File;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.log4j.Logger;
import org.voltdb.VoltType;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.Table;
import org.voltdb.utils.CatalogUtil;
import org.voltdb.utils.VoltTypeUtil;
import au.com.bytecode.opencsv.CSVReader;
/**
 * Iterates over the rows of a CSV data file and converts every row into an
 * {@code Object[]} whose elements are typed according to the columns of a
 * catalog {@link Table}.
 * <p>
 * Every iterator handed out by {@link #iterator()} shares the same underlying
 * reader and line counter, so the file is consumed at most once per instance.
 *
 * @author pavlo
 */
public class TableDataIterable implements Iterable<Object[]> {
    private static final Logger LOG = Logger.getLogger(TableDataIterable.class.getName());

    private final Table catalog_tbl;
    private final File table_file;
    private final CSVReader reader;
    /** Column types, indexed by column position */
    private final VoltType types[];
    /** Whether the column at a given position references a foreign key parent */
    private final boolean fkeys[];
    /** Whether the column at a given position may be null */
    private final boolean nullable[];
    private final boolean auto_generate_first_column;

    /**
     * Accepted timestamp layouts, ordered from MOST to LEAST specific.
     * DateFormat.parse(String) is allowed to stop before the end of the input,
     * so if the date-only pattern were tried first it would accept a full
     * "yyyy-MM-dd HH:mm:ss" value and silently drop the time of day.
     */
    private final DateFormat timestamp_formats[] = new DateFormat[] {
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"),
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"),
        new SimpleDateFormat("yyyy-MM-dd"),
    };

    /** Columns for which a truncation warning was already logged (warn once per column) */
    private Set<Column> truncate_warnings = new HashSet<Column>();

    /** Number of lines consumed from the file so far (a skipped header counts as one) */
    private int line_ctr = 0;

    /** Set once the reader has hit end-of-file and has been closed */
    private boolean reader_closed = false;

    /**
     * Constructor
     *
     * @param catalog_tbl
     *            the table whose columns define the type of each tuple element
     * @param table_file
     *            the CSV file to read the rows from
     * @param has_header
     *            whether we expect the data file to include a header in the
     *            first row
     * @param auto_generate_first_column
     *            if true, the first column of every tuple is not read from the
     *            file but is set to the number of lines consumed before that
     *            row (including the header, if any)
     * @throws Exception
     *             if the reader cannot be created or the header row cannot be
     *             read
     */
    public TableDataIterable(Table catalog_tbl, File table_file, boolean has_header, boolean auto_generate_first_column) throws Exception {
        this.catalog_tbl = catalog_tbl;
        this.table_file = table_file;
        this.auto_generate_first_column = auto_generate_first_column;
        this.reader = new CSVReader(FileUtil.getReader(this.table_file));

        // Throw away the first row if there is a header
        if (has_header) {
            this.reader.readNext();
            this.line_ctr++;
        }

        // Column Types + Foreign Keys
        // Determine whether the column references a foreign key, and thus will
        // need to be converted to an integer field later
        this.types = new VoltType[catalog_tbl.getColumns().size()];
        this.fkeys = new boolean[this.types.length];
        this.nullable = new boolean[this.types.length];
        for (Column catalog_col : catalog_tbl.getColumns()) {
            int idx = catalog_col.getIndex();
            this.types[idx] = VoltType.get((byte) catalog_col.getType());
            this.fkeys[idx] = (CatalogUtil.getForeignKeyParent(catalog_col) != null);
            this.nullable[idx] = catalog_col.getNullable();
        } // FOR
    }

    /**
     * Constructor for a file without a header row and without an
     * auto-generated first column.
     *
     * @param catalog_tbl
     *            the table whose columns define the type of each tuple element
     * @param table_file
     *            the CSV file to read the rows from
     * @throws Exception
     *             if the reader cannot be created
     */
    public TableDataIterable(Table catalog_tbl, File table_file) throws Exception {
        this(catalog_tbl, table_file, false, false);
    }

    /**
     * Returns a new iterator over the remaining rows of the file. All
     * iterators share one reader, so rows consumed by one iterator are not
     * seen by another.
     */
    @Override
    public Iterator<Object[]> iterator() {
        return (new TableIterator());
    }

    /**
     * Closes the shared reader and remembers that it is closed so that no
     * iterator attempts to read from it again. A failure to close is only
     * logged because all of the data has already been delivered.
     */
    private void closeReader() {
        this.reader_closed = true;
        try {
            this.reader.close();
        } catch (Exception ex) {
            LOG.warn("Failed to close the reader for '" + this.table_file + "'", ex);
        }
    }

    /**
     * Parses a timestamp using the first layout in timestamp_formats that
     * accepts the value.
     *
     * @param value
     *            the raw field from the data file
     * @param catalog_col
     *            the target column (only used in the error message)
     * @return the parsed date
     * @throws RuntimeException
     *             if none of the layouts accepts the value
     */
    private java.util.Date parseTimestamp(String value, Column catalog_col) {
        for (DateFormat f : this.timestamp_formats) {
            try {
                return (f.parse(value));
            } catch (ParseException ignored) {
                // This layout does not match, try the next one
            }
        } // FOR
        throw new RuntimeException("Line " + this.line_ctr + ": Invalid timestamp format '" + value + "' for " + catalog_col);
    }

    /**
     * Iterator that converts one CSV row at a time into a typed tuple.
     */
    public class TableIterator implements Iterator<Object[]> {
        /** The row that was read ahead but not yet returned, or null */
        String[] next = null;

        /**
         * Reads the next row into the look-ahead buffer if the buffer is
         * empty. When the end of the file is reached the shared reader is
         * closed, and later calls become no-ops.
         */
        private void getNext() {
            if (next != null || reader_closed) {
                return;
            }
            try {
                next = reader.readNext();
            } catch (Exception ex) {
                throw new RuntimeException("Unable to retrieve tuples from '" + table_file + "'", ex);
            }
            if (next == null) {
                // End of file: release the underlying file handle
                closeReader();
            }
        }

        @Override
        public boolean hasNext() {
            this.getNext();
            return (next != null);
        }

        /**
         * Converts the next row of the file into a tuple.
         *
         * @return the tuple, or null if there are no more rows (kept for
         *         backward compatibility instead of throwing
         *         NoSuchElementException)
         * @throws RuntimeException
         *             if the row cannot be read or a field cannot be converted
         *             to the type of its column
         */
        @Override
        public Object[] next() {
            this.getNext();
            if (next == null)
                return (null);

            // Take ownership of the look-ahead row. Note that getNext() itself
            // is not synchronized, so this only guards the hand-off.
            String row[] = null;
            synchronized (this) {
                row = this.next;
                this.next = null;
            } // SYNCH

            Object tuple[] = new Object[types.length];
            // Position in the CSV row; it lags behind col_idx by one when the
            // first column is auto-generated
            int row_idx = 0;
            for (int col_idx = 0; col_idx < types.length; col_idx++) {
                Column catalog_col = catalog_tbl.getColumns().get(col_idx);
                assert (catalog_col != null) : "The column at position " + col_idx + " for " + catalog_tbl + " is null";

                // Auto-generate first column
                if (col_idx == 0 && auto_generate_first_column) {
                    tuple[col_idx] = Long.valueOf(line_ctr);
                }
                // Null Values: the row has fewer fields than the table has columns
                else if (row_idx >= row.length) {
                    tuple[col_idx] = null;
                }
                // Foreign Keys: passed through as the raw string
                else if (fkeys[col_idx]) {
                    tuple[col_idx] = row[row_idx++];
                }
                // Timestamps
                else if (types[col_idx] == VoltType.TIMESTAMP) {
                    String value = row[row_idx++];
                    // Same rule as the default branch below: an empty field in
                    // a nullable column is a null value, not a parse error
                    if (value.isEmpty() && nullable[col_idx]) {
                        tuple[col_idx] = null;
                    } else {
                        tuple[col_idx] = parseTimestamp(value, catalog_col);
                    }
                }
                // Store string (truncate if necessary)
                else if (types[col_idx] == VoltType.STRING) {
                    String value = row[row_idx++];
                    // Clip columns that are larger than our limit
                    int limit = catalog_col.getSize();
                    if (value.length() > limit) {
                        // Only warn the first time a given column is truncated
                        if (!truncate_warnings.contains(catalog_col)) {
                            LOG.warn("Line " + TableDataIterable.this.line_ctr + ": Truncating data for " + catalog_col.fullName() + " because size " + value.length() + " > " + limit);
                            truncate_warnings.add(catalog_col);
                        }
                        value = value.substring(0, limit);
                    }
                    tuple[col_idx] = value;
                }
                // Default: Cast the string into the proper type
                else {
                    String value = row[row_idx++];
                    if (value.isEmpty() && nullable[col_idx]) {
                        tuple[col_idx] = null;
                    } else {
                        try {
                            tuple[col_idx] = VoltTypeUtil.getObjectFromString(types[col_idx], value);
                        } catch (Exception ex) {
                            throw new RuntimeException("Line " + TableDataIterable.this.line_ctr + ": Invalid value for " + catalog_col, ex);
                        }
                    }
                }
            } // FOR
            TableDataIterable.this.line_ctr++;
            return (tuple);
        }

        /**
         * Not supported: rows cannot be removed from the underlying file.
         *
         * @throws UnsupportedOperationException
         *             always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("Cannot remove rows from '" + table_file + "'");
        }
    }
}