Package edu.brown.utils

Source Code of edu.brown.utils.TableDataIterable$TableIterator

/***************************************************************************
*  Copyright (C) 2012 by H-Store Project                                  *
*  Brown University                                                       *
*  Massachusetts Institute of Technology                                  *
*  Yale University                                                        *
*                                                                         *
*  http://hstore.cs.brown.edu/                                            *
*                                                                         *
*  Permission is hereby granted, free of charge, to any person obtaining  *
*  a copy of this software and associated documentation files (the        *
*  "Software"), to deal in the Software without restriction, including    *
*  without limitation the rights to use, copy, modify, merge, publish,    *
*  distribute, sublicense, and/or sell copies of the Software, and to     *
*  permit persons to whom the Software is furnished to do so, subject to  *
*  the following conditions:                                              *
*                                                                         *
*  The above copyright notice and this permission notice shall be         *
*  included in all copies or substantial portions of the Software.        *
*                                                                         *
*  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        *
*  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     *
*  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. *
*  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR      *
*  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,  *
*  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR  *
*  OTHER DEALINGS IN THE SOFTWARE.                                        *
***************************************************************************/
/**
*
*/
package edu.brown.utils;

import java.io.File;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

import org.apache.log4j.Logger;
import org.voltdb.VoltType;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.Table;
import org.voltdb.utils.CatalogUtil;
import org.voltdb.utils.VoltTypeUtil;

import au.com.bytecode.opencsv.CSVReader;

/**
* @author pavlo
*/
public class TableDataIterable implements Iterable<Object[]> {
    private static final Logger LOG = Logger.getLogger(TableDataIterable.class.getName());

    private final Table catalog_tbl;
    private final File table_file;
    private final CSVReader reader;
    private final VoltType types[];
    private final boolean fkeys[];
    private final boolean nullable[];
    private final boolean auto_generate_first_column;

    private final DateFormat timestamp_formats[] = new DateFormat[] { new SimpleDateFormat("yyyy-MM-dd"), new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"), new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"), };
    private Set<Column> truncate_warnings = new HashSet<Column>();
    private int line_ctr = 0;

    /**
     * Constructor
     *
     * @param catalog_tbl
     * @param table_file
     * @param has_header
     *            whether we expect the data file to include a header in the
     *            first row
     * @param auto_generate_first_column
     *            TODO
     * @throws Exception
     */
    public TableDataIterable(Table catalog_tbl, File table_file, boolean has_header, boolean auto_generate_first_column) throws Exception {
        this.catalog_tbl = catalog_tbl;
        this.table_file = table_file;
        this.auto_generate_first_column = auto_generate_first_column;
        this.reader = new CSVReader(FileUtil.getReader(this.table_file));

        // Throw away the first row if there is a header
        if (has_header) {
            this.reader.readNext();
            this.line_ctr++;
        }

        // Column Types + Foreign Keys
        // Determine whether the column references a foreign key, and thus will
        // need to be converted to an integer field later
        this.types = new VoltType[catalog_tbl.getColumns().size()];
        this.fkeys = new boolean[this.types.length];
        this.nullable = new boolean[this.types.length];
        for (Column catalog_col : catalog_tbl.getColumns()) {
            int idx = catalog_col.getIndex();
            this.types[idx] = VoltType.get((byte) catalog_col.getType());
            this.fkeys[idx] = (CatalogUtil.getForeignKeyParent(catalog_col) != null);
            this.nullable[idx] = catalog_col.getNullable();
        } // FOR
    }

    /**
     * Constructor
     *
     * @param catalog_tbl
     * @param table_file
     * @throws Exception
     */
    public TableDataIterable(Table catalog_tbl, File table_file) throws Exception {
        this(catalog_tbl, table_file, false, false);
    }

    public Iterator<Object[]> iterator() {
        return (new TableIterator());
    }

    public class TableIterator implements Iterator<Object[]> {
        String[] next = null;

        private void getNext() {
            if (next == null) {
                try {
                    next = reader.readNext();
                } catch (Exception ex) {
                    throw new RuntimeException("Unable to retrieve tuples from '" + table_file + "'", ex);
                }
            }
        }

        @Override
        public boolean hasNext() {
            this.getNext();
            return (next != null);
        }

        @Override
        public Object[] next() {
            this.getNext();
            if (next == null)
                return (next);
            String row[] = null;
            synchronized (this) {
                row = this.next;
                this.next = null;
            } // SYNCH

            Object tuple[] = new Object[types.length];
            int row_idx = 0;
            for (int col_idx = 0; col_idx < types.length; col_idx++) {
                Column catalog_col = catalog_tbl.getColumns().get(col_idx);
                assert (catalog_col != null) : "The column at position " + col_idx + " for " + catalog_tbl + " is null";

                // Auto-generate first column
                if (col_idx == 0 && auto_generate_first_column) {
                    tuple[col_idx] = new Long(line_ctr);
                }
                // Null Values
                else if (row_idx >= row.length) {
                    tuple[col_idx] = null;
                }
                // Foreign Keys
                else if (fkeys[col_idx]) {
                    tuple[col_idx] = row[row_idx++];
                }
                // Timestamps
                else if (types[col_idx] == VoltType.TIMESTAMP) {
                    for (DateFormat f : timestamp_formats) {
                        try {
                            tuple[col_idx] = f.parse(row[row_idx]);
                        } catch (ParseException ex) {
                            // Ignore...
                        }
                        if (tuple[col_idx] != null)
                            break;
                    } // FOR
                    if (tuple[col_idx] == null) {
                        throw new RuntimeException("Line " + TableDataIterable.this.line_ctr + ": Invalid timestamp format '" + row[row_idx] + "' for " + catalog_col);
                    }
                    row_idx++;
                }
                // Store string (truncate if necessary)
                else if (types[col_idx] == VoltType.STRING) {
                    // Clip columns that are larger than our limit
                    int limit = catalog_col.getSize();
                    if (row[row_idx].length() > limit) {
                        if (!truncate_warnings.contains(catalog_col)) {
                            LOG.warn("Line " + TableDataIterable.this.line_ctr + ": Truncating data for " + catalog_col.fullName() + " because size " + row[row_idx].length() + " > " + limit);
                            truncate_warnings.add(catalog_col);
                        }
                        row[row_idx] = row[row_idx].substring(0, limit);
                    }
                    tuple[col_idx] = row[row_idx++];
                }
                // Default: Cast the string into the proper type
                else {
                    if (row[row_idx].isEmpty() && nullable[col_idx]) {
                        tuple[col_idx] = null;
                    } else {
                        try {
                            tuple[col_idx] = VoltTypeUtil.getObjectFromString(types[col_idx], row[row_idx]);
                        } catch (Exception ex) {
                            throw new RuntimeException("Line " + TableDataIterable.this.line_ctr + ": Invalid value for " + catalog_col, ex);
                        }
                    }
                    row_idx++;
                }
                // System.out.println(col_idx + ": " + tuple[col_idx]);
            } // FOR
            TableDataIterable.this.line_ctr++;
            return (tuple);
        }

        @Override
        public void remove() {
            // TODO Auto-generated method stub

        }
    }
}
TOP

Related Classes of edu.brown.utils.TableDataIterable$TableIterator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.