Source Code of no.priv.garshol.duke.datasources.CSVDataSource$CSVRecordIterator


package no.priv.garshol.duke.datasources;


import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.io.Reader;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.FileNotFoundException;
import java.io.IOException;


import no.priv.garshol.duke.Record;
import no.priv.garshol.duke.DukeException;
import no.priv.garshol.duke.RecordIterator;
import no.priv.garshol.duke.DukeConfigException;
import no.priv.garshol.duke.utils.CSVReader;


public class CSVDataSource extends ColumnarDataSource {
  private String file;
  private String encoding;
  private Reader directreader; // overrides 'file'; used for testing
  private int skiplines;
  private boolean hasheader;
  private char separator;


  public CSVDataSource() {
    super();
    this.hasheader = true;
  }


  public String getInputFile() {
    return file;
  }


  public void setInputFile(String file) {
    this.file = file;
  }


  public String getEncoding() {
    return encoding;
  }


  public void setEncoding(String encoding) {
    this.encoding = encoding;
  }


  public int getSkipLines() {
    return skiplines;
  }


  public void setSkipLines(int skiplines) {
    this.skiplines = skiplines;
  }


  public boolean getHeaderLine() {
    return hasheader;
  }


  public void setHeaderLine(boolean hasheader) {
    this.hasheader = hasheader;
  }


  public char getSeparator() {
    return separator;
  }


  public void setSeparator(char separator) {
    this.separator = separator;
  }


  // this is used only for testing
  public void setReader(Reader reader) {
    this.directreader = reader;
  }


  public RecordIterator getRecords() {
    if (directreader == null)
      verifyProperty(file, "input-file");


    try {
      Reader in;
      if (directreader != null)
        in = directreader;
      else {
        if (encoding == null)
          in = new FileReader(file);
        else
          in = new InputStreamReader(new FileInputStream(file), encoding);
      }


      CSVReader csv = new CSVReader(in);
      if (separator != 0)
        csv.setSeparator(separator);
      return new CSVRecordIterator(csv);
    } catch (FileNotFoundException e) {
      throw new DukeConfigException("Couldn't find CSV file '" + file + "'");
    } catch (IOException e) {
      throw new DukeException(e);
    }
  }


  protected String getSourceName() {
    return "CSV";
  }


  public class CSVRecordIterator extends RecordIterator {
    private CSVReader reader;
    private int[] index;     // what index in row to find colum[ix] value in
    private Column[] column; // all the columns, in random order
    private RecordBuilder builder;
    private Record nextrecord;


    public CSVRecordIterator(CSVReader reader) throws IOException {
      this.reader = reader;
      this.builder = new RecordBuilder(CSVDataSource.this);


      // index here is random 0-n. index[0] gives the column no in the CSV
      // file, while colname[0] gives the corresponding column name.
      index = new int[columns.size()];
      column = new Column[columns.size()];


      // skip the required number of lines before getting to the data
      for (int ix = 0; ix < skiplines; ix++)
        reader.next();


      // learn column indexes from header line (if there is one)
      String[] header = null;
      if (hasheader)
        header = reader.next();
      else {
        // find highest column number
        int high = 0;
        for (Column c : getColumns())
          high = Math.max(high, Integer.parseInt(c.getName()));


        // build corresponding index
        header = new String[high];
        for (int ix = 0; ix < high; ix++)
          header[ix] = "" + (ix + 1);
      }


      // what if there is no header?
      if (hasheader && !getColumns().isEmpty() && header == null)
        throw new DukeException("CSV file contained no header");


      // build the 'index' and 'column' indexes
      int count = 0;
      for (Column c : getColumns()) {
        boolean found = false;
        for (int ix = 0; ix < header.length; ix++) {
          if (header[ix].equals(c.getName())) {
            index[count] = ix;
            column[count++] = c;
            found = true;
            break;
          }
        }
        if (!found)
          throw new DukeConfigException("Column " + c.getName() + " not found "+
                                        "in CSV file");
      }


      findNextRecord();
    }


    private void findNextRecord() {
      String[] row;
      try {
        row = reader.next();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }


      if (row == null) {
        nextrecord = null; // there isn't any next record
        return;
      }


      // build a record from the current row
      builder.newRecord();
      for (int ix = 0; ix < column.length; ix++) {
        if (index[ix] >= row.length)
          continue; // order is arbitrary, so we might not be done yet


        builder.addValue(column[ix], row[index[ix]]);
      }


      nextrecord = builder.getRecord();
    }


    public boolean hasNext() {
      return (nextrecord != null);
    }


    public Record next() {
      Record thenext = nextrecord;
      findNextRecord();
      return thenext;
    }
  }
}
Source Code of no.priv.garshol.duke.datasources.CSVDataSource$CSVRecordIterator

Related Classes of no.priv.garshol.duke.datasources.CSVDataSource$CSVRecordIterator