Package de.jungblut.reader

Source Code of de.jungblut.reader.CsvDatasetReader

package de.jungblut.reader;

import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

import au.com.bytecode.opencsv.CSVReader;
import de.jungblut.math.DoubleVector;
import de.jungblut.math.dense.DenseDoubleVector;

/**
* Binary dataset reader from CSVs.
*
* @author thomas.jungblut
*
*/
public final class CsvDatasetReader {

  private CsvDatasetReader() {
    throw new IllegalAccessError();
  }

  /**
   * Reads a csv into feature and outcome arrays.
   *
   * @param path the path to read from
   * @param separator the separator to use
   * @param quote the quote, null if none present
   * @param numFeatures the number of features to expect
   * @param outcomeIndex the index of the outcome
   * @return a new dataset.
   */
  public static Dataset readCsv(String path, char separator, Character quote,
      int numFeatures, int outcomeIndex) {
    ArrayList<DoubleVector> featureList = new ArrayList<>();
    ArrayList<DenseDoubleVector> outcomeList = new ArrayList<>();

    try (CSVReader reader = (quote != null ? new CSVReader(
        new FileReader(path), separator, quote) : new CSVReader(new FileReader(
        path), separator))) {

      String[] line;
      while ((line = reader.readNext()) != null) {
        double[] fArray = new double[line.length - 1];
        int index = 0;
        for (int i = 0; i < line.length; i++) {
          if (i != outcomeIndex)
            fArray[index++] = Double.parseDouble(line[i]);
        }
        DoubleVector f = new DenseDoubleVector(fArray);
        DenseDoubleVector o = new DenseDoubleVector(1);
        o.set(0, Double.parseDouble(line[outcomeIndex]));
        featureList.add(f);
        outcomeList.add(o);
      }

    } catch (IOException e) {
      e.printStackTrace();
    }

    DoubleVector[] features = featureList.toArray(new DoubleVector[featureList
        .size()]);
    DoubleVector[] outcome = outcomeList.toArray(new DoubleVector[outcomeList
        .size()]);
    return new Dataset(features, outcome);

  }
}
TOP

Related Classes of de.jungblut.reader.CsvDatasetReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.