Package com.deafgoat.ml.prognosticator

Source Code of com.deafgoat.ml.prognosticator.InstancesReader

/**
* Copyright 2012, Wisdom Omuya.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.deafgoat.ml.prognosticator;

// Java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

// Weka
import weka.core.Instances;
import weka.core.converters.CSVLoader;
import weka.core.converters.ConverterUtils.DataSource;

/**
* Utility to read from several data sources. This class provides an interface
* to read from ARFF, CSV, and MongoDB. For MongoDB, it works with simple
* collections with no nested fields.
*/
public class InstancesReader {

    /**
     * @return all instances read from the data source
     */
    public Instances getInstances() {
        return _instances;
    }

    /**
     * Read from a .ARFF file
     *
     * @throws IOException
     *             If the ARFF file can not be found
     * @return the set of instances read
     */
    public Instances readFromARFF() throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(_filename));
        _instances = new Instances(reader);
        reader.close();
        return _instances;
    }

    /**
     * Read from a .CSV file
     *
     * @throws IOException
     * @return the set of instances read
     */
    public Instances readFromCSV() throws IOException {
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(_filename));
        _instances = loader.getDataSet();
        return _instances;
    }

    /**
     * Read directly from a MongoDB collection
     *
     * @param csv
     *            The CSV file to read into
     * @throws Exception
     *             If collection can not be read
     * @return the set of instances read
     */
    public Instances readFromMongoDB(String csv) throws Exception {
        _m2v = new Mongo2CSV(_host, _port, _db, _coll);
        _m2v.setFields(_fields);
        _m2v.writeCSV(csv);
        _m2v.close();
        _source = new DataSource(csv);
        _instances = _source.getDataSet();
        return _instances;
    }

    /**
     * working collection
     */
    private String _coll;

    /**
     * working database
     */
    private String _db;

    /**
     * working host
     */
    private String _host;

    /**
     * working instances to read to
     */
    private Instances _instances;

    /**
     * working port
     */
    private Integer _port;

    /**
     * sets of fields to read
     */
    private String[] _fields;

    /**
     * the file to write to
     */
    private String _filename;

    /**
     * Mongo2CSV handle
     */
    private Mongo2CSV _m2v;

    /**
     * handler for data source
     */
    private DataSource _source;

    /**
     * @param filename
     *            The source file to read from
     */
    public InstancesReader(String filename) {
        _filename = filename;
    }

    /**
     * @param host
     *            The hostname of the database
     * @param port
     *            The port number of the database
     * @param db
     *            The name of the database
     * @param coll
     *            The name of the collection
     * @param fields
     *            The set of fields to read from the collection
     */
    public InstancesReader(String host, Integer port, String db, String coll, String[] fields) {
        _host = host;
        _port = port;
        _db = db;
        _coll = coll;
        _fields = fields;
    }

}
TOP

Related Classes of com.deafgoat.ml.prognosticator.InstancesReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.