Package utils

Source Code of utils.DataFileReader

package utils;

import ca.uwo.csd.ai.nlp.common.SparseVector;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;

/**
* <code>DataFileReader</code> reads data files written in LibSVM format.
* @author Syeed Ibn Faiz
*/
public class DataFileReader {
   
    public static Instance[] readDataFile(String fileName) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));       
       
        ArrayList<Double> labels = new ArrayList<Double>();
        ArrayList<SparseVector> vectors = new ArrayList<SparseVector>();
       
        String line;
        int lineCount = 0;
        while ((line = reader.readLine()) != null) {
            lineCount++;
            String[] tokens = line.split("\\s+");
            if (tokens.length < 2) {               
                System.err.println("Inappropriate file format: " + fileName);
                System.err.println("Error in line " + lineCount);
                System.exit(-1);
            }
           
            labels.add(Double.parseDouble(tokens[0]));           
            SparseVector vector = new SparseVector(tokens.length - 1);
           
            for (int i = 1; i < tokens.length; i++) {
                String[] fields = tokens[i].split(":");
                if (fields.length < 2) {
                    System.err.println("Inappropriate file format: " + fileName);
                    System.err.println("Error in line " + lineCount);
                    System.exit(-1);
                }
                int index = Integer.parseInt(fields[0]);
                double value = Double.parseDouble(fields[1]);
                vector.add(index, value);
            }
           
            vectors.add(vector);
        }               
       
        Instance[] instances = new Instance[labels.size()];
        for (int i = 0; i < instances.length; i++) {
            instances[i] = new Instance(labels.get(i), vectors.get(i));
        }
       
        return instances;
    }
}
TOP

Related Classes of utils.DataFileReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.