Package kr.ac.yonsei.datacleanser

Source Code of kr.ac.yonsei.datacleanser.DataCleanserUsingJCSV

package kr.ac.yonsei.datacleanser;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Arrays;

import com.googlecode.jcsv.CSVStrategy;
import com.googlecode.jcsv.reader.CSVReader;
import com.googlecode.jcsv.reader.internal.CSVReaderBuilder;
import com.googlecode.jcsv.reader.internal.DefaultCSVEntryParser;

public class DataCleanserUsingJCSV {
  private InputStream is;
  private String delim;

  public DataCleanserUsingJCSV(InputStream is, String delim) {
    this.is = is;
    this.delim = delim;
  }

  public void run(OutputStream out) throws IOException {
    PrintWriter writer = new PrintWriter(out);

    InputStreamReader reader = new InputStreamReader(is, "UTF-8");
    CSVReader<String[]> csvReader = new CSVReaderBuilder<String[]>(reader).strategy(CSVStrategy.UK_DEFAULT).entryParser(new DefaultCSVEntryParser()).build();
   
    for (String[] cols : csvReader) {
      if (isValue(cols) == false) {
        continue;
      }
      String[] cleansedCols = getCleansedColumns(cols);
      writeRow(writer, cleansedCols);
    }

    csvReader.close();
    reader.close();
    writer.flush();
  }

  public boolean isValue(String[] cols) {
    if (cols.length != 3) {
      System.out.println("aaa" + cols.length + Arrays.toString(cols));
      return false;
    }

    if (cols[0].startsWith("080") || cols[1].startsWith("080")) {
      return false;
    }

    if (cols[0].startsWith("1577") || cols[1].startsWith("1588")) {
      return false;
    }

    return true;
  }

  public String[] getCleansedColumns(String[] cols) {
    String[] result = new String[3];

    result[0] = getCleansedColumn(cols[0]);
    result[1] = getCleansedColumn(cols[1]);
    result[2] = cols[2];

    return result;
  }

  public String getCleansedColumn(String col) {
    String dashRemoved = col.replace("-", "");
    if (dashRemoved.startsWith("0") == false) {
      return "02" + dashRemoved;
    }

    return dashRemoved;
  }

  public void writeRow(PrintWriter writer, String[] cols) {
    writer.println(cols[0] + delim + cols[1] + delim + cols[2]);
  }
}
TOP

Related Classes of kr.ac.yonsei.datacleanser.DataCleanserUsingJCSV

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.