package kr.ac.yonsei.datacleanser;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import com.googlecode.jcsv.CSVStrategy;
import com.googlecode.jcsv.reader.CSVReader;
import com.googlecode.jcsv.reader.internal.CSVReaderBuilder;
import com.googlecode.jcsv.reader.internal.DefaultCSVEntryParser;
/**
 * Reads three-column CSV rows from an input stream, drops rows that fail
 * {@link #isValue(String[])}, normalizes the first two columns with
 * {@link #getCleansedColumn(String)}, and writes the surviving rows to an
 * output stream joined by a caller-supplied delimiter.
 *
 * <p>The first two columns are treated as dash-separated numbers (presumably
 * Korean phone numbers, judging by the prefixes used here; confirm against
 * the data source). The third column is passed through unchanged.
 */
public class DataCleanserUsingJCSV {
    /** Number of columns a row must have to be processed. */
    private static final int EXPECTED_COLUMNS = 3;

    /**
     * Rows whose first or second column starts with one of these prefixes are
     * dropped. NOTE(review): presumably toll-free / nationwide service number
     * prefixes; confirm the intended list with the data owner.
     */
    private static final String[] EXCLUDED_PREFIXES = {"080", "1577", "1588"};

    /**
     * Prefix prepended to numbers that do not start with "0".
     * NOTE(review): looks like a default area code; confirm.
     */
    private static final String DEFAULT_PREFIX = "02";

    private final InputStream is;
    private final String delim;

    /**
     * @param is    stream containing the CSV data; it is decoded as UTF-8 and
     *              closed by {@link #run(OutputStream)}
     * @param delim separator written between the output columns
     */
    public DataCleanserUsingJCSV(InputStream is, String delim) {
        this.is = is;
        this.delim = delim;
    }

    /**
     * Processes every row of the input and writes the cleansed rows to
     * {@code out}, one per line.
     *
     * <p>The input reader is closed and the output is flushed even when
     * reading fails part-way. The output stream itself is not closed.
     *
     * @param out destination for the cleansed rows
     * @throws IOException if the input cannot be read or closed, or if writing
     *                     to {@code out} failed
     */
    public void run(OutputStream out) throws IOException {
        // NOTE(review): PrintWriter(OutputStream) encodes with the platform
        // default charset while the input is decoded as UTF-8; confirm whether
        // the output should be forced to UTF-8 as well.
        PrintWriter writer = new PrintWriter(out);
        InputStreamReader reader = new InputStreamReader(is, "UTF-8");
        try {
            CSVReader<String[]> csvReader = new CSVReaderBuilder<String[]>(reader)
                    .strategy(CSVStrategy.UK_DEFAULT)
                    .entryParser(new DefaultCSVEntryParser())
                    .build();
            try {
                for (String[] cols : csvReader) {
                    if (!isValue(cols)) {
                        continue;
                    }
                    writeRow(writer, getCleansedColumns(cols));
                }
            } finally {
                csvReader.close();
            }
        } finally {
            // Flush first: PrintWriter.flush() never throws, so rows written
            // so far reach the stream even if closing the reader fails.
            writer.flush();
            reader.close();
        }
        // PrintWriter swallows IOExceptions; surface them instead of silently
        // producing truncated output.
        if (writer.checkError()) {
            throw new IOException("Failed to write cleansed rows to the output stream");
        }
    }

    /**
     * Decides whether a row should be kept.
     *
     * <p>A row is rejected when it does not have exactly three columns (a
     * diagnostic is printed to standard error) or when either of its first two
     * columns starts with one of the excluded prefixes.
     *
     * @param cols the raw columns of one row
     * @return {@code true} if the row should be cleansed and written
     */
    public boolean isValue(String[] cols) {
        if (cols.length != EXPECTED_COLUMNS) {
            // Diagnostics go to stderr so they cannot mix with row output
            // when the caller writes the result to stdout.
            System.err.println("Skipping row with " + cols.length + " column(s), expected "
                    + EXPECTED_COLUMNS + ": " + Arrays.toString(cols));
            return false;
        }
        return !hasExcludedPrefix(cols[0]) && !hasExcludedPrefix(cols[1]);
    }

    /** Returns whether {@code col} starts with any of the excluded prefixes. */
    private static boolean hasExcludedPrefix(String col) {
        for (String prefix : EXCLUDED_PREFIXES) {
            if (col.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the output columns for a row: the first two columns are cleansed
     * with {@link #getCleansedColumn(String)}, the third is copied as-is.
     *
     * @param cols a row with at least three columns
     * @return a new three-element array; {@code cols} is not modified
     */
    public String[] getCleansedColumns(String[] cols) {
        return new String[] {
            getCleansedColumn(cols[0]),
            getCleansedColumn(cols[1]),
            cols[2]
        };
    }

    /**
     * Removes every {@code '-'} from {@code col} and, if the result does not
     * start with {@code "0"}, prepends {@code "02"}.
     *
     * <p>Note that an empty column therefore becomes {@code "02"}.
     *
     * @param col the raw column value
     * @return the normalized value
     */
    public String getCleansedColumn(String col) {
        String dashRemoved = col.replace("-", "");
        return dashRemoved.startsWith("0") ? dashRemoved : DEFAULT_PREFIX + dashRemoved;
    }

    /**
     * Writes the first three columns to {@code writer} as one line, separated
     * by the configured delimiter.
     *
     * @param writer destination writer
     * @param cols   a row with at least three columns
     */
    public void writeRow(PrintWriter writer, String[] cols) {
        writer.println(cols[0] + delim + cols[1] + delim + cols[2]);
    }
}