// Records the test expects to read back. Every field is wrapped in '*' rather than
// the usual double quote, and the address field of the first record contains
// embedded newline and tab characters inside the '*' delimiters.
String[] expectedFileContents = {
"*Champion, Mac*,*1234 Hoth St.\n\tApartment 101\n\tAtlanta, GA\n\t64086*,*30*,*M*,*5/28/2010 12:00:00 AM*,*Just some guy*",
"*Mac, Champion*,*5678 Tatooine Rd. Apt 5, Mobile, AL 36608*,*30*,*M*,*Some other date*,*short description*" };
// Copy the fixture resource via the temp-dir helper and keep the resulting file name.
String csvWithNewlines = tmpDir.copyResourceFileName("customQuoteCharWithNewlines.csv");
Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
// Read the file with the default buffer size, input encoding and escape character,
// but pass '*' for both remaining character arguments.
// NOTE(review): presumably these are the opening and closing quote characters - confirm
// against the CSVFileSource constructor signature.
PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines),
CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '*', '*',
CSVLineReader.DEFAULT_ESCAPE_CHARACTER));
pipeline.run();
// Fetch the lines that were read as an in-memory collection so they can be checked directly.
Collection<String> csvLinesList = csvLines.asCollection().getValue();
// Each expected record must appear somewhere in the output; contains() is used, so the
// order of the records is not verified.
for (int i = 0; i < expectedFileContents.length; i++) {
assertTrue(csvLinesList.contains(expectedFileContents[i]));