package crunch;
import static com.cloudera.crunch.type.writable.Writables.ints;
import static com.cloudera.crunch.type.writable.Writables.strings;
import static com.cloudera.crunch.type.writable.Writables.tableOf;
import java.io.IOException;
import org.junit.Test;
import com.cloudera.crunch.CombineFn;
import com.cloudera.crunch.DoFn;
import com.cloudera.crunch.Emitter;
import com.cloudera.crunch.PCollection;
import com.cloudera.crunch.PTable;
import com.cloudera.crunch.Pair;
import com.cloudera.crunch.Pipeline;
import com.cloudera.crunch.impl.mr.MRPipeline;
/**
 * Runs a Crunch pipeline that reads text records from {@code "input"}, extracts a
 * (year, air temperature) pair from fixed column positions of each record, keeps the
 * maximum temperature per year, and writes the result to {@code "output"}.
 */
public class MaxTemperatureCrunchTest {

  /** Temperature value that marks a record as having no reading. */
  private static final int MISSING = 9999;

  /** Column range (begin inclusive, end exclusive) holding the year. */
  private static final int YEAR_BEGIN = 15;
  private static final int YEAR_END = 19;

  /** Column of the temperature sign and end (exclusive) of the temperature digits. */
  private static final int TEMP_SIGN = 87;
  private static final int TEMP_END = 92;

  /** Column of the single-character quality code. */
  private static final int QUALITY = 92;

  /** Shortest record that contains every column read by the parser. */
  private static final int MIN_RECORD_LENGTH = QUALITY + 1;

  /** Quality codes for which a temperature reading is accepted. */
  private static final String ACCEPTED_QUALITY_CODES = "01459";

  /**
   * Builds and runs the max-temperature pipeline end to end.
   *
   * @throws IOException declared for I/O failures raised while reading or writing
   */
  @Test
  public void test() throws IOException {
    Pipeline pipeline = new MRPipeline(MaxTemperatureCrunchTest.class);
    PCollection<String> records = pipeline.readTextFile("input");

    PTable<String, Integer> maxTemps = records
        .parallelDo(toYearTempPairsFn(), tableOf(strings(), ints()))
        .groupByKey()
        .combineValues(CombineFn.<String> MAX_INTS());

    pipeline.writeTextFile(maxTemps, "output");
    pipeline.run();
  }

  /**
   * Creates the function that turns one raw record line into at most one
   * (year, temperature) pair.
   *
   * <p>Nothing is emitted when the record is too short to contain all required
   * columns, when the temperature equals {@link #MISSING}, or when the quality code
   * is not one of {@link #ACCEPTED_QUALITY_CODES}.
   *
   * @return a {@code DoFn} mapping record lines to year/temperature pairs
   */
  private static DoFn<String, Pair<String, Integer>> toYearTempPairsFn() {
    return new DoFn<String, Pair<String, Integer>>() {
      @Override
      public void process(String input, Emitter<Pair<String, Integer>> emitter) {
        // A truncated line must not abort the whole job with an index error; skip it.
        if (input == null || input.length() < MIN_RECORD_LENGTH) {
          return;
        }

        String year = input.substring(YEAR_BEGIN, YEAR_END);

        // Older parseInt implementations reject a leading plus sign, so drop it.
        int digitsBegin = input.charAt(TEMP_SIGN) == '+' ? TEMP_SIGN + 1 : TEMP_SIGN;
        int airTemperature = Integer.parseInt(input.substring(digitsBegin, TEMP_END));

        // Plain character lookup instead of String.matches, which would recompile
        // the regular expression for every record.
        boolean qualityOk = ACCEPTED_QUALITY_CODES.indexOf(input.charAt(QUALITY)) >= 0;

        if (airTemperature != MISSING && qualityOk) {
          emitter.emit(Pair.of(year, airTemperature));
        }
      }
    };
  }
}