package com.manning.hip.ch11;
import com.maxmind.geoip.LookupService;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Pig eval function that resolves an IP address (chararray) to the name of
 * its country using a MaxMind GeoIP {@link LookupService}.
 *
 * <p>The GeoIP database is read from a file named {@code geoip} in the
 * task's current working directory. The constant name suggests the file is
 * expected to be shipped through Hadoop's distributed cache under that
 * name; confirm against the job setup.
 *
 * <p>Example usage (the class has no constructor arguments):
 * <pre>
 * a = load '1.txt';
 * DEFINE GeoIP com.manning.hip.ch11.PigGeolocationUDF();
 * b = foreach a generate GeoIP(*);
 * dump b;
 * </pre>
 */
public class PigGeolocationUDF extends EvalFunc<String> {

  /** Alias of the single field declared by {@link #outputSchema(Schema)}. */
  private static final String COUNTRY = "country";

  /** File name, relative to the working directory, of the GeoIP database. */
  private static final String DIST_CACHE_GEOIP_NAME = "geoip";

  /** Lookup service, created on the first call to {@link #lookup(String)}. */
  private LookupService geoloc;

  /**
   * Looks up the country for the IP address held in the first field of the
   * tuple.
   *
   * @param input tuple whose first field is the IP address as a String
   * @return the country name, or {@code null} if the tuple is null or empty,
   *         its first field is null, or the lookup yields no country
   * @throws IOException if reading the tuple or opening the GeoIP database
   *         fails
   */
  @Override
  public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
      return null;
    }
    Object object = input.get(0);
    if (object == null) {
      return null;
    }
    // The chararray mapping in getArgToFuncMapping() is what routes String
    // values to this class, hence the direct cast.
    String ip = (String) object;
    return lookup(ip);
  }

  /**
   * Resolves an IP address to a country name, opening the GeoIP database on
   * first use and reusing it for subsequent calls.
   *
   * @param ip the IP address to look up
   * @return the country name, or {@code null} when the service reports the
   *         placeholder name {@code "N/A"}
   * @throws IOException if the GeoIP database cannot be opened
   */
  protected String lookup(String ip) throws IOException {
    if (geoloc == null) {
      // Lazily created so that constructing the UDF does not require the
      // database file to be present.
      geoloc =
          new LookupService("./" + DIST_CACHE_GEOIP_NAME, LookupService.GEOIP_MEMORY_CACHE);
    }
    String country = geoloc.getCountry(ip).getName();
    // "N/A" is treated as "no result" and surfaced to Pig as null.
    if ("N/A".equals(country)) {
      return null;
    }
    return country;
  }

  /**
   * Declares the argument types supported by this function: chararray
   * input is handled by this class, long input is routed to
   * {@code PigLongGeolocationUDF}.
   *
   * @return the list of function specifications, one per supported input
   *         schema
   * @throws FrontendException if a schema cannot be constructed
   */
  @Override
  public List<FuncSpec> getArgToFuncMapping()
      throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(),
        new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
    funcList.add(new FuncSpec(PigLongGeolocationUDF.class.getName(),
        new Schema(new Schema.FieldSchema(null, DataType.LONG))));
    return funcList;
  }

  /**
   * Describes the result as a single chararray field named
   * {@code country}, regardless of the input schema.
   *
   * @param input the schema of the function's input (not consulted)
   * @return a one-field schema for the country name
   */
  @Override
  public Schema outputSchema(Schema input) {
    return new Schema(
        new Schema.FieldSchema(COUNTRY, DataType.CHARARRAY));
  }
}