@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException
{
    // Build a Kafka producer from the job's output path and configuration.
    //
    // The output path must be a URI of the form:
    //     kafka://<broker host:port>[,<broker host:port>...]/<topic>
    // e.g. kafka://kafka-server:9000,kafka-server2:9000/foobar
    //
    // Producer properties are assembled from the class-level defaults
    // (kafkaConfigMap) and then overridden by any job-conf entries under
    // KAFKA_CONFIG_PREFIX. The effective properties are written back into
    // the job conf for easier debugging.
    //
    // @param context task attempt context carrying the job configuration
    // @return a KafkaRecordWriter that publishes records to the resolved topic
    // @throws KafkaException if the output URL, scheme, or topic is missing
    Path outputPath = getOutputPath(context);
    if (outputPath == null) {
        throw new KafkaException("no kafka output url specified");
    }
    URI uri = URI.create(outputPath.toString());
    Configuration job = context.getConfiguration();
    Properties props = new Properties();
    String topic;

    props.putAll(kafkaConfigMap); // inject default configuration
    for (Map.Entry<String, String> m : job) { // handle any overrides
        String key = m.getKey();
        if (!key.startsWith(KAFKA_CONFIG_PREFIX)) {
            continue;
        }
        if (key.equals(KAFKA_URL)) {
            continue;
        }
        // Guard against keys that match the prefix but carry no suffix
        // (e.g. the bare prefix itself): substring below would throw or
        // produce an empty property name.
        if (key.length() <= KAFKA_CONFIG_PREFIX.length() + 1) {
            continue;
        }
        // strip "<prefix>." to recover the native Kafka property name
        String kafkaKeyName = key.substring(KAFKA_CONFIG_PREFIX.length() + 1);
        props.setProperty(kafkaKeyName, m.getValue()); // set Kafka producer property
    }

    // inject Kafka producer props back into jobconf for easier debugging
    for (Map.Entry<Object, Object> m : props.entrySet()) {
        job.set(KAFKA_CONFIG_PREFIX + "." + m.getKey().toString(), m.getValue().toString());
    }

    // KafkaOutputFormat specific parameters
    final int queueBytes = job.getInt(KAFKA_CONFIG_PREFIX + ".queue.bytes", KAFKA_QUEUE_BYTES);

    // Constant-first equals is null-safe: a scheme-less URI (getScheme() ==
    // null) now raises the intended KafkaException instead of an NPE.
    if ("kafka".equals(uri.getScheme())) {
        // using the direct broker list
        // URL: kafka://<kafka host>/<topic>
        // e.g. kafka://kafka-server:9000,kafka-server2:9000/foobar
        String brokerList = uri.getAuthority();
        props.setProperty("metadata.broker.list", brokerList);
        job.set(KAFKA_CONFIG_PREFIX + ".metadata.broker.list", brokerList);

        if (uri.getPath() == null || uri.getPath().length() <= 1) {
            throw new KafkaException("no topic specified in kafka uri");
        }

        topic = uri.getPath().substring(1); // ignore the initial '/' in the path
        job.set(KAFKA_CONFIG_PREFIX + ".topic", topic);
        log.info(String.format("using kafka broker %s (topic %s)", brokerList, topic));
    } else {
        throw new KafkaException("missing scheme from kafka uri (must be kafka://)");
    }

    Producer<Object, byte[]> producer = new Producer<Object, byte[]>(new ProducerConfig(props));
    return new KafkaRecordWriter<K, V>(producer, topic, queueBytes);
}