/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.text;
import java.io.IOException;
import java.io.OutputStream;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.Event.Priority;
import com.cloudera.flume.handlers.text.FormatFactory.OutputFormatBuilder;
import com.cloudera.flume.handlers.text.output.AbstractOutputFormat;
import com.cloudera.flume.handlers.text.output.OutputFormat;
import com.cloudera.util.Clock;
import com.google.common.base.Preconditions;
/**
 * Extracts values from, and renders events as, a single text syslog line.
 *
 * The syslog timestamp does not carry a year, so the extract function needs a
 * year to be supplied; it is taken either from the constructor or from the
 * explicit argument of {@link #extract(String, int)}.
 *
 * Month names are always handled as English abbreviations ("Jan", "Feb", ...)
 * regardless of the JVM default locale.
 *
 * Each instance owns its own {@link SimpleDateFormat} objects. Those are not
 * synchronized, so a single instance should not be used from several threads
 * at once without external synchronization.
 */
public class SyslogEntryFormat extends AbstractOutputFormat implements
    InputFormat {
  // Groups: 1 = timestamp, 2 = host, 3 = service, 5 = message after the ':'
  final static Pattern SYSLOG_PAT = Pattern
      .compile("(\\S{3} \\d{1,2} \\d{2}:\\d{2}:\\d{2}) (\\S+) ([^:]*?)(:(.*))?");

  private static final String NAME = "syslog";

  // Not static because of concurrency bug in JDK on static DateFormats
  // see: http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6231579
  // Pinned to English because syslog month abbreviations are not localized.
  final DateFormat SYSLOG_DF = new SimpleDateFormat("MMM dd HH:mm:ss",
      Locale.ENGLISH);

  // Year-aware variant used for parsing. Parsing without a year defaults to
  // 1970, which is not a leap year, so "Feb 29" would leniently roll over to
  // Mar 1 before the real year could be applied.
  private final DateFormat yearAwareDf = new SimpleDateFormat(
      "yyyy MMM dd HH:mm:ss", Locale.ENGLISH);

  // Year assumed by extract(String).
  int year;

  /**
   * Creates a format that assumes entries belong to the current calendar year
   * (evaluated once, at construction time).
   */
  public SyslogEntryFormat() {
    this(Calendar.getInstance().get(Calendar.YEAR));
  }

  /**
   * Creates a format that assumes entries belong to the given year.
   *
   * @param year
   *          year applied to timestamps parsed by {@link #extract(String)}
   */
  public SyslogEntryFormat(int year) {
    this.year = year;
  }

  /**
   * Parses one syslog line into an event.
   *
   * The event body is the complete raw line, the event timestamp is the
   * line's timestamp interpreted in the given year, the host is the second
   * token of the line, and priority is always INFO. A "service" attribute is
   * attached only when the line has a non-empty message after the service
   * name's colon.
   *
   * @param s
   *          the raw syslog line
   * @param year
   *          year to combine with the line's month/day/time
   * @return the extracted event
   * @throws EventExtractException
   *           if the line does not match the syslog layout or its timestamp
   *           cannot be parsed
   */
  public Event extract(String s, int year) throws EventExtractException {
    Matcher m = SYSLOG_PAT.matcher(s);
    if (!m.matches()) {
      throw new EventExtractException("Does not match syslog format! " + s);
    }

    String date = m.group(1);
    Date d;
    try {
      // Parse the year together with the rest of the timestamp so that the
      // day of month is validated against the right year (leap days).
      d = yearAwareDf.parse(year + " " + date);
    } catch (ParseException e) {
      throw new EventExtractException("Invalid date format: " + date);
    }

    String host = m.group(2);

    // TODO(jon) body should be the raw entry, and a new field should be
    // created for this instead.
    String body = m.group(5); // TODO (jon) make this another field
    String service = m.group(3);
    if (body == null || body.length() == 0) {
      // Without a message part, group 3 is not really a service name.
      service = null;
    }

    Map<String, byte[]> fields = new HashMap<String, byte[]>();
    if (service != null) {
      fields.put("service", service.getBytes());
    }

    return new EventImpl(s.getBytes(), d.getTime(), Priority.INFO,
        Clock.nanos(), host, fields);
  }

  /**
   * Parses one syslog line using the year this format was constructed with.
   *
   * @param s
   *          the raw syslog line
   * @return the extracted event
   * @throws EventExtractException
   *           if the line does not match the syslog layout or its timestamp
   *           cannot be parsed
   */
  @Override
  public Event extract(String s) throws EventExtractException {
    return extract(s, this.year);
  }

  /**
   * This outputs a single line log entry similar to that generated by
   * syslog/syslog-ng.
   *
   * It is generally in the form:
   *
   * {@literal <date> <sourcehost> <service>: <message body>}
   *
   * Here is an example:
   *
   * {@literal Aug 21 08:02:39 soundwave NetworkManager: <info> (wlan0): supplicant}
   *
   * The "service: " part is emitted only when the event carries a "service"
   * attribute. The event body is appended verbatim, followed by a newline.
   */
  private String format(Event e) {
    StringBuilder b = new StringBuilder();
    b.append(SYSLOG_DF.format(new Date(e.getTimestamp())));
    b.append(" ");
    b.append(e.getHost());
    b.append(" ");
    byte[] svc = e.get("service");
    if (svc != null) {
      b.append(new String(svc));
      b.append(": ");
    }
    b.append(new String(e.getBody()));
    b.append("\n");
    return b.toString();
  }

  /**
   * Writes the syslog-style rendering of the event to the stream.
   *
   * @param o
   *          destination stream
   * @param e
   *          event to render
   * @throws IOException
   *           if writing to the stream fails
   */
  @Override
  public void format(OutputStream o, Event e) throws IOException {
    o.write(format(e).getBytes());
  }

  /**
   * Returns a builder that creates this format from zero or one string
   * arguments; the optional argument is the year to assume, defaulting to the
   * current calendar year at build time.
   */
  public static OutputFormatBuilder builder() {
    return new OutputFormatBuilder() {
      @Override
      public OutputFormat build(String... args) {
        Preconditions.checkArgument(args.length <= 1,
            "usage: syslogEntry[(year)]");
        int year = Calendar.getInstance().get(Calendar.YEAR);
        if (args.length >= 1) {
          year = Integer.parseInt(args[0]);
        }
        OutputFormat format = new SyslogEntryFormat(year);
        format.setBuilder(this);
        return format;
      }

      @Override
      public String getName() {
        return NAME;
      }
    };
  }
}