// Build the "import" assembly: parse each raw Apache access-log line into named fields.
// declares: "ip", "time", "method", "event", "status", "size"
Fields apacheFields = new Fields( "ip", "time", "method", "event", "status", "size" );
// Regex capture groups, in order:
//   1: first space-delimited token on the line               -> "ip"
//   2: text between '[' and ']'                               -> "time"
//   3: first token inside the double-quoted request string   -> "method"
//   4: second token inside the double-quoted request string  -> "event"
//   5: token following the closing quote                      -> "status"
//   6: next token after that                                  -> "size"
// The two tokens between the first token and '[' and the third token inside the quotes are
// matched but not captured; anything after group 6 is ignored.
String apacheRegex = "^([^ ]*) +[^ ]* +[^ ]* +\\[([^]]*)\\] +\\\"([^ ]*) ([^ ]*) [^ ]*\\\" ([^ ]*) ([^ ]*).*$";
// capture-group indexes handed to the parser, listed in the same order as apacheFields
int[] apacheGroups = {1, 2, 3, 4, 5, 6};
RegexParser parser = new RegexParser( apacheFields, apacheRegex, apacheGroups );
// apply the parser to the "line" field of every incoming tuple
Pipe importPipe = new Each( "import", new Fields( "line" ), parser );
// Create the source tap for the raw log.
// If inputPath starts with a scheme followed by "://" (i.e. it looks like a URL), read it through Hfs;
// otherwise treat it as a resource on the local file system and read it through Lfs.
// Lfs allows for relative paths.
Tap logTap =
inputPath.matches( "^[^:]+://.*" ) ? new Hfs( new TextLine(), inputPath ) : new Lfs( new TextLine(), inputPath );
// Create a tap on the default filesystem at logsPath that carries the parsed apacheFields;
// it is used as the sink of the import flow below.
Tap parsedLogTap = new Hfs( apacheFields, logsPath );
// Connect the import assembly to its source (logTap) and sink (parsedLogTap) taps.
Flow importLogFlow = flowConnector.connect( logTap, parsedLogTap, importPipe );
// Create an assembly that parses the "time" field into a timestamp,
// then counts the number of requests per second and per minute.
// Apply a text parser to create a timestamp with 'second' granularity.
// The pattern "dd/MMM/yyyy:HH:mm:ss Z" reads day/month-name/year:hour:minute:second plus a zone offset.
// declares field "ts"
DateParser dateParser = new DateParser( new Fields( "ts" ), "dd/MMM/yyyy:HH:mm:ss Z" );
// The parser reads only the "time" field. Fields.RESULTS is the output selector;
// NOTE(review): presumably this means only "ts" is passed downstream — confirm against the Cascading docs.
Pipe tsPipe = new Each( "arrival rate", new Fields( "time" ), dateParser, Fields.RESULTS );
// Name the per-second branch "tsCount" and split it off tsPipe
// (tsPipe is also the parent of the per-minute expression pipe).
Pipe tsCountPipe = new Pipe( "tsCount", tsPipe );
// group tuples by their "ts" value
tsCountPipe = new GroupBy( tsCountPipe, new Fields( "ts" ) );
// apply the Count aggregator to every "ts" group
tsCountPipe = new Every( tsCountPipe, Fields.GROUP, new Count() );
// Apply an expression to create a timestamp with 'minute' granularity.
// "ts - (ts % (60 * 1000))" rounds ts down to the nearest multiple of 60000;
// NOTE(review): this assumes "ts" is in milliseconds, so 60 * 1000 is one minute — confirm.
// The result is declared as a long.
// declares field "tm"
Pipe tmPipe = new Each( tsPipe, new ExpressionFunction( new Fields( "tm" ), "ts - (ts % (60 * 1000))", long.class ) );
// Name the per-minute branch "tmCount" and split it off tmPipe.
Pipe tmCountPipe = new Pipe( "tmCount", tmPipe );
// group tuples by their "tm" value
tmCountPipe = new GroupBy( tmCountPipe, new Fields( "tm" ) );
// apply the Count aggregator to every "tm" group
tmCountPipe = new Every( tmCountPipe, Fields.GROUP, new Count() );