/*
* Copyright (C) 2007-2014 Christian Bockermann <chris@jwall.org>
*
* This file is part of the web-audit library.
*
* web-audit library is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* The web-audit library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package org.jwall.log.io;
import java.io.BufferedReader;
import java.io.Serializable;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jwall.audit.EventType;
import org.jwall.log.LogMessage;
import org.jwall.log.LogMessageImpl;
import org.jwall.web.audit.io.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Line-oriented parser that turns a single log line into a {@link LogMessage}.
 *
 * <p>The timestamp of the message is located with a regular expression (the
 * <em>date pattern</em>) and then parsed with a {@link SimpleDateFormat} (the
 * <em>date format</em>). If no parseable date is found in the line, the current
 * system time is used. Optionally, default values and an additional feature
 * extractor can contribute further attributes to the message.</p>
 *
 * <p>The parser keeps a single {@link DateFormat} instance; {@link SimpleDateFormat}
 * is documented as not being synchronized, so an instance of this class should not
 * be used by several threads concurrently without external synchronization.</p>
 */
public class GenericLogParser
    implements Parser<LogMessage>, Serializable
{
    /** The unique class ID */
    private static final long serialVersionUID = -7311309136161789947L;

    /** Regular expression matching a plain {@code HH:mm:ss} time of day. */
    public final static String TIME_PATTERN = "\\d\\d:\\d\\d:\\d\\d";

    /** Date format without enclosing brackets, e.g. {@code Wed Oct 11 14:32:52 2000}. */
    public final static String DATE_FORMAT1 = "EEE MMM dd HH:mm:ss yyyy";

    /** Matches any bracketed section; capture group 1 is the text between the brackets. */
    public final static String DATE_PATTERN1 = "\\[(.*?)\\]";

    /** Date format including the enclosing brackets, e.g. {@code [10/Oct/2000:13:55:36 -0700]}. */
    public final static String DATE_FORMAT2 = "[dd/MMM/yyyy:HH:mm:ss Z]";

    /**
     * Regular expression selecting a bracketed date written in {@link #DATE_FORMAT2}.
     * The time-zone part is matched leniently (anything up to the closing bracket);
     * candidates that do not parse are skipped by {@link #extractTimestamp(String)}.
     */
    public final static String DATE_PATTERN2 = "\\[\\d\\d/[A-Za-z]+/\\d{4}:\\d\\d:\\d\\d:\\d\\d\\s[^\\]]+\\]";

    public static Logger log = LoggerFactory.getLogger( GenericLogParser.class );

    /** The date format string currently used to build {@link #fmt}. */
    String dateFormat = DATE_FORMAT2;

    /** The regular expression string currently compiled into {@link #p}. */
    String datePattern = DATE_PATTERN2;

    /** Parses the date candidates found by {@link #p}; always uses the English locale. */
    DateFormat fmt = new SimpleDateFormat( dateFormat, Locale.ENGLISH );

    /** Compiled date pattern; must be built from the regex, not from the date format. */
    Pattern p = Pattern.compile( datePattern );

    /** Not used by this class itself; transient because readers are not serializable. */
    transient BufferedReader reader;

    /** Key/value pairs that are set on every parsed message before feature extraction. */
    Map<String,String> defaultValues = new HashMap<String,String>();

    /** Optional extractor providing additional attributes for a line; may be {@code null}. */
    Parser<Map<String,String>> extractor = null;


    /**
     * Creates a parser without an additional feature extractor.
     */
    public GenericLogParser(){
    }


    /**
     * Creates a parser that additionally sets all attributes the given extractor
     * returns for a line.
     *
     * @param extractor the feature extractor, may be {@code null}
     */
    public GenericLogParser( Parser<Map<String,String>> extractor ){
        this.extractor = extractor;
    }


    /**
     * Replaces all default values with a copy of the given map.
     *
     * @param defaults the new defaults; {@code null} clears all defaults
     */
    public void setDefaults( Map<String,String> defaults ){
        if( defaults == null ){
            defaultValues = new HashMap<String,String>();
        } else {
            defaultValues = new HashMap<String,String>( defaults );
        }
    }


    /**
     * @return the live map of default values (not a copy)
     */
    public Map<String,String> getDefaults(){
        return defaultValues;
    }


    /**
     * Sets a single default value.
     *
     * @param key the attribute name
     * @param val the default value for that attribute
     */
    public void setDefault( String key, String val ){
        defaultValues.put( key, val );
    }


    /**
     * @param key the attribute name
     * @return the default value registered for the key, or {@code null} if there is none
     */
    public String getDefault( String key ){
        return defaultValues.get( key );
    }


    /**
     * Sets the date format used to parse the date candidates found in a line.
     * The formatter is rebuilt immediately so that the new format takes effect.
     *
     * @param fmt a {@link SimpleDateFormat} pattern
     * @throws IllegalArgumentException if the pattern is not a valid date format
     * @throws NullPointerException if the pattern is {@code null}
     */
    public void setDateFormat( String fmt ){
        // build first: if the pattern is invalid, the previous state is left untouched
        DateFormat format = new SimpleDateFormat( fmt, Locale.ENGLISH );
        this.dateFormat = fmt;
        this.fmt = format;
    }


    /**
     * Sets the regular expression used to locate date candidates within a line.
     * If the expression has a capture group 1, its content is tried as date text
     * whenever the complete match cannot be parsed.
     *
     * @param pattern a regular expression
     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
     */
    public void setDatePattern( String pattern ){
        // compile first: if the expression is invalid, the previous state is left untouched
        Pattern compiled = Pattern.compile( pattern );
        this.datePattern = pattern;
        this.p = compiled;
    }


    /**
     * Parses a single log line into a generic log message.
     *
     * <p>The message is created with the extracted timestamp, an empty source and
     * the complete line as message text. Afterwards all default values are set on
     * the message, and finally all attributes returned by the extractor (if any),
     * so extracted attributes take precedence over defaults.</p>
     *
     * @param line the log line, may be {@code null}
     * @return the parsed message, or {@code null} if the line is {@code null}
     * @throws ParseException if the extractor fails to parse the line
     */
    public LogMessage parse( String line ) throws ParseException {

        if( line == null ){
            return null;
        }

        Long time = extractTimestamp( line );

        // maybe we need to extract the source from the message? e.g. in the remote-syslog setting?
        //
        LogMessageImpl msg = new LogMessageImpl( EventType.GENERIC, time, "", line );

        // Defaults used to be applied only to keys that were already present on the
        // message, which silently dropped defaults for every other key. They are now
        // applied unconditionally; the extractor below may still override them.
        for( Map.Entry<String,String> def : defaultValues.entrySet() ){
            msg.set( def.getKey(), def.getValue() );
        }

        if( extractor != null ){
            Map<String,String> features = extractor.parse( line );
            if( features != null ){
                for( Map.Entry<String,String> feature : features.entrySet() ){
                    msg.set( feature.getKey(), feature.getValue() );
                }
            }
        }

        return msg;
    }


    /**
     * Extracts the timestamp of a log line.
     *
     * <p>Every match of the date pattern is tried in order. For each match the
     * complete matched text is parsed first; if that fails and the pattern has a
     * capture group 1, the content of that group is tried as well. The first
     * candidate that parses determines the result.</p>
     *
     * @param line the log line, may be {@code null}
     * @return the parsed time in milliseconds since the epoch, or the current
     *         system time if the line contains no parseable date
     */
    protected Long extractTimestamp( String line ){

        if( line != null ){
            Matcher m = p.matcher( line );
            while( m.find() ){
                log.debug( "Found date at {},{}", m.start() + 1, m.end() - 1 );

                Long time = parseDate( m.group() );
                if( time == null && m.groupCount() > 0 && m.group( 1 ) != null ){
                    // e.g. DATE_PATTERN1 matches "[...]" while DATE_FORMAT1 has no brackets
                    time = parseDate( m.group( 1 ) );
                }

                if( time != null ){
                    return time;
                }
            }
        }

        return System.currentTimeMillis();
    }


    /**
     * Tries to parse a date candidate with the current date format.
     *
     * @param str the candidate text
     * @return the time in milliseconds, or {@code null} if the text cannot be parsed
     */
    private Long parseDate( String str ){
        try {
            log.debug( " date: {}", str );
            return fmt.parse( str ).getTime();
        } catch (Exception e) {
            // best effort: an unparseable candidate is skipped, the caller tries the next one
            log.debug( "Skipping unparseable date candidate '{}': {}", str, e.getMessage() );
            return null;
        }
    }
}