Package org.dspace.statistics.util

Source Code of org.dspace.statistics.util.ClassicDSpaceLogConverter

/*
* ClassicDSpaceLogConverter.java
*
* Version: $Revision:$
*
* Date: $Date:$
*
* Copyright (c) 2002-2010, The DSpace Foundation.  All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the DSpace Foundation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/

package org.dspace.statistics.util;

import org.apache.commons.cli.*;
import org.dspace.app.statistics.LogAnalyser;
import org.dspace.app.statistics.LogLine;
import org.dspace.content.*;
import org.dspace.handle.HandleManager;
import org.dspace.core.Context;

import java.io.*;
import java.sql.SQLException;
import java.util.Hashtable;
import java.util.Vector;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.text.SimpleDateFormat;
import java.text.ParsePosition;

/**
* A utility class to convert the classic dspace.log (as generated
* by log4j) files into an intermediate format for ingestion into
* the new solr stats.
*
* @author Stuart Lewis
*/
public class ClassicDSpaceLogConverter {

    /** A DSpace context */
    private Context context;

    /** Whether or not to provide verbose output */
    private boolean verbose = false;

    /** Whether to include actions logged by org.dspace.usage.LoggerUsageEventListener */
    private boolean newEvents = false;

    /** A regular expression for extracting the IP address from a log line */
    private Pattern ipaddrPattern = Pattern.compile("ip_addr=(\\d*\\.\\d*\\.\\d*\\.\\d*):");

    /** Date format (in) from the log line */
    private SimpleDateFormat dateFormatIn = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    /** Date format out (for solr) */
    private SimpleDateFormat dateFormatOut = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");

    /** Date format (in) from the log line for the UID */
    private SimpleDateFormat dateFormatInUID = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");

    /** Date format out (for uid) */
    private SimpleDateFormat dateFormatOutUID = new SimpleDateFormat("yyyyMMddHHmmssSSS");


    /**
     * Create an instance of the converter utility
     *
     * @param c The context
     * @param v Whether or not to provide verbose output
     * @param nE Whether to include actions logged by org.dspace.usage.LoggerUsageEventListener
     */
    public ClassicDSpaceLogConverter(Context c, boolean v, boolean nE)
    {
        // Set up some variables
        context = c;
        verbose = v;
        newEvents = nE;
    }

    /**
     * Convert a classic log file
     *
     * @param in The filename to read from
     * @param out The filename to write to
     * @return The number of lines processed
     */
    public int convert(String in, String out)
    {
        // Line counter
        int counter = 0;
        int lines = 0;

        // Say what we're going to do
        System.out.println(" About to convert '" + in + "' to '" + out + "'");

        // Setup the regular expressions for the log file
        LogAnalyser.setRegex(in);

        // Open the file and read it line by line
        try
        {
            BufferedReader input =  new BufferedReader(new FileReader(new File(in)));
            Writer output = new BufferedWriter(new FileWriter(new File(out)));

            try {
                String line;
                LogLine lline;
                String lout;
                String id;
                String handle;
                String ip;
                String date;
                DSpaceObject dso;
                String uid;
                String lastLine = "";
                while ((line = input.readLine()) != null)
                {
                    // Read inthe line and covnert it to a LogLine
                    lines++;
                    if (verbose) System.out.println("  - IN: " + line);
                    lline = LogAnalyser.getLogLine(line);

                    // Get rid of any lines that aren't INFO
                    if ((lline == null) || (!lline.isLevel("INFO")))
                    {
                        if (verbose) System.out.println("   - IGNORED!");
                        continue;
                    }

                    // Get the IP address of the user
                    Matcher matcher = ipaddrPattern.matcher(line);
                    if (matcher.find())
                    {
                        ip = matcher.group(1);
                    }
                    else
                    {
                        ip = "unknown";
                    }

                    // Get and format the date
                    // We can use lline.getDate() as this strips the time element
                    date = dateFormatOut.format(
                           dateFormatIn.parse(line.substring(0, line.indexOf(',')),
                                                 new ParsePosition(0)));

                    // Generate a UID for the log line
                    // - based on the date/time
                    uid = dateFormatOutUID.format(
                           dateFormatInUID.parse(line.substring(0, line.indexOf(' ', line.indexOf(' ') + 1)),
                                                 new ParsePosition(0)));

                    try
                    {
                        // What sort of view is it?
                        // (ignore lines from org.dspace.usage.LoggerUsageEventListener which is 1.6 code)
                        if ((lline.getAction().equals("view_bitstream")) &&
                            (!lline.getParams().contains("invalid_bitstream_id")) &&
                            (!lline.getParams().contains("withdrawn")) &&
                            ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                        {
                            id = lline.getParams().substring(13);
                        }
                        else if ((lline.getAction().equals("view_item")) &&
                                ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                        {
                            handle = lline.getParams().substring(7);
                            dso = HandleManager.resolveToObject(context, handle);
                            id = "" + dso.getID();
                        }
                        else if ((lline.getAction().equals("view_collection")) &&
                                 ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                        {
                            id = lline.getParams().substring(14);
                        }
                        else if ((lline.getAction().equals("view_community")) &&
                                 ((!line.contains("org.dspace.usage.LoggerUsageEventListener")) || newEvents))
                        {
                            id = lline.getParams().substring(13);
                        }
                        else
                        {
                            //if (verbose) System.out.println("   - IGNORED!");
                            continue;
                        }

                        // Construct the log line
                        lout = uid + "," +
                               lline.getAction() + "," +
                               id + "," +
                               date + "," +
                               lline.getUser() + "," +
                               ip + "\n";
                    }
                    catch (Exception e)
                    {
                        if (verbose) System.out.println("  - IN: " + line);
                        if (verbose) System.err.println("Error with log line! " + e.getMessage());
                        continue;
                    }

                    if ((verbose) && (!"".equals(lout)))
                    {
                        System.out.println("  - IN: " + line);
                        System.out.println("  - OUT: " + lout);
                    }

                    // Write the output line
                    if ((!"".equals(lout)) && (!lout.equals(lastLine)))
                    {
                        output.write(lout);
                        counter++;
                        lastLine = lout;
                    }
                }
            }
            finally
            {
                // Clean up the input and output streams
                input.close();
                output.flush();
                output.close();
            }
        }
        catch (Exception ex)
        {
            ex.printStackTrace();
        }

        // Tell the user what we have done
        System.out.println("  Read " + lines + " lines and recorded " + counter + " events");
        return counter;
    }

    /**
     * Print the help message
     *
     * @param options The command line options the user gave
     * @param exitCode the system exit code to use
     */
    private static void printHelp(Options options, int exitCode)
    {
        // print the help message
        HelpFormatter myhelp = new HelpFormatter();
        myhelp.printHelp("ClassicDSpaceLogConverter\n", options);
        System.out.println("\n\tClassicDSpaceLogConverter -i infilename -o outfilename -v (for verbose output)");
        System.exit(exitCode);
    }

    /**
     * Main method to execute the converter
     *
     * @param args CLI args
     */
    public static void main(String[] args)
    {
        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        options.addOption("i", "in", true, "source file");
        options.addOption("o", "out", true, "destination directory");
        options.addOption("m", "multiple",false, "treat the input file as having a wildcard ending");
        options.addOption("n", "newformat",false, "process new format log lines (1.6+)");
        options.addOption("v", "verbose", false, "display verbose output (useful for debugging)");
        options.addOption("h", "help", false, "help");

        // Parse the command line arguments
        CommandLine line;
        try
        {
            line = parser.parse(options, args);
        }
        catch (ParseException pe)
        {
            System.err.println("Error parsing command line arguments: " + pe.getMessage());
            System.exit(1);
            return;
        }

        // Did the user ask to see the help?
        if (line.hasOption('h'))
        {
            printHelp(options, 0);
        }

        // Check we have an input and output file
        if ((!line.hasOption('i')) && (!line.hasOption('o')))
        {
            System.err.println("-i and -o input and output file names are required");
            printHelp(options, 1);
        }
        else if (!line.hasOption('i'))
        {
            System.err.println("-i input file name is required");
            printHelp(options, 1);
        }

        if (!line.hasOption('o'))
        {
            System.err.println("-o output file names is required");
            printHelp(options, 1);
        }

        // Whether or not to include event created by org.dspace.usage.LoggerUsageEventListener
        boolean newEvents = line.hasOption('n');

        // Create a copy of the converter
        Context context = null;
        try
        {
            context = new Context();
            context.turnOffAuthorisationSystem();
        }
        catch (SQLException sqle)
        {
            System.err.println("Unable to create DSpace context: " + sqle.getMessage());
            System.exit(1);
        }

        ClassicDSpaceLogConverter converter = new ClassicDSpaceLogConverter(context,
                                                                            line.hasOption('v'),
                                                                            newEvents);

        // Set up the log analyser
        try
        {
            LogAnalyser.readConfig(LogAnalyser.configFile);
        }
        catch (IOException ioe)
        {
            System.err.println("Unable to read config file: " + LogAnalyser.configFile);
            System.exit(1);
        }

        // Are we converting multiple files?
        if (line.hasOption('m'))
        {
            // Convert all the files
            final File sample = new File(line.getOptionValue('i'));
            File dir = sample.getParentFile();
            FilenameFilter filter = new FilenameFilter()
            {
                public boolean accept(File dir, String name)
                {
                    return name.startsWith(sample.getName());
                }
            };
            String[] children = dir.list(filter);
            for (String in : children)
            {
                System.out.println(in);
                String out = line.getOptionValue('o') +
                             (dir.getAbsolutePath() +
                              System.getProperty("file.separator") + in).substring(line.getOptionValue('i').length());
                int count = converter.convert(dir.getAbsolutePath() + System.getProperty("file.separator") + in,
                                              out);
            }
        }
        else
        {
            // Just convert the one file
            int count = converter.convert(line.getOptionValue('i'),
                                          line.getOptionValue('o'));
        }

        // Clean everything up
        context.restoreAuthSystemState();
        context.abort();
    }
}
TOP

Related Classes of org.dspace.statistics.util.ClassicDSpaceLogConverter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.