Package com.flaptor.hounder.loganalysis

Source Code of com.flaptor.hounder.loganalysis.ReportTest

/*
Copyright 2008 Flaptor (flaptor.com)

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.flaptor.hounder.loganalysis;

import java.io.File;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

import com.flaptor.util.Config;
import com.flaptor.util.EmbeddedSqlServer;
import com.flaptor.util.Execute;
import com.flaptor.util.FileUtil;
import com.flaptor.util.Pair;
import com.flaptor.util.TestCase;
import com.flaptor.util.TestInfo;
import com.flaptor.util.TestUtils;
/**
* @author Flaptor Development Team
*/
public class ReportTest extends TestCase {

    // Logger named by Execute.whoAmI() (presumably the name of this class - confirm).
    Logger logger = Logger.getLogger(Execute.whoAmI());
    // Random source for the generated fixture; setUp re-creates it with the constant seed 0.
    Random rnd = null;
    // Configuration read from loganalysis.properties in setUp, where its database.* keys
    // are overwritten with the embedded server's driver, url, user and password.
    Config config;
    // Absolute path of the temp directory created in setUp and deleted in tearDown.
    String tmpDir;
    // Embedded SQL server created in setUp from tables.sql and stopped in tearDown.
    EmbeddedSqlServer server;
    // Connection to the embedded server; opened in setUp, used to insert the fixture
    // rows, and closed again at the end of setUp.
    Connection con;




    // counters used to accumulate the info used for report verification
    // query text -> number of times it was inserted into the queries table
    HashMap<String,Integer> queryMap = null;
    // query text -> number of logged queries that got no clicked result
    HashMap<String,Integer> unclickedQueryMap = null;
    // result link -> number of times it was inserted as an unclicked result
    HashMap<String,Integer> unclickedResultMap = null;
    // result link -> distances (positions) at which it was inserted as a clicked result
    HashMap<String,ArrayList<Integer>> clickedResultMap = null;
    // source ip -> the queries issued from it, grouped into sessions
    HashMap<String,SessionData> sessionMap = null;

    @Override
    public void setUp() throws Exception {
        String log4jConfigPath = com.flaptor.util.FileUtil.getFilePathFromClasspath("log4j.properties");
        if (null != log4jConfigPath) {
            PropertyConfigurator.configureAndWatch(log4jConfigPath);
        } else {
            System.err.println("log4j.properties not found on classpath!");
        }
//        rnd = new Random(System.currentTimeMillis());
        rnd = new Random(0);
        tmpDir = FileUtil.createTempDir("reporttest",".tmp").getAbsolutePath();

        server = new EmbeddedSqlServer(new File("src/com/flaptor/hounder/loganalysis/tables.sql"), "hsql");
        con = server.getConnection();
        config = Config.getConfig("loganalysis.properties");
        config.set("database.driver",server.getDriverSpec());
        config.set("database.url",server.getDBUrl());
        config.set("database.user",server.getUser());
        config.set("database.pass",server.getPass());

        queryMap = new HashMap<String,Integer>();
        unclickedQueryMap = new HashMap<String,Integer>();
        unclickedResultMap = new HashMap<String,Integer>();
        clickedResultMap = new HashMap<String,ArrayList<Integer>>();
        sessionMap = new HashMap<String,SessionData>();

        // create test data
        int loglen = 50;
        int maxQueries = 30;
        int maxResults = 50;
        int maxSources = 10;
        int maxCategories = 3;
        int maxTags = 5;
        String[] queries = new String[maxQueries];
        String[] results = new String[maxResults];
        String[] sources = new String[maxSources];
        for (int i=0; i<maxQueries; i++) {
            queries[i] = TestUtils.randomText(1,3).trim();
        }
        for (int i=0; i<maxResults; i++) {
            results[i] = TestUtils.randomText(1,4).trim();
        }
        for (int i=0; i<maxSources; i++) {
            sources[i] = rnd.nextInt(256)+"."+rnd.nextInt(256)+"."+rnd.nextInt(256)+"."+rnd.nextInt(256);
        }

        // fill tables with test data
        int catId = 10;
        int tagId = 10;
        for (int i=0; i<maxCategories; i++) { // add tag types
          PreparedStatement prep = con.prepareStatement("insert into tag_types (type_id, name) values (?,?)");
          prep.setInt(1, catId);
          prep.setString(2, "category "+catId);
          prep.executeUpdate();
          for (int j=0; j<maxTags; j++) { // add tags of that type
            prep = con.prepareStatement("insert into tags (tag_id, type_id, name) values (?,?,?)");
            prep.setInt(1, tagId);
            prep.setInt(2, catId);
            prep.setString(3, "tag " + tagId);
                prep.executeUpdate();
                tagId++;
            }
            catId++;
        }

        long time = System.currentTimeMillis();
        int rid = 1;
        for (int qid=1; qid<=loglen; qid++) {
            // add query data
            time += (rnd.nextFloat() < 0.2f) ? 1000*60*60 : rnd.nextInt(1000*60); // usually less than a minute and a few 1 hour jumps
            Timestamp ts = new Timestamp(time);
            String query = queries[rnd.nextInt(maxQueries)];
            String ip = sources[rnd.nextInt(maxSources)];
            String sql = "insert into queries (query_id,query,ip,time) values ("+qid+",'"+query+"','"+ip+"','"+ts+"');";
            PreparedStatement prep = con.prepareStatement(sql);
            prep.executeUpdate();
            countData(queryMap, query);
            addSessionData(sessionMap, ip, time, query);
            // add results data
            boolean click = rnd.nextBoolean();
            int dist = 1+rnd.nextInt(20); // position of clicked result
            int first = (dist > 3) ? 3 : dist;
            for (int f=1; f<=first; f++) { // add first unclicked results
                String result = results[rnd.nextInt(maxResults)];
                sql = "insert into results (result_id,query_id,link,clicked,distance) values ("+rid+","+qid+",'"+result+"',0,"+f+");";
                prep = con.prepareStatement(sql);
                prep.executeUpdate();
                countData(unclickedResultMap, result);
                addTagData(rid,maxCategories*maxTags);
                rid++;
            }
            if (click) { // add clicked result
                String result = results[rnd.nextInt(maxResults)];
                sql = "insert into results (result_id,query_id,link,clicked,distance) values ("+rid+","+qid+",'"+result+"',1,"+dist+");";
                prep = con.prepareStatement(sql);
                prep.executeUpdate();
                addData(clickedResultMap, result, dist);
                addTagData(rid,maxCategories*maxTags);
                rid++;
            } else {
                countData(unclickedQueryMap, query);
            }
        }

/* for debugging
        showTable("queries");
        showTable("results");
        showTable("tag_lists");
        showTable("tags");
        showTable("tag_types");
*/

        // disconnect from server
        con.close();
    }


    private void addTagData(int rid, int maxTagId) throws Exception {
        int tag_count = 1+rnd.nextInt(2);
        for (int t=0; t<tag_count; t++) { // add result-tag relationships
            int tagId = 10+rnd.nextInt(maxTagId);
            String sql = "insert into tag_lists (result_id,tag_id) values ("+rid+","+tagId+");";
            PreparedStatement prep = con.prepareStatement(sql);
            prep.executeUpdate();
        }
    }

    public void tearDown() {
        server.stop();
        FileUtil.deleteDir(tmpDir);
    }



    private void showTable(String tableName) throws Exception {
        System.out.println("\nTABLE: "+tableName);
        String sql = "select * from "+tableName+";";
        PreparedStatement prep = con.prepareStatement(sql);
        ResultSet rs = prep.executeQuery();
        ResultSetMetaData md = rs.getMetaData();
        int colCount = md.getColumnCount();
        String line = "---------------------------------------------------------------------------------------";
        line = line + line + line;
        int totalWidth = 0;
        int limit = 30;
        String title = "";
        String sep = " | ";
        for (int c=1; c<=colCount; c++) {
            String colName = md.getColumnName(c);
            int width = md.getColumnDisplaySize(c);
            if (width > limit) width = limit;
            String out = String.format("%"+width+"S",colName);
            if (out.length() > limit) out = out.substring(0,limit);
            title += sep+out;
            totalWidth += sep.length()+out.length();
        }
        title += sep;
        totalWidth += sep.length()-2;
        System.out.println(" "+line.substring(0,totalWidth));
        System.out.println(title);
        System.out.println(" "+line.substring(0,totalWidth));
        while (rs.next()) {
            for (int c=1; c<=colCount; c++) {
                int width = md.getColumnDisplaySize(c);
                if (width > limit) width = limit;
                String data = rs.getString(c);
                String out = String.format("%"+width+"s",data);
                if (out.length() > limit) out = out.substring(0,limit);
                System.out.print(sep+out);
            }
            System.out.println(sep);
        }
        System.out.println(" "+line.substring(0,totalWidth));
        rs.close();
    }


    // accumulate data
    private void countData(HashMap<String,Integer> dataMap, String text) {
        int count = (dataMap.containsKey(text)) ? dataMap.get(text) : 0;
        dataMap.put(text,count+1);
    }

    // store data
    private void addData(HashMap<String,ArrayList<Integer>> dataMap, String text, int num) {
        ArrayList<Integer> data = dataMap.containsKey(text) ? dataMap.get(text) : new ArrayList<Integer>();
        data.add(num);
        dataMap.put(text,data);
    }


    // struct for storing query data
    private class QueryData {
        int sessionId;
        String query;
        public QueryData(int sessionId, String query) {
            this.sessionId = sessionId;
            this.query = query;
        }
    }

    // struct for storing session data
    private class SessionData {
        long lastTime;
        ArrayList<QueryData> queries;
        public SessionData(long lastTime) {
            this.lastTime = lastTime;
            queries = new ArrayList<QueryData>();
        }
    }

    // store session data
    private void addSessionData(HashMap<String,SessionData> dataMap, String ip, long time, String query) {
        int sessionId = 1;
        SessionData data = null;
        if (dataMap.containsKey(ip)) {
            data = dataMap.get(ip);
            sessionId = data.queries.get(data.queries.size()-1).sessionId;
            if (time - data.lastTime > 30*60*1000) sessionId++;
            data.lastTime = time;
        } else {
            data = new SessionData(time);
            dataMap.put(ip, data);
        }
        data.queries.add(new QueryData(sessionId,query));
    }


    // extract the N most common query sequences
    private ArrayList<Pair<Integer,String>> filterSessionData(HashMap<String,SessionData> dataMap, int n, int order) {
        HashMap<String,Integer> sequenceMap = new HashMap<String,Integer>();
        for (SessionData sessionData : dataMap.values()) {
            String seq = "";
            int lastId = 0;
            int len = 0;
            for (QueryData queryData : sessionData.queries) {
                if (queryData.sessionId == lastId) {
                    seq += " --> " + queryData.query; // TODO: fix this, it should be a struct
                    len++;
                } else {
                    if (len > 1) {
                        countData(sequenceMap, seq);
                    }
                    lastId = queryData.sessionId;
                    seq = queryData.query;
                    len = 1;
                }
            }
            if (len > 1) {
                countData(sequenceMap, seq);
            }
        }
        return filterData(sequenceMap,n,order);
    }


    // sort the data by the number stored and returns the first N items.
    private ArrayList<Pair<Integer,String>> filterData(HashMap<String,Integer> dataMap, int n, final int order) {
        ArrayList<Pair<Integer,String>> dataArray = new ArrayList<Pair<Integer,String>>();
        for (Map.Entry<String,Integer> entry : dataMap.entrySet()) {
            dataArray.add(new Pair<Integer,String>(entry.getValue(),entry.getKey()));
        }
        Collections.sort(dataArray, new Comparator<Pair<Integer,String>>(){
                    public int compare(Pair<Integer,String> o1, Pair<Integer,String> o2) {
                        int cmp = order * o1.first().compareTo(o2.first());
                        if (0 == cmp) cmp = o1.last().compareTo(o2.last());
                        return cmp;
                    }
                }
            );
        while (dataArray.size() > n) {
            dataArray.remove(dataArray.size()-1);
        }
        return dataArray;
    }

    // same as above but for averaged data
    private ArrayList<Pair<Integer,String>> filterAvgData(HashMap<String,ArrayList<Integer>> dataMap, int n, final int order) {
        HashMap<String,Integer> avgMap = new HashMap<String,Integer>();
        for (Map.Entry<String,ArrayList<Integer>> entry : dataMap.entrySet()) {
            int avg = 0;
            ArrayList<Integer> data = entry.getValue();
            if (data.size() > 0) {
                for (int num : data) {
                    avg += num;
                }
                avg /= data.size();
            }
            avgMap.put(entry.getKey(), avg);
        }
        return filterData(avgMap, n, order);
       
    }

    private void show(String msg, ArrayList<Pair<Integer,String>> list) {
        System.out.println("\n"+msg);
        for (Pair<Integer,String> item : list) {
            System.out.println(item.first()+" "+item.last());
        }
        System.out.println("-----------");
    }

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testReports() throws Exception {
        ArrayList<Pair<Integer,String>> report, control;

        // test the mostSearchedQueries report
        report = Report.mostSearchedQueries(10, null, null, null);
        control = filterData(queryMap,10,-1);
//show("mostSearchedQueries",report);
        assertTrue("mostSearchedQueries report did not match the expected results", report.equals(control));

        // test the worstPlacedResults report
        report = Report.worstPlacedResults(10, null, null, null);
        control = filterAvgData(clickedResultMap,10,-1);
//show("worstPlacedResults",report);
        assertTrue("worstPlacedResults report did not match the expected results", report.equals(control));
       
        // test the bestPlacedSpam report
        report = Report.bestPlacedSpam(10, null, null, null);
        control = filterData(unclickedResultMap,10,-1);
//show("bestPlacedSpam",report);
        assertTrue("bestPlacedSpam report did not match the expected results", report.equals(control));
       
        // test the mostUnsuccessfulQueries report
        report = Report.mostUnsuccessfulQueries(10, null, null, null);
        control = filterData(unclickedQueryMap,10,-1);
//show("mostUnsuccessfulQueries",report);
        assertTrue("mostUnsuccessfulQueries report did not match the expected results", report.equals(control));

        // test the mostCommonQuerySequences report
        report = Report.mostCommonQuerySequences(10, null, null, null);
        control = filterSessionData(sessionMap,10,-1);
//show("mostCommonQuerySequences",report);
        assertTrue("mostCommonQuerySequences report did not match the expected results", report.equals(control));
    }

}
TOP

Related Classes of com.flaptor.hounder.loganalysis.ReportTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.