Package edu.brown.benchmark.wikipedia.util

Source Code of edu.brown.benchmark.wikipedia.util.WikipediaUtil

/*******************************************************************************
* oltpbenchmark.com
*  Project Info:  http://oltpbenchmark.com
*  Project Members:  Carlo Curino <carlo.curino@gmail.com>
*                               Evan Jones <ej@evanjones.ca>
*         DIFALLAH Djellel Eddine <djelleleddine.difallah@unifr.ch>
*         Andy Pavlo <pavlo@cs.brown.edu>
*         CUDRE-MAUROUX Philippe <philippe.cudre-mauroux@unifr.ch>
*         Yang Zhang <yaaang@gmail.com>
*
*
*  This library is free software; you can redistribute it and/or modify it under the terms
*  of the GNU General Public License as published by the Free Software Foundation;
*  either version 3.0 of the License, or (at your option) any later version.
*
*  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
*  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*  See the GNU Lesser General Public License for more details.
******************************************************************************/
package edu.brown.benchmark.wikipedia.util;

import java.util.Random;

import edu.brown.benchmark.wikipedia.WikipediaConstants;
import edu.brown.benchmark.wikipedia.data.PageHistograms;
import edu.brown.benchmark.wikipedia.data.RevisionHistograms;
import edu.brown.benchmark.wikipedia.data.UserHistograms;
import edu.brown.rand.RandomDistribution.FlatHistogram;
import edu.brown.rand.RandomDistribution.Zipf;

/**
* Helper class that contains all of the useful information about the benchmark database
* @author pavlo
*/
public final class WikipediaUtil {
 
    private final Random rand;
    private final double scaleFactor;
   
    public final int num_users;
    public final int num_pages;
   
    public final FlatHistogram<Integer> h_nameLength;
    public final FlatHistogram<Integer> h_realNameLength;
    public final FlatHistogram<Integer> h_revCount;
    public final FlatHistogram<Integer> h_titleLength;
    public final FlatHistogram<String> h_restrictions;
    public final Zipf h_watchPageCount;
    public final Zipf h_watchPageId;
    public final FlatHistogram<Integer> h_commentLength;
    public final FlatHistogram<Integer> h_minorEdit;

    /**
     * Constructor
     * @param rand
     * @param scaleFactor
     */
    public WikipediaUtil(Random rand, double scaleFactor) {
        this.rand = rand;
        this.scaleFactor = scaleFactor;
       
        this.num_users = (int) Math.round(WikipediaConstants.USERS * this.scaleFactor);
        this.num_pages = (int) Math.round(WikipediaConstants.PAGES * this.scaleFactor);
       
        this.h_nameLength = new FlatHistogram<Integer>(this.rand, UserHistograms.NAME_LENGTH);
        this.h_realNameLength = new FlatHistogram<Integer>(this.rand, UserHistograms.REAL_NAME_LENGTH);
        this.h_revCount = new FlatHistogram<Integer>(this.rand, UserHistograms.REVISION_COUNT);
        this.h_titleLength = new FlatHistogram<Integer>(this.rand, PageHistograms.TITLE_LENGTH);
        this.h_restrictions = new FlatHistogram<String>(this.rand, PageHistograms.RESTRICTIONS);
        this.h_watchPageCount = new Zipf(this.rand, 0, this.num_pages, WikipediaConstants.NUM_WATCHES_PER_USER_SIGMA);
        this.h_watchPageId = new Zipf(this.rand, 1, this.num_pages, WikipediaConstants.WATCHLIST_PAGE_SIGMA);
        this.h_commentLength = new FlatHistogram<Integer>(this.rand, RevisionHistograms.COMMENT_LENGTH);
        this.h_minorEdit = new FlatHistogram<Integer>(this.rand, RevisionHistograms.MINOR_EDIT);
    }
 
    /**
     * Return the computed namespace for the given pageId
     * @param pageId
     * @return
     */
    public int getPageNameSpace(long pageId) {
        return (0);
    }
   
 
  /**
     *
     * @param orig_text
     * @return
     */
    public char[] generateRevisionText(char orig_text[]) {
        Random randGenerator = new Random();
      
        @SuppressWarnings("unchecked")
        FlatHistogram<Integer> revisionDeltas[] = (FlatHistogram<Integer>[])new FlatHistogram [RevisionHistograms.REVISION_DELTA_SIZES.length];
       
        for (int i = 0; i < revisionDeltas.length; i++) {
            revisionDeltas[i] = new FlatHistogram<Integer>(randGenerator, RevisionHistograms.REVISION_DELTAS[i]);
        } // FOR
        // Figure out how much we are going to change
        // If the delta is greater than the length of the original
        // text, then we will just cut our length in half. Where is your god now?
        // There is probably some sort of minimal size that we should adhere to, but
        // it's 12:30am and I simply don't feel like dealing with that now
        FlatHistogram<Integer> h = null;
        for (int i = 0; i < revisionDeltas.length-1; i++) {
            if (orig_text.length <= RevisionHistograms.REVISION_DELTA_SIZES[i]) {
                h = revisionDeltas[i];
            }
        } // FOR
        if (h == null) h = revisionDeltas[revisionDeltas.length-1];
        assert(h != null);
       
        int delta = h.nextValue().intValue();
        if (orig_text.length + delta <= 0) {
            delta = -1 * (int)Math.round(orig_text.length / 1.5);
            if (Math.abs(delta) == orig_text.length && delta < 0) delta /= 2;
        }
        if (delta != 0) orig_text = TextGenerator.resizeText(randGenerator, orig_text, delta);
       
        // And permute it a little bit. This ensures that the text is slightly
        // different than the last revision
        orig_text = TextGenerator.permuteText(randGenerator, orig_text);
       
        return (orig_text);
    }
 
}
TOP

Related Classes of edu.brown.benchmark.wikipedia.util.WikipediaUtil

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.