Package lupos.datastructures.sort

Source Code of lupos.datastructures.sort.Sort

/**
* Copyright (c) 2013, Institute of Information Systems (Sven Groppe and contributors of LUPOSDATE), University of Luebeck
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
*   - Redistributions of source code must retain the above copyright notice, this list of conditions and the following
*     disclaimer.
*   - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
*     following disclaimer in the documentation and/or other materials provided with the distribution.
*   - Neither the name of the University of Luebeck nor the names of its contributors may be used to endorse or promote
*     products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package lupos.datastructures.sort;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;

import lupos.datastructures.dbmergesortedds.DBMergeSortedBag;
import lupos.datastructures.dbmergesortedds.DiskCollection;
import lupos.datastructures.sort.run.Run;
import lupos.datastructures.sort.run.SORTTYPE;
import lupos.datastructures.sort.sorter.ExternalParallelSorter;
import lupos.datastructures.sort.sorter.ExternalSorter;
import lupos.datastructures.sort.sorter.ReplacementSelectionSorter;
import lupos.datastructures.sort.sorter.Sorter;
import lupos.engine.evaluators.QueryEvaluator;
import lupos.misc.TimeInterval;

public class Sort {
  public enum SORTER {
    PARALLEL {

      @Override
      public Sorter createInstance(final String[] args, final int pos) {
        if(args.length==pos){
          return new ExternalParallelSorter();
        } else if(args.length==pos+6){
          final int NUMBER_ELEMENTS_IN_INITIAL_RUNS = Integer.parseInt(args[pos+3]);
          final String detPar = args[pos+1].toUpperCase();
          final boolean isDeterministic =   detPar.compareTo("D")==0         ||
                            detPar.compareTo("DET")==0         ||
                            detPar.compareTo("DETERMINISTIC")==0;
          return new ExternalParallelSorter(Integer.parseInt(args[pos+2]), NUMBER_ELEMENTS_IN_INITIAL_RUNS, Integer.parseInt(args[pos+4]), Long.parseLong(args[pos+5]), SORTTYPE.valueOf(args[pos]).createRuns(NUMBER_ELEMENTS_IN_INITIAL_RUNS), isDeterministic);
        } else {
          return null;
        }
      }

      @Override
      public String getHelpText(final String indent) {
        return "[SORTTYPE (D|F) NUMBER_INITIAL_RUN_GENERATION_THREADS NUMBER_ELEMENTS_IN_INITIAL_RUNS NUMBER_OF_RUNS_TO_JOIN PARAMETER_FOR_SWAPPING]\n" + indent + "SORTTYPE can be "+Arrays.toString(SORTTYPE.values());
      }

      @Override
      public String getExampleText() {
        return "D BAG 8 10000 2 10";
      }

    },
    ASYNCHRONOUS {

      @Override
      public Sorter createInstance(final String[] args, final int pos) {
        if(args.length==pos){
          return new ExternalSorter();
        } else if(args.length==pos+2){
          final int NUMBER_ELEMENTS_IN_INITIAL_RUNS = Integer.parseInt(args[pos+1]);
          return new ExternalSorter(SORTTYPE.valueOf(args[pos]).createRuns(NUMBER_ELEMENTS_IN_INITIAL_RUNS), NUMBER_ELEMENTS_IN_INITIAL_RUNS);
        } else {
          return null;
        }
      }

      @Override
      public String getHelpText(final String indent) {
        return "[SORTTYPE NUMBER_ELEMENTS_IN_INITIAL_RUNS]\n" + indent + "SORTTYPE can be "+Arrays.toString(SORTTYPE.values());
      }

      @Override
      public String getExampleText() {
        return "BAG 10000";
      }

    },
    REPLACEMENTSELECTION{

      @Override
      public Sorter createInstance(final String[] args, final int pos) {
        if(args.length==pos+3){
          return new ReplacementSelectionSorter(args[pos].toUpperCase().compareTo("SET")==0, Integer.parseInt(args[pos+1]), Integer.parseInt(args[pos+2]));
        } else {
          return null;
        }
      }

      @Override
      public String getHelpText(final String indent) {
        return "(SET|BAG) HEIGHT HEIGHT_OF_MERGEHEAP\n" + indent + "HEIGHT is the height of the heap during generating the initial runs\n" + indent + "HEIGHT_OF_MERGEHEAP is the height of the heap used during merging\n" + indent + "The number of elements stored in main memory is (2^(HEIGHT+1))-1 and (2^(HEIGHT_OF_MERGEHEAP))-1 respectively";
      }

      @Override
      public String getExampleText() {
        return "BAG 12 5";
      }

    };

    public abstract Sorter createInstance(final String[] args, final int pos);
    public abstract String getHelpText(final String indent);
    public abstract String getExampleText();
  }

  /**
   * Main method to measure the execution time for different external sorting algorithms.
   * @param args command line arguments
   * @throws Exception in case of any errors
   */
  public static void main(final String[] args) throws Exception{
    System.out.println("Sorting a large collection of Strings or RDF terms of large RDF data...");
    if(args.length<4){
      System.out.println(Sort.getHelpText());
      return;
    }
    SORTER sorter = SORTER.valueOf(args[0]);
    if(sorter==null){
      System.out.println(Sort.getHelpText());
      return;
    }
    Sorter algo = sorter.createInstance(args, 4);
    if(algo==null){
      System.out.println(Sort.getHelpText());
      return;
    }

    final int times = Integer.parseInt(args[3]);

    // just to use for deleting temporary files...
    // final File file = new File("");
    // final String absolutePath = file.getAbsolutePath() + File.separator;

    System.out.println("\nParameters:\n-----------\nMain Strategy:" + sorter.name() + "\n" + algo.parametersToString() + "\n");

    final long[] execution_times = new long[times];
    long total_time = 0;
    for(int t=0; t<times; t++){
      sorter = SORTER.valueOf(args[0]);
      if(sorter==null){
        System.out.println(Sort.getHelpText());
        return;
      }
      algo = sorter.createInstance(args, 4);
      if(algo==null){
        System.out.println(Sort.getHelpText());
        return;
      }

      final Date start = new Date();
      System.out.println("\n"+t+": Start processing:"+start+"\n");

      final Run result = algo.sort(new BufferedInputStream(new FileInputStream(args[1])), args[2]);

      // just access all elements in the bag by iterating one time through
      final Iterator<String> it = result.iterator();
      long i=0;
      while(it.hasNext()){
        it.next();
        i++;
        // System.out.println((++i)+":"+it.next());
      }
      result.release();
      final Date end = new Date();

      System.out.println("\n"+t+": End processing:"+end);
      System.out.println("\nNumber of sorted RDF terms/Strings:"+i);
      System.out.println("Number of runs swapped to disk:" + algo.getNumberOfRunsOnDisk());

      execution_times[t] = end.getTime()-start.getTime();
      total_time += execution_times[t];

      DiskCollection.removeCollectionsFromDisk();
      DBMergeSortedBag.removeBagsFromDisk();
    }

    final long avg = total_time / times;

    System.out.println("\nDuration:   " + QueryEvaluator.toString(execution_times) + " = " + (((double) total_time / times) / 1000) + " seconds\n          = " + new TimeInterval(avg));
    System.out.println("Sample Standard Deviation: " + (QueryEvaluator.computeSampleStandardDeviation(execution_times) / 1000) + " seconds");
    System.out.println("Standard Deviation of the Sample: " + (QueryEvaluator.computeStandardDeviationOfTheSample(execution_times) / 1000) + " seconds");
  }

  public static String getHelpText(){
    String result = "Call Sort in the following way:\n\njava lupos.datastructures.sort.Sort ALGO DATAFILE FORMAT TIMES SORTARGS\n\n";
    result += "ALGO can be one of " + Arrays.toString(SORTER.values()) + "\n";
    result += "DATAFILE contains the file with data (containing strings or RDF data)\n";
    result += "FORMAT can be STRING for a large collection of strings in one file, MULTIPLESTRING for a list of files containing strings to be read, BZIP2STRING and MULTIPLEBZIP2STRING for analogous, but BZIP2 compressed files, or an RDF format like N3\n";
    result += "TIMES is the number of repetitions to calculate an average execution time\n\n";
    result += "ALGO                   | SORTARGS\n";
    result += "--------------------------------------------------------------------------------------------------------------------------------------------------\n";
    for(final SORTER sorter: SORTER.values()){
      result += sorter.name() + spaces(23-sorter.name().length()) + "| " + sorter.getHelpText("                       | ")+"\n";
    }
    result += "\nExamples:\n";
    for(final SORTER sorter: SORTER.values()){
      result +="java -server -XX:+UseParallelGC -XX:+AggressiveOpts -Xms60G -Xmx60G lupos.datastructures.sort.sorter.Sort " + sorter.name() + " SomeFiles.txt MULTIPLEN3 10 " +sorter.getExampleText() + "\n";
    }
    return result;
  }

  private static String spaces(final int number){
    final StringBuilder s=new StringBuilder();
    for(int i=0; i<number; i++){
      s.append(" ");
    }
    return s.toString();
  }
}
TOP

Related Classes of lupos.datastructures.sort.Sort

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.