Package com.datastax.tutorial.composite

Source Code of com.datastax.tutorial.composite.CompositeDataLoader

package com.datastax.tutorial.composite;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.datastax.tutorial.TutorialBase;
import me.prettyprint.cassandra.serializers.CompositeSerializer;
import me.prettyprint.cassandra.serializers.StringSerializer;
import me.prettyprint.hector.api.beans.Composite;
import me.prettyprint.hector.api.beans.HColumn;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.mutation.Mutator;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Multi-threaded bulk loader for geo data located in the data directory of the
* project root.
*
* Execute this class by invoking the following at the project root:
* mvn -e exec:java -Dexec.mainClass="com.datastax.tutorial.composite.CompositeDataLoader"
* @author zznate
*/
public class CompositeDataLoader extends TutorialBase {
  private static Logger log = LoggerFactory.getLogger(CompositeDataLoader.class);

  private static ExecutorService exec;
  // key for static composite, First row of dynamic composite
  public static final String COMPOSITE_KEY = "ALL";
  public static final String COUNTRY_STATE_CITY_CF = "CountryStateCity";

  public static void main(String[] args) {
    long startTime = System.currentTimeMillis();
    init();
    String fileLocation = properties.getProperty("composites.geodata.file.location","data/geodata.txt");
    BufferedReader reader;
    exec = Executors.newFixedThreadPool(5);
    try {
      reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileLocation)));
      // read 1000 and hand off to worker

      List<String> lines = new ArrayList<String>(1000);
      String line = reader.readLine();

      List<Future<Integer>> sums = new ArrayList<Future<Integer>>();
      while(line != null) {

        lines.add(line);
        if ( lines.size() % 250 == 0 ) {
          doParse(lines, sums);
        }
        line = reader.readLine();
      }
      doParse(lines, sums);

      int total = 0;
      for (Future<Integer> future : sums) {
        // naive wait for completion
        total = total + future.get().intValue();
      }

      log.info("Inserted a total of {} over duration ms: {}", total, System.currentTimeMillis() - startTime);
    } catch (Exception e) {
      log.error("Could not locate file",e);
    } finally {
      exec.shutdown();
    }
    tutorialCluster.getConnectionManager().shutdown();
  }

  private static void doParse(List<String> lines, List<Future<Integer>> sums) {
    Future<Integer> f = exec.submit(new CompositeDataLoader().new LineParser(new ArrayList(lines)));
    sums.add(f);
    lines.clear();
  }


  class LineParser implements Callable<Integer> {

    List<String> lines;
    LineParser(List<String> lines) {
      this.lines = lines;
    }

    public Integer call() throws Exception {
      int count = 0;
      GeoDataLine geoDataLine;
      Mutator<String> mutator = HFactory.createMutator(tutorialKeyspace, StringSerializer.get());

      for (String row : lines) {
        // parse
        geoDataLine = new GeoDataLine(row);
        // assemble the insertions
        mutator.addInsertion(COMPOSITE_KEY, COUNTRY_STATE_CITY_CF, geoDataLine.staticColumnFrom());
       
        count++;
      }
      mutator.execute();
      log.debug("Inserted {} columns", count);
      return Integer.valueOf(count);
    }

  }

  /**
   * This is probably overkill given the simplicity of the data, but is
   * good practice for separation of concerns and encapsulation
   */
  static class GeoDataLine {
    public static final char SEPARATOR_CHAR = ',';
    private String[] vals = new String[10];

    GeoDataLine(String line) {
      vals = StringUtils.split(StringEscapeUtils.unescapeCsv(line), SEPARATOR_CHAR);
      log.debug("array size: {} for row: {}", vals.length, line);
    }

    /**
     * Creates an HColumn with a column name composite of the form:
     *   ['country_code']:['state]:['city name'])
     * and a value of ['timezone']
     * @return
     */
    HColumn<Composite,String> staticColumnFrom() {

      Composite composite = new Composite();
      composite.addComponent(getCountryCode(), StringSerializer.get());
      composite.addComponent(getAdmin1Code(), StringSerializer.get());
      // extra un-escape to handle the case of "Washington, D.C."
      composite.addComponent(StringEscapeUtils.unescapeCsv(getAsciiName()), StringSerializer.get());
      HColumn<Composite,String> col =
        HFactory.createColumn(composite, getTimezone(), new CompositeSerializer(), StringSerializer.get());
      return col;
    }


    String getCountryCode() {
      return vals[0];
    }
    String getAdmin1Code() {
      return vals[1];
    }
    String getAsciiName() {
      return vals[2];
    }
    String getTimezone() {
      return vals[3];
    }
  }
}
TOP

Related Classes of com.datastax.tutorial.composite.CompositeDataLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.