Package eu.stratosphere.api.java.record.io

Examples of eu.stratosphere.api.java.record.io.CsvInputFormat


    String output = (args.length > 3 ? args[3] : "");
    int numIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 2);

    // data source data point input
    @SuppressWarnings("unchecked")
    FileDataSource pointsSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), dataPointInput, "Data Points");

    // data source for cluster center input
    @SuppressWarnings("unchecked")
    FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
   
    MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
   
    MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
   
View Full Code Here


    final String dependencySetInput = (args.length > 3 ? args[3] : "");
    final String output = (args.length > 4 ? args[4] : "");
    final int maxIterations = (args.length > 5 ? Integer.parseInt(args[5]) : 1);
   
    // create DataSourceContract for the initialSolutionSet
    FileDataSource initialSolutionSet = new FileDataSource(new CsvInputFormat(' ', LongValue.class, DoubleValue.class), solutionSetInput, "Initial Solution Set");

    // create DataSourceContract for the initialDeltaSet
    FileDataSource initialDeltaSet = new FileDataSource(new CsvInputFormat(' ', LongValue.class, DoubleValue.class), deltasInput, "Initial DeltaSet");
       
    // create DataSourceContract for the edges
    FileDataSource dependencySet = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class), dependencySetInput, "Dependency Set");
   
    DeltaIteration iteration = new DeltaIteration(0, "Delta PageRank");
    iteration.setInitialSolutionSet(initialSolutionSet);
    iteration.setInitialWorkset(initialDeltaSet);
    iteration.setMaximumNumberOfIterations(maxIterations);
View Full Code Here

    String clusterInput = (args.length > 2 ? args[2] : "");
    String output = (args.length > 3 ? args[3] : "");

    // create DataSourceContract for data point input
    @SuppressWarnings("unchecked")
    FileDataSource pointsSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), dataPointInput, "Data Points");

    // create DataSourceContract for cluster center input
    @SuppressWarnings("unchecked")
    FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
   
    MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
   
    MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();
View Full Code Here

    final String ordersPath    = (args.length > 1 ? args[1] : "");
    final String lineitemsPath = (args.length > 2 ? args[2] : "");
    final String output        = (args.length > 3 ? args[3] : "");

    // create DataSourceContract for Orders input
    FileDataSource orders = new FileDataSource(new CsvInputFormat(), ordersPath, "Orders");
    CsvInputFormat.configureRecordFormat(orders)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(IntValue.class, 7)     // ship prio
      .field(StringValue.class, 2, 2)  // order status
      .field(StringValue.class, 4, 10) // order date
      .field(StringValue.class, 5, 8); // order prio

    // create DataSourceContract for LineItems input
    FileDataSource lineitems = new FileDataSource(new CsvInputFormat(), lineitemsPath, "LineItems");
    CsvInputFormat.configureRecordFormat(lineitems)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(DoubleValue.class, 5)// extended price
View Full Code Here

     * Output Format:
     * 0: URL
     * 1: DOCUMENT_TEXT
     */
    // Create DataSourceContract for documents relation
    @SuppressWarnings("unchecked")
    CsvInputFormat docsFormat = new CsvInputFormat('|', StringValue.class, StringValue.class);
    FileDataSource docs = new FileDataSource(docsFormat, docsInput, "Docs Input");
   
    /*
     * Output Format:
     * 0: URL
     * 1: RANK
     * 2: AVG_DURATION
     */
    // Create DataSourceContract for ranks relation
    FileDataSource ranks = new FileDataSource(new CsvInputFormat(), ranksInput, "Ranks input");
    CsvInputFormat.configureRecordFormat(ranks)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(StringValue.class, 1)
      .field(IntValue.class, 0)
      .field(IntValue.class, 2);

    /*
     * Output Format:
     * 0: URL
     * 1: DATE
     */
    // Create DataSourceContract for visits relation
    @SuppressWarnings("unchecked")
    CsvInputFormat visitsFormat = new CsvInputFormat('|', null, StringValue.class, StringValue.class);
    FileDataSource visits = new FileDataSource(visitsFormat, visitsInput, "Visits input:q");

    // Create MapOperator for filtering the entries from the documents
    // relation
    MapOperator filterDocs = MapOperator.builder(new FilterDocs())
View Full Code Here

      final int numSubtasks     = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
      final String recordsPath = (args.length > 1 ? args[1] : "");
      final String output      = (args.length > 2 ? args[2] : "");
     
      @SuppressWarnings("unchecked")
      FileDataSource source = new FileDataSource(new CsvInputFormat(',', IntValue.class, IntValue.class, IntValue.class), recordsPath);
     
      FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output);
      CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(',')
View Full Code Here

  @SuppressWarnings("unchecked")
  private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations)
  {
    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

    /*
     * Output Schema:
     * 0: CUSTOMER_ID
     */
    // create DataSourceContract for Orders input
    FileDataSource orders = new FileDataSource(new CsvInputFormat(), ordersPath, "Orders");
    orders.setDegreeOfParallelism(numSubtasks);
    CsvInputFormat.configureRecordFormat(orders)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 1);
   
    /*
     * Output Schema:
     * 0: CUSTOMER_ID
     * 1: MKT_SEGMENT
     */
    // create DataSourceContract for Customer input
    FileDataSource customers = new FileDataSource(new CsvInputFormat(), customerPath, "Customers");
    customers.setDegreeOfParallelism(numSubtasks);
    CsvInputFormat.configureRecordFormat(customers)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 0)
View Full Code Here

 
  @SuppressWarnings("unchecked")
  public static Plan getPlan(int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations) {

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

    String input2Path    = (args.length > 2 ? args[2] : "");
    String output        = (args.length > 3 ? args[3] : "");
    int numSubtasksInput2 = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // create DataSourceContract for input 1
    @SuppressWarnings("unchecked")
    CsvInputFormat format1 = new CsvInputFormat('|', IntValue.class, IntValue.class);
    FileDataSource input1 = new FileDataSource(format1, input1Path, "Input 1");
   
    ReduceOperator aggInput1 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
      .input(input1)
      .name("AggOrders")
      .build();

   
    // create DataSourceContract for input 2
    @SuppressWarnings("unchecked")
    CsvInputFormat format2 = new CsvInputFormat('|', IntValue.class, IntValue.class);
    FileDataSource input2 = new FileDataSource(format2, input2Path, "Input 2");
    input2.setDegreeOfParallelism(numSubtasksInput2);

    ReduceOperator aggInput2 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
      .input(input2)
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.io.CsvInputFormat

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact: coftware#gmail.com.