Package eu.stratosphere.api.java.record.operators

Examples of eu.stratosphere.api.java.record.operators.FileDataSink


      .input(findNearestClusterCenters)
      .name("Recompute Center Positions")
      .build();

    // create the FileDataSink that writes the new cluster positions
    FileDataSink newClusterPoints = new FileDataSink(new PointOutFormat(), output, recomputeClusterCenter, "New Center Positions");

    // return the plan
    Plan plan = new Plan(newClusterPoints, "KMeans Iteration");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
View Full Code Here


      ReduceOperator counts = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(wordsFirstInput, wordsSecondInput)
        .name("Word Counts")
        .build();

      FileDataSink sink = new FileDataSink(CsvOutputFormat.class, outputPath, counts);
      CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(StringValue.class, 0)
        .field(IntValue.class, 1);
View Full Code Here

   
    iteration.setNextPartialSolution(rankAggregation);
    iteration.setMaximumNumberOfIterations(numIterations);
    iteration.getAggregators().registerAggregationConvergenceCriterion(DotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class, DiffL1NormConvergenceCriterion.class);
   
    FileDataSink out = new FileDataSink(new PageWithRankOutFormat(), outputPath, iteration, "Final Ranks");

    Plan p = new Plan(out, "Dangling PageRank");
    p.setDefaultParallelism(dop);
    return p;
  }
View Full Code Here

      .input(joinLiO)
      .name("AggLio")
      .build();

    // create the FileDataSink that writes the result
    FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, aggLiO, "Output");
    CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .lenient(true)
      .field(LongValue.class, 0)
View Full Code Here

      .input2(filterVisits)
      .name("Antijoin DocsVisits")
      .build();

    // Create the FileDataSink that writes the result of the OLAP query
    FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, antiJoinVisits, "Result");
    result.setDegreeOfParallelism(numSubTasks);
    CsvOutputFormat.configureRecordFormat(result)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .lenient(true)
      .field(IntValue.class, 1)
View Full Code Here

        .name("Join Old and New")
        .build();
   
    iteration.setTerminationCriterion(termination);
   
    FileDataSink out = new FileDataSink(new PageWithRankOutFormat(), outputPath, iteration, "Final Ranks");

    Plan p = new Plan(out, "Simple PageRank");
    p.setDefaultParallelism(dop);
    return p;
  }
View Full Code Here

        .name("Compute termination criterion (Map)")
        .build();
   
    iteration.setTerminationCriterion(terminationMapper);

    FileDataSink finalResult = new FileDataSink(CsvOutputFormat.class, output, iteration, "Output");
    CsvOutputFormat.configureRecordFormat(finalResult)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0);
View Full Code Here

      final String output      = (args.length > 2 ? args[2] : "");
     
      @SuppressWarnings("unchecked")
      FileDataSource source = new FileDataSource(new CsvInputFormat(',', IntValue.class, IntValue.class, IntValue.class), recordsPath);
     
      FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output);
      CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(',')
        .lenient(true)
        .field(IntValue.class, 0)
        .field(IntValue.class, 1)
        .field(IntValue.class, 2);
     
      sink.setGlobalOrder(
        new Ordering(0, IntValue.class, Order.DESCENDING)
          .appendOrdering(1, IntValue.class, Order.ASCENDING)
          .appendOrdering(2, IntValue.class, Order.DESCENDING),
        new TripleIntDistribution(Order.DESCENDING, Order.ASCENDING, Order.DESCENDING));
      sink.setInput(source);
     
      Plan p = new Plan(sink);
      p.setDefaultParallelism(numSubtasks);
      return p;
    }
View Full Code Here

   
    CrossOperator cross2 = CrossOperator.builder(new DummyCrossStub()).name("Cross2").input1(reduce2).input2(source).build();
   
    ReduceOperator reduce3 = ReduceOperator.builder(new IdentityReduce(), IntValue.class, 0).name("Reduce3").input(cross2).build();
   
    FileDataSink sink = new FileDataSink(new DummyOutputFormat(), OUT_FILE, "Sink");
    sink.setInput(reduce3);
   
    Plan plan = new Plan(sink, "Test Temp Task");
    plan.setDefaultParallelism(DEFAULT_PARALLELISM);
   
    OptimizedPlan oPlan = compileNoStats(plan);
View Full Code Here

        .name("Compute sum (Reduce)")
        .build();
   
    iteration.setNextPartialSolution(sumReduce);

    FileDataSink finalResult = new FileDataSink(CsvOutputFormat.class, output, iteration, "Output");
    CsvOutputFormat.configureRecordFormat(finalResult)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0);
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.operators.FileDataSink

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.