Package org.apache.flink.api.java.record.io

Examples of org.apache.flink.api.java.record.io.CsvInputFormat


  // Job vertex builder methods
  // -------------------------------------------------------------------------------------------------------------

  @SuppressWarnings("unchecked")
  private static InputFormatVertex createPointsInput(JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    CsvInputFormat pointsInFormat = new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class, LongValue.class);
    InputFormatVertex pointsInput = JobGraphUtils.createInput(pointsInFormat, pointsPath, "Input[Points]", jobGraph, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(pointsInput.getConfiguration());
      taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
View Full Code Here


    return pointsInput;
  }

  @SuppressWarnings("unchecked")
  private static InputFormatVertex createModelsInput(JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    CsvInputFormat modelsInFormat = new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class, LongValue.class);
    InputFormatVertex modelsInput = JobGraphUtils.createInput(modelsInFormat, pointsPath, "Input[Models]", jobGraph, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(modelsInput.getConfiguration());
      taskConfig.addOutputShipStrategy(ShipStrategyType.BROADCAST);
View Full Code Here

  // -------------------------------------------------------------------------------------------------------------
  // Job vertex builder methods
  // -------------------------------------------------------------------------------------------------------------

  private static InputFormatVertex createPointsInput(JobGraph jobGraph, String pointsPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    @SuppressWarnings("unchecked")
    CsvInputFormat pointsInFormat = new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);
    InputFormatVertex pointsInput = JobGraphUtils.createInput(pointsInFormat, pointsPath, "[Points]", jobGraph, numSubTasks);
    {
      TaskConfig taskConfig = new TaskConfig(pointsInput.getConfiguration());
      taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
      taskConfig.setOutputSerializer(serializer);
View Full Code Here

    return pointsInput;
  }

  private static InputFormatVertex createCentersInput(JobGraph jobGraph, String centersPath, int numSubTasks, TypeSerializerFactory<?> serializer) {
    @SuppressWarnings("unchecked")
    CsvInputFormat modelsInFormat = new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class);
    InputFormatVertex modelsInput = JobGraphUtils.createInput(modelsInFormat, centersPath, "[Models]", jobGraph, numSubTasks);

    {
      TaskConfig taskConfig = new TaskConfig(modelsInput.getConfiguration());
      taskConfig.addOutputShipStrategy(ShipStrategyType.FORWARD);
View Full Code Here

    String input2Path    = (args.length > 2 ? args[2] : "");
    String output        = (args.length > 3 ? args[3] : "");
    int numSubtasksInput2 = (args.length > 4 ? Integer.parseInt(args[4]) : 1);

    // create DataSourceContract for the first input
    @SuppressWarnings("unchecked")
    CsvInputFormat format1 = new CsvInputFormat('|', IntValue.class, IntValue.class);
    FileDataSource input1 = new FileDataSource(format1, input1Path, "Input 1");
   
    ReduceOperator aggInput1 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
      .input(input1)
      .name("AggOrders")
      .build();

   
    // create DataSourceContract for the second input
    @SuppressWarnings("unchecked")
    CsvInputFormat format2 = new CsvInputFormat('|', IntValue.class, IntValue.class);
    FileDataSource input2 = new FileDataSource(format2, input2Path, "Input 2");
    input2.setDegreeOfParallelism(numSubtasksInput2);

    ReduceOperator aggInput2 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
      .input(input2)
View Full Code Here

    final String dependencySetInput = (args.length > 3 ? args[3] : "");
    final String output = (args.length > 4 ? args[4] : "");
    final int maxIterations = (args.length > 5 ? Integer.parseInt(args[5]) : 1);
   
    // create DataSourceContract for the initial solution set
    FileDataSource initialSolutionSet = new FileDataSource(new CsvInputFormat(' ', LongValue.class, DoubleValue.class), solutionSetInput, "Initial Solution Set");

    // create DataSourceContract for the initial delta set
    FileDataSource initialDeltaSet = new FileDataSource(new CsvInputFormat(' ', LongValue.class, DoubleValue.class), deltasInput, "Initial DeltaSet");
       
    // create DataSourceContract for the edges
    FileDataSource dependencySet = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class), dependencySetInput, "Dependency Set");
   
    DeltaIteration iteration = new DeltaIteration(0, "Delta PageRank");
    iteration.setInitialSolutionSet(initialSolutionSet);
    iteration.setInitialWorkset(initialDeltaSet);
    iteration.setMaximumNumberOfIterations(maxIterations);
View Full Code Here

   
    String lineitemsPath = (args.length > 5 ? args[5] : "");
    String output        = (args.length > 6 ? args[6] : "");

    // create DataSourceContract for Orders input
    FileDataSource orders1 = new FileDataSource(new CsvInputFormat(), orders1Path, "Orders 1");
    CsvInputFormat.configureRecordFormat(orders1)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(IntValue.class, 7)     // ship prio
      .field(StringValue.class, 2, 2)  // order status
      .field(StringValue.class, 4, 10) // order date
      .field(StringValue.class, 5, 8); // order prio
   
    FileDataSource orders2 = new FileDataSource(new CsvInputFormat(), orders2Path, "Orders 2");
    CsvInputFormat.configureRecordFormat(orders2)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)    // order id
      .field(IntValue.class, 7)     // ship prio
      .field(StringValue.class, 2, 2)  // order status
      .field(StringValue.class, 4, 10) // order date
      .field(StringValue.class, 5, 8); // order prio
   
    // create DataSourceContract for LineItems input
    FileDataSource lineitems = new FileDataSource(new CsvInputFormat(), lineitemsPath, "LineItems");
    CsvInputFormat.configureRecordFormat(lineitems)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)
      .field(DoubleValue.class, 5);

    // create MapOperator for filtering Orders tuples
    MapOperator filterO1 = MapOperator.builder(new FilterO())
      .name("FilterO")
      .input(orders1)
      .build();
    // filter configuration
    filterO1.setParameter(TPCHQuery3.YEAR_FILTER, 1993);
    filterO1.setParameter(TPCHQuery3.PRIO_FILTER, "5");
    filterO1.getCompilerHints().setFilterFactor(0.05f);
   
    // create MapOperator for filtering Orders tuples
    MapOperator filterO2 = MapOperator.builder(new FilterO())
      .name("FilterO")
      .input(orders2)
      .build();
    // filter configuration
    filterO2.setParameter(TPCHQuery3.YEAR_FILTER, 1993);
    filterO2.setParameter(TPCHQuery3.PRIO_FILTER, "5");

    // create JoinOperator for joining Orders and LineItems
    @SuppressWarnings("unchecked")
    JoinOperator joinLiO = JoinOperator.builder(new JoinLiO(), LongValue.class, 0, 0)
      .input1(filterO2, filterO1)
      .input2(lineitems)
      .name("JoinLiO")
      .build();
   
    FileDataSource partJoin1 = new FileDataSource(new CsvInputFormat(), partJoin1Path, "Part Join 1");
    CsvInputFormat.configureRecordFormat(partJoin1)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)
      .field(IntValue.class, 1)
      .field(DoubleValue.class, 2);
   
    FileDataSource partJoin2 = new FileDataSource(new CsvInputFormat(), partJoin2Path, "Part Join 2");
    CsvInputFormat.configureRecordFormat(partJoin2)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(LongValue.class, 0)
      .field(IntValue.class, 1)
View Full Code Here

    // parse job parameters
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    @SuppressWarnings("unchecked")
    CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class);
    FileDataSource input = new FileDataSource(format, dataInput, "Input");
   
    // create the reduce contract and set the key to the first field
    ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0)
      .input(input)
View Full Code Here

  //                      /
  //    Sc3(id,y) --------
  @Override
  protected Plan getTestJob() {
    // Sc1 generates M parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c identified by id
    FileDataSource sc1 = new FileDataSource(new CsvInputFormat(), sc1Path);
    CsvInputFormat.configureRecordFormat(sc1).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1)
        .field(IntValue.class, 2).field(IntValue.class, 3);

    // Sc2 generates N x values to be evaluated with the polynomial identified by id
    FileDataSource sc2 = new FileDataSource(new CsvInputFormat(), sc2Path);
    CsvInputFormat.configureRecordFormat(sc2).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1);

    // Sc3 generates N y values to be evaluated with the polynomial identified by id
    FileDataSource sc3 = new FileDataSource(new CsvInputFormat(), sc3Path);
    CsvInputFormat.configureRecordFormat(sc3).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1);

    // Jn1 matches x and y values on id and emits (id, x, y) triples
    JoinOperator jn1 = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0).input1(sc2).input2(sc3).build();
View Full Code Here

  @SuppressWarnings("unchecked")
  private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations)
  {
    // create DataSourceContract for the vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

TOP

Related Classes of org.apache.flink.api.java.record.io.CsvInputFormat

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.