Examples of org.apache.flink.api.java.record.operators.FileDataSource

Package org.apache.flink.api.java.record.operators

Examples of org.apache.flink.api.java.record.operators.FileDataSource

org.apache.flink.api.java.record.operators.FileDataSource
See FLINK-1106 (https://issues.apache.org/jira/browse/FLINK-1106) for more details. Operator for input nodes which read data from files. (For Record data model)


  }


  @Override
  protected Plan getTestJob() {
    FileDataSource input_left =  new FileDataSource(new CoGroupTestInFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_left)
      .recordDelimiter('\n');
    input_left.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));


    FileDataSource input_right =  new FileDataSource(new CoGroupTestInFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_right)
      .recordDelimiter('\n');
    input_right.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));


    CoGroupOperator testCoGrouper = CoGroupOperator.builder(new TestCoGrouper(), StringValue.class, 0, 0)
      .build();
    testCoGrouper.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));
    testCoGrouper.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,

View Full Code Here

     *                   +-------------+
     * </pre>
     */
    public Plan getPlan(String inputPath, String outputPath, int numSubtasks) {


      FileDataSource source = new FileDataSource(TextInputFormat.class, inputPath, "First Input");


      MapOperator wordsFirstInput = MapOperator.builder(TokenizeLine.class)
        .input(source)
        .name("Words (First Input)")
        .build();

View Full Code Here

    String dataPointInput = (args.length > 1 ? args[1] : "");
    String clusterInput = (args.length > 2 ? args[2] : "");
    String output = (args.length > 3 ? args[3] : "");


    // create DataSourceContract for data point input
    @SuppressWarnings("unchecked")
    FileDataSource pointsSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), dataPointInput, "Data Points");


    // create DataSourceContract for cluster center input
    @SuppressWarnings("unchecked")
    FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers");
    
    MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build();
    
    MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build();

View Full Code Here

    }else
    {
      setArgs(args);
    }
    
    FileDataSource orders = 
      new FileDataSource(new IntTupleDataInFormat(), this.ordersInputPath, "Orders");
    orders.setDegreeOfParallelism(this.degreeOfParallelism);
    //orders.setOutputContract(UniqueKey.class);
    
    FileDataSource lineItems =
      new FileDataSource(new IntTupleDataInFormat(), this.lineItemInputPath, "LineItems");
    lineItems.setDegreeOfParallelism(this.degreeOfParallelism);
    
    FileDataSink result = 
        new FileDataSink(new StringTupleDataOutFormat(), this.outputPath, "Output");
    result.setDegreeOfParallelism(degreeOfParallelism);

View Full Code Here

  protected JobGraph getJobGraph() throws Exception {
    
    String path1 = config.getBoolean("input1PathHasData", false) ? textInput : emptyInput;
    String path2 = config.getBoolean("input2PathHasData", false) ? textInput : emptyInput;
    
    FileDataSource input1 = new FileDataSource(new ContractITCaseInputFormat(), path1);
    FileDataSource input2 = new FileDataSource(new ContractITCaseInputFormat(), path2);
    
    MapOperator testMapper1 = MapOperator.builder(new TestMapper()).build();
    MapOperator testMapper2 = MapOperator.builder(new TestMapper()).build();


    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultDir);

View Full Code Here

      // parse program parameters
      final int numSubtasks     = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
      final String recordsPath = (args.length > 1 ? args[1] : "");
      final String output      = (args.length > 2 ? args[2] : "");


      @SuppressWarnings("unchecked")
      FileDataSource source = new FileDataSource(new CsvInputFormat(',', IntValue.class, IntValue.class, IntValue.class), recordsPath);


      FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output);
      CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(',')

View Full Code Here

    }
  }


  static Plan getTestPlan(int numSubTasks, String input, String output) {


    FileDataSource initialInput = new FileDataSource(new PointInFormat(), input, "Input");
    initialInput.setDegreeOfParallelism(1);


    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(2);

View Full Code Here

    final String edgeInput = (args.length > 2 ? args[2] : "");
    final String output = (args.length > 3 ? args[3] : "");
    final int maxIterations = (args.length > 4 ? Integer.parseInt(args[4]) : 1);


    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
    
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
    
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
    
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");


    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)

View Full Code Here

    /*
     * Output Schema:
     * 0: CUSTOMER_ID
     */
    // create DataSourceContract for Orders input
    FileDataSource orders = new FileDataSource(new CsvInputFormat(), ordersPath, "Orders");
    orders.setDegreeOfParallelism(numSubtasks);
    CsvInputFormat.configureRecordFormat(orders)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 1);
    
    /*
     * Output Schema:
     * 0: CUSTOMER_ID
     * 1: MKT_SEGMENT
     */
    // create DataSourceContract for Customer input
    FileDataSource customers = new FileDataSource(new CsvInputFormat(), customerPath, "Customers");
    customers.setDegreeOfParallelism(numSubtasks);
    CsvInputFormat.configureRecordFormat(customers)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(IntValue.class, 0)
      .field(StringValue.class, 6);

View Full Code Here


  }


  @Override
  protected Plan getTestJob() {
    FileDataSource input_left = new FileDataSource(
        new ContractITCaseInputFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_left)
      .recordDelimiter('\n');
    input_left.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1));


    FileDataSource input_right = new FileDataSource(
        new ContractITCaseInputFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_right)
      .recordDelimiter('\n');
    input_right.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1));


    JoinOperator testMatcher = JoinOperator.builder(new TestMatcher(), StringValue.class, 0, 0)
      .build();
    testMatcher.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1));
    testMatcher.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,

View Full Code Here

0 1 2 3 4 5 6 7 8

TOP

Related Classes of org.apache.flink.api.java.record.operators.FileDataSource

org.apache.flink.compiler.AdditionalOperatorsTest

org.apache.flink.compiler.BranchingPlansCompilerTest

org.apache.flink.compiler.DOPChangeTest

org.apache.flink.compiler.GroupOrderTest

org.apache.flink.compiler.HardPlansCompilationTest

org.apache.flink.compiler.ReduceAllTest

org.apache.flink.compiler.UnionPropertyPropagationTest

org.apache.flink.compiler.WorksetIterationsRecordApiCompilerTest

org.apache.flink.test.accumulators.AccumulatorIterativeITCase

org.apache.flink.test.broadcastvars.BroadcastBranchingITCase

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.