Package org.apache.flink.api.java.record.operators

Examples of org.apache.flink.api.java.record.operators.FileDataSource


  @SuppressWarnings("unchecked")
  private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(
      int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations)
  {
    // create DataSourceContract for the vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here


    KMeansBroadcast kmi = new KMeansBroadcast();
    Plan p = kmi.getPlan(String.valueOf(DEFAULT_PARALLELISM), IN_FILE, IN_FILE, OUT_FILE, String.valueOf(20));
   
    // set the statistics
    OperatorResolver cr = getContractResolver(p);
    FileDataSource pointsSource = cr.getNode(DATAPOINTS);
    FileDataSource centersSource = cr.getNode(CENTERS);
    setSourceStatistics(pointsSource, 100l*1024*1024*1024, 32f);
    setSourceStatistics(centersSource, 1024*1024, 32f);
   
    OptimizedPlan plan = compileWithStats(p);
    checkPlan(plan);
View Full Code Here

  @Override
  protected JobGraph getFailingJobGraph() throws Exception {
   
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init failing map task
    MapOperator testMapper = MapOperator.builder(FailingMapper.class).build();

    // init data sink
View Full Code Here

  @Override
  protected JobGraph getJobGraph() throws Exception {
   
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init (working) map task
    MapOperator testMapper = MapOperator.builder(TestMapper.class).build();

    // init data sink
View Full Code Here

  }

  @Override
  protected Plan getTestJob() {

    FileDataSource input_left = new FileDataSource(
        new ContractITCaseInputFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_left)
      .recordDelimiter('\n');
    input_left.setDegreeOfParallelism(config.getInteger("CrossTest#NoSubtasks", 1));

    FileDataSource input_right = new FileDataSource(
        new ContractITCaseInputFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(input_right)
      .recordDelimiter('\n');
    input_right.setDegreeOfParallelism(config.getInteger("CrossTest#NoSubtasks", 1));

    CrossOperator testCross = CrossOperator.builder(new TestCross()).build();
    testCross.setDegreeOfParallelism(config.getInteger("CrossTest#NoSubtasks", 1));
    testCross.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,
        config.getString("CrossTest#LocalStrategy", ""));
View Full Code Here

 
  @SuppressWarnings("unchecked")
  public static Plan getPlan(int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations) {

    // data source for initial vertices
    FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices");
   
    MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build();
   
    DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration");
    iteration.setInitialSolutionSet(verticesWithId);
    iteration.setInitialWorkset(verticesWithId);
    iteration.setMaximumNumberOfIterations(maxIterations);
   
    // create DataSourceContract for the edges
    FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges");

    // create JoinOperator that joins the iteration workset with the edges
    JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0)
        .input1(iteration.getWorkset())
        .input2(edges)
View Full Code Here

     * 1: DOCUMENT_TEXT
     */
    // Create DataSourceContract for documents relation
    @SuppressWarnings("unchecked")
    CsvInputFormat docsFormat = new CsvInputFormat('|', StringValue.class, StringValue.class);
    FileDataSource docs = new FileDataSource(docsFormat, docsInput, "Docs Input");
   
    /*
     * Output Format:
     * 0: URL
     * 1: RANK
     * 2: AVG_DURATION
     */
    // Create DataSourceContract for ranks relation
    FileDataSource ranks = new FileDataSource(new CsvInputFormat(), ranksInput, "Ranks input");
    CsvInputFormat.configureRecordFormat(ranks)
      .recordDelimiter('\n')
      .fieldDelimiter('|')
      .field(StringValue.class, 1)
      .field(IntValue.class, 0)
      .field(IntValue.class, 2);

    /*
     * Output Format:
     * 0: URL
     * 1: DATE
     */
    // Create DataSourceContract for visits relation
    @SuppressWarnings("unchecked")
    CsvInputFormat visitsFormat = new CsvInputFormat('|', null, StringValue.class, StringValue.class);
    FileDataSource visits = new FileDataSource(visitsFormat, visitsInput, "Visits input:q");

    // Create MapOperator for filtering the entries from the documents
    // relation
    MapOperator filterDocs = MapOperator.builder(new FilterDocs())
      .input(docs)
View Full Code Here

    final String edgeInput = args.length > 1 ? args[1] : "";
    final String output    = args.length > 2 ? args[2] : "";
    final char delimiter   = args.length > 3 ? (char) Integer.parseInt(args[3]) : ',';
   

    FileDataSource edges = new FileDataSource(new EdgeInputFormat(), edgeInput, "Input Edges");
    edges.setParameter(EdgeInputFormat.ID_DELIMITER_CHAR, delimiter);
   
    MapOperator projectEdge = MapOperator.builder(new ProjectEdge())
      .input(edges).name("Project Edge").build();
   
    ReduceOperator edgeCounter = ReduceOperator.builder(new CountEdges(), IntValue.class, 0)
View Full Code Here

    return toParameterList(config1);
  }
 
  static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(NUM_ITERATIONS);
View Full Code Here

    return getTestPlanPlan(DOP, dataPath, resultPath);
  }
 
  private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {

    FileDataSource initialInput = new FileDataSource(TextInputFormat.class, input, "input");
   
    BulkIteration iteration = new BulkIteration("Loop");
    iteration.setInput(initialInput);
    iteration.setMaximumNumberOfIterations(5);
    Assert.assertTrue(iteration.getMaximumNumberOfIterations() > 1);
View Full Code Here

TOP

Related Classes of org.apache.flink.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.