Package org.apache.flink.api.java.record.operators

Examples of org.apache.flink.api.java.record.operators.FileDataSource


      numIterations = Integer.parseInt(args[4]);
      numVertices = Long.parseLong(args[5]);
      numDanglingVertices = Long.parseLong(args[6]);
    }
   
    FileDataSource pageWithRankInput = new FileDataSource(new DanglingPageRankInputFormat(),
      pageWithRankInputPath, "DanglingPageWithRankInput");
    pageWithRankInput.getParameters().setLong(DanglingPageRankInputFormat.NUM_VERTICES_PARAMETER, numVertices);
   
    BulkIteration iteration = new BulkIteration("Page Rank Loop");
    iteration.setInput(pageWithRankInput);
   
    FileDataSource adjacencyListInput = new FileDataSource(new ImprovedAdjacencyListInputFormat(),
      adjacencyListInputPath, "AdjancencyListInput");
   
    JoinOperator join = JoinOperator.builder(new DotProductMatch(), LongValue.class, 0, 0)
        .input1(iteration.getPartialSolution())
        .input2(adjacencyListInput)
View Full Code Here


      customersPath = args[3];
      nationsPath = args[4];
      resultPath = args[5];
    }
   
    FileDataSource orders = new FileDataSource(new IntTupleDataInFormat(), ordersPath, "Orders");
    // orders.setOutputContract(UniqueKey.class);
    // orders.getCompilerHints().setAvgNumValuesPerKey(1);

    FileDataSource lineitems = new FileDataSource(new IntTupleDataInFormat(), lineitemsPath, "LineItems");
    // lineitems.getCompilerHints().setAvgNumValuesPerKey(4);

    FileDataSource customers = new FileDataSource(new IntTupleDataInFormat(), customersPath, "Customers");

    FileDataSource nations = new FileDataSource(new IntTupleDataInFormat(), nationsPath, "Nations");


    MapOperator mapO = MapOperator.builder(FilterO.class)
      .name("FilterO")
      .build();
View Full Code Here

    final int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    final String input = (args.length > 1 ? args[1] : "");
    final String output = (args.length > 2 ? args[2] : "");

    // This task will read the input data and generate the key/value pairs
    final FileDataSource source =
        new FileDataSource(new TeraInputFormat(), input, "Data Source");
    source.setDegreeOfParallelism(numSubTasks);

    // This task writes the sorted data back to disk
    final FileDataSink sink =
        new FileDataSink(new TeraOutputFormat(), output, "Data Sink");
    sink.setDegreeOfParallelism(numSubTasks);
View Full Code Here

      outputPath = args[3];
      numIterations = Integer.parseInt(args[4]);
      numVertices = Long.parseLong(args[5]);
    }
   
    FileDataSource pageWithRankInput = new FileDataSource(new DanglingPageRankInputFormat(),
      pageWithRankInputPath, "PageWithRank Input");
    pageWithRankInput.getParameters().setLong(NUM_VERTICES_CONFIG_PARAM, numVertices);
   
    BulkIteration iteration = new BulkIteration("Page Rank Loop");
    iteration.setInput(pageWithRankInput);
   
    FileDataSource adjacencyListInput = new FileDataSource(new ImprovedAdjacencyListInputFormat(),
      adjacencyListInputPath, "AdjancencyListInput");
   
    JoinOperator join = JoinOperator.builder(new JoinVerexWithEdgesMatch(), LongValue.class, 0, 0)
        .input1(iteration.getPartialSolution())
        .input2(adjacencyListInput)
View Full Code Here

    // parse job parameters
    int numSubTasks   = args.length > 0 ? Integer.parseInt(args[0]) : 1;
    String edgeInput = args.length > 1 ? args[1] : "";
    String output    = args.length > 2 ? args[2] : "";

    FileDataSource edges = new FileDataSource(new EdgeWithDegreesInputFormat(), edgeInput, "Input Edges with Degrees");
    edges.setParameter(EdgeWithDegreesInputFormat.VERTEX_DELIMITER_CHAR, '|');
    edges.setParameter(EdgeWithDegreesInputFormat.DEGREE_DELIMITER_CHAR, ',');

    // =========================== Triangle Enumeration ============================
   
    MapOperator toLowerDegreeEdge = MapOperator.builder(new ProjectToLowerDegreeVertex())
        .input(edges)
View Full Code Here

    KMeansSingleStep kmi = new KMeansSingleStep();
    Plan p = kmi.getPlan(String.valueOf(DEFAULT_PARALLELISM), IN_FILE, IN_FILE, OUT_FILE, String.valueOf(20));
   
    // set the statistics
    OperatorResolver cr = getContractResolver(p);
    FileDataSource pointsSource = cr.getNode(DATAPOINTS);
    FileDataSource centersSource = cr.getNode(CENTERS);
    setSourceStatistics(pointsSource, 100l*1024*1024*1024, 32f);
    setSourceStatistics(centersSource, 1024*1024, 32f);
   
    OptimizedPlan plan = compileWithStats(p);
    checkPlan(plan);
View Full Code Here

      WordCount wc = new WordCount();
      Plan p = wc.getPlan(DEFAULT_PARALLELISM_STRING, IN_FILE, OUT_FILE);
     
      OptimizedPlan plan;
      if (estimates) {
        FileDataSource source = getContractResolver(p).getNode("Input Lines");
        setSourceStatistics(source, 1024*1024*1024*1024L, 24f);
        plan = compileWithStats(p);
      } else {
        plan = compileNoStats(p);
      }
View Full Code Here

    checkWordCountWithSortedSink(false);
  }
 
  private void checkWordCountWithSortedSink(boolean estimates) {
    try {
      FileDataSource sourceNode = new FileDataSource(new TextInputFormat(), IN_FILE, "Input Lines");
      MapOperator mapNode = MapOperator.builder(new TokenizeLine())
        .input(sourceNode)
        .name("Tokenize Lines")
        .build();
      ReduceOperator reduceNode = ReduceOperator.builder(new CountWords(), StringValue.class, 0)
View Full Code Here

      boolean hashJoinFirstOkay, boolean hashJoinSecondOkay, boolean mergeJoinOkay)
  {
    try {
      // set statistics
      OperatorResolver cr = getContractResolver(p);
      FileDataSource ordersSource = cr.getNode(ORDERS);
      FileDataSource lineItemSource = cr.getNode(LINEITEM);
      MapOperator mapper = cr.getNode(MAPPER_NAME);
      JoinOperator joiner = cr.getNode(JOIN_NAME);
      setSourceStatistics(ordersSource, orderSize, 100f);
      setSourceStatistics(lineItemSource, lineitemSize, 140f);
      mapper.getCompilerHints().setAvgOutputRecordSize(16f);
View Full Code Here

  //                      /
  //    Sc3(id,y) --------
  @Override
  protected Plan getTestJob() {
    // Sc1 generates M parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c identified by id
    FileDataSource sc1 = new FileDataSource(new CsvInputFormat(), sc1Path);
    CsvInputFormat.configureRecordFormat(sc1).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1)
        .field(IntValue.class, 2).field(IntValue.class, 3);

    // Sc2 generates N x values to be evaluated with the polynomial identified by id
    FileDataSource sc2 = new FileDataSource(new CsvInputFormat(), sc2Path);
    CsvInputFormat.configureRecordFormat(sc2).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1);

    // Sc3 generates N y values to be evaluated with the polynomial identified by id
    FileDataSource sc3 = new FileDataSource(new CsvInputFormat(), sc3Path);
    CsvInputFormat.configureRecordFormat(sc3).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1);

    // Jn1 matches x and y values on id and emits (id, x, y) triples
    JoinOperator jn1 = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0).input1(sc2).input2(sc3).build();
View Full Code Here

TOP

Related Classes of org.apache.flink.api.java.record.operators.FileDataSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.