Examples of org.apache.crunch.Pipeline.run()

org.apache.crunch.Pipeline.run()
Constructs and executes a series of MapReduce jobs in order to write data to the output targets.

    String[] expectedFileContents = { "1,2,3,4", "5,6,7,8", "9,10,11", "12,13,14" };


    String vanillaCSVFile = tmpDir.copyResourceFileName("vanilla.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(vanillaCSVFile)));
    pipeline.run();


    Collection<String> csvLinesList = csvLines.asCollection().getValue();


    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));

View Full Code Here

        "\"Champion, Mac\",\"5678 Tatooine Rd. Apt 5, Mobile, AL 36608\",\"30\",\"M\",\"Some other date\",\"short description\"" };


    String csvWithNewlines = tmpDir.copyResourceFileName("withNewlines.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines)));
    pipeline.run();


    Collection<String> csvLinesList = csvLines.asCollection().getValue();


    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));

View Full Code Here

    String csvWithNewlines = tmpDir.copyResourceFileName("customQuoteCharWithNewlines.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines),
        CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '*', '*',
        CSVLineReader.DEFAULT_ESCAPE_CHARACTER));
    pipeline.run();


    Collection<String> csvLinesList = csvLines.asCollection().getValue();


    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));

View Full Code Here

    String chineseLines = tmpDir.copyResourceFileName("brokenChineseLines.csv");


    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(chineseLines),
        CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '“', '”', '、'));
    pipeline.run();
    Collection<String> csvLinesList = csvLines.asCollection().getValue();
    for (int i = 0; i < expectedChineseLines.length; i++) {
      assertTrue(csvLinesList.contains(expectedChineseLines[i]));
    }
  }

View Full Code Here

        Avros.records(Person.class)));
    File output1File = tmpDir.getFile("output1");
    File output2File = tmpDir.getFile("output2");
    pipeline.write(genericCollection, new TrevniKeyTarget(output1File.getAbsolutePath()));
    pipeline.write(genericCollection, new TrevniKeyTarget(output2File.getAbsolutePath()));
    pipeline.run();


    Person person = genericCollection.materialize().iterator().next();


    File trv1File = new File(output1File, "part-m-00000-part-0.trv");
    File trv2File = new File(output2File, "part-m-00000-part-0.trv");

View Full Code Here

    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();


    Person person = genericCollection.materialize().iterator().next();


    PCollection<Person> retrievedPeople = pipeline.read(new TrevniKeySource<Person>(
        new Path(outputFile.toURI()), Avros.records(Person.class)));

View Full Code Here

    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();


    Person person = genericCollection.materialize().iterator().next();


    File trvFile = new File(outputFile, "part-m-00000-part-0.trv");

View Full Code Here


    // The aggregate method groups a collection into a single PObject.
    PObject<Long> totalCount = numberOfWords.aggregate(Aggregators.SUM_LONGS()).first();


    // Execute the pipeline as a MapReduce.
    PipelineResult result = pipeline.run();


    System.out.println("Total number of words: " + totalCount.getValue());
    
    pipeline.done();

View Full Code Here

    PCollection<String> words = split(shakespeare, "\\s+");
    PTable<String,Long> wordCounts = words.count();
    PCollection<KeyValue> wordCountKeyValues = convertToKeyValues(wordCounts);
    pipeline.write(wordCountKeyValues, ToHBase.hfile(outputPath));


    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());


    FileSystem fs = FileSystem.get(conf);
    KeyValue kv = readFromHFiles(fs, outputPath, "and");
    assertEquals(427L, Bytes.toLong(kv.getValue()));

View Full Code Here

    HFileUtils.writePutsToHFilesForIncrementalLoad(
        wordCountPuts,
        testTable,
        outputPath);


    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());


    new LoadIncrementalHFiles(HBASE_TEST_UTILITY.getConfiguration())
        .doBulkLoad(outputPath, testTable);

View Full Code Here

0 1 2 3 4

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.