Package org.apache.crunch

Examples of org.apache.crunch.Pipeline.run()


    String[] expectedFileContents = { "1,2,3,4", "5,6,7,8", "9,10,11", "12,13,14" };

    String vanillaCSVFile = tmpDir.copyResourceFileName("vanilla.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(vanillaCSVFile)));
    pipeline.run();

    Collection<String> csvLinesList = csvLines.asCollection().getValue();

    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));
View Full Code Here


        "\"Champion, Mac\",\"5678 Tatooine Rd. Apt 5, Mobile, AL 36608\",\"30\",\"M\",\"Some other date\",\"short description\"" };

    String csvWithNewlines = tmpDir.copyResourceFileName("withNewlines.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines)));
    pipeline.run();

    Collection<String> csvLinesList = csvLines.asCollection().getValue();

    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));
View Full Code Here

    String csvWithNewlines = tmpDir.copyResourceFileName("customQuoteCharWithNewlines.csv");
    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(csvWithNewlines),
        CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '*', '*',
        CSVLineReader.DEFAULT_ESCAPE_CHARACTER));
    pipeline.run();

    Collection<String> csvLinesList = csvLines.asCollection().getValue();

    for (int i = 0; i < expectedFileContents.length; i++) {
      assertTrue(csvLinesList.contains(expectedFileContents[i]));
View Full Code Here

    String chineseLines = tmpDir.copyResourceFileName("brokenChineseLines.csv");

    Pipeline pipeline = new MRPipeline(CSVFileSourceIT.class, tmpDir.getDefaultConfiguration());
    PCollection<String> csvLines = pipeline.read(new CSVFileSource(new Path(chineseLines),
        CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING, '“', '”', '、'));
    pipeline.run();
    Collection<String> csvLinesList = csvLines.asCollection().getValue();
    for (int i = 0; i < expectedChineseLines.length; i++) {
      assertTrue(csvLinesList.contains(expectedChineseLines[i]));
    }
  }
View Full Code Here

        Avros.records(Person.class)));
    File output1File = tmpDir.getFile("output1");
    File output2File = tmpDir.getFile("output2");
    pipeline.write(genericCollection, new TrevniKeyTarget(output1File.getAbsolutePath()));
    pipeline.write(genericCollection, new TrevniKeyTarget(output2File.getAbsolutePath()));
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    File trv1File = new File(output1File, "part-m-00000-part-0.trv");
    File trv2File = new File(output2File, "part-m-00000-part-0.trv");
View Full Code Here

    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    PCollection<Person> retrievedPeople = pipeline.read(new TrevniKeySource<Person>(
        new Path(outputFile.toURI()), Avros.records(Person.class)));
View Full Code Here

    PCollection<Person> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(),
        Avros.records(Person.class)));
    File outputFile = tmpDir.getFile("output");
    Target trevniFile = new TrevniKeyTarget(outputFile.getAbsolutePath());
    pipeline.write(genericCollection, trevniFile);
    pipeline.run();

    Person person = genericCollection.materialize().iterator().next();

    File trvFile = new File(outputFile, "part-m-00000-part-0.trv");
View Full Code Here

    // The aggregate method groups a collection into a single PObject.
    PObject<Long> totalCount = numberOfWords.aggregate(Aggregators.SUM_LONGS()).first();

    // Execute the pipeline as a MapReduce.
    PipelineResult result = pipeline.run();

    System.out.println("Total number of words: " + totalCount.getValue());
   
    pipeline.done();
View Full Code Here

    PCollection<String> words = split(shakespeare, "\\s+");
    PTable<String,Long> wordCounts = words.count();
    PCollection<KeyValue> wordCountKeyValues = convertToKeyValues(wordCounts);
    pipeline.write(wordCountKeyValues, ToHBase.hfile(outputPath));

    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    FileSystem fs = FileSystem.get(conf);
    KeyValue kv = readFromHFiles(fs, outputPath, "and");
    assertEquals(427L, Bytes.toLong(kv.getValue()));
View Full Code Here

    HFileUtils.writePutsToHFilesForIncrementalLoad(
        wordCountPuts,
        testTable,
        outputPath);

    PipelineResult result = pipeline.run();
    assertTrue(result.succeeded());

    new LoadIncrementalHFiles(HBASE_TEST_UTILITY.getConfiguration())
        .doBulkLoad(outputPath, testTable);
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.