Examples of org.apache.crunch.Source

org.apache.crunch.Source
A {@code Source} represents an input data set that is an input to one or more MapReduce jobs.

          // Add this to the set of output assignments
          for (Target t : outputs.get(pcollect)) {
            assignments.put(t, e.getValue());
          }
        } else {
          Source source = e.getKey().getSource();
          if (source != null && source instanceof Target) {
            JobPrototype current = e.getValue();
            Collection<JobPrototype> parentJobPrototypes = previousStages.get((Target) source);
            if (parentJobPrototypes != null) {
              for (JobPrototype parentJobProto : parentJobPrototypes) {

View Full Code Here

    Set<Target> deps = Sets.newHashSet(callable.getAllTargets().values());
    for (PCollection pc : callable.getAllPCollections().values()) {
      PCollectionImpl pcImpl = (PCollectionImpl) pc;
      deps.addAll(pcImpl.getTargetDependencies());
      MaterializableIterable iter = (MaterializableIterable) pc.materialize();
      Source pcSrc = iter.getSource();
      if (pcSrc instanceof Target) {
        deps.add((Target) pcSrc);
      }
    }
    return deps;

View Full Code Here

          // Add this to the set of output assignments
          for (Target t : outputs.get(pcollect)) {
            assignments.put(t, e.getValue());
          }
        } else {
          Source source = e.getKey().getSource();
          if (source != null && source instanceof Target) {
            JobPrototype current = e.getValue();
            Collection<JobPrototype> parentJobPrototypes = previousStages.get((Target) source);
            if (parentJobPrototypes != null) {
              for (JobPrototype parentJobProto : parentJobPrototypes) {

View Full Code Here

          // (i.e. we should do the same as above for avro files).
          throw new IllegalArgumentException(
              "--input-file-reader-schema must be specified when using --input-file-format=avroParquet");
        }
        Schema schema = opts.inputFileReaderSchema;
        Source source = new AvroParquetFileSource(filePaths, Avros.generics(schema), opts.inputFileProjectionSchema);
        return pipeline.read(source);
      } else if (opts.inputFileFormat.isAssignableFrom(TextInputFormat.class)) {
        Source source = From.textFile(filePaths);
        return pipeline.read(source);
      } else {
        // FIXME drop support for this stuff? (doesn't seem to work with spark)
        // TODO: intentionally restrict to only allow org.apache.hadoop.mapreduce.lib.input.TextInputFormat ?
        TableSource source = new FileTableSourceImpl(

View Full Code Here

TOP

Related Classes of org.apache.crunch.Source

org.apache.crunch.impl.dist.DistributedPipeline

org.apache.crunch.impl.mr.plan.MSCRPlanner

org.apache.solr.crunch.CrunchIndexerTool

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.