Examples of SequenceFile

cascading.scheme.SequenceFile
cascading.scheme.hadoop.SequenceFile
A SequenceFile is a type of {@link cascading.scheme.Scheme} for flat files consisting of binary key/value pairs. It is a space- and time-efficient means of storing data.

Examples of cascading.scheme.hadoop.SequenceFile


  private static BloomFilter mergeBloomParts(String tapPath, long numBloomBits, long splitSize, int numBloomHashes, long numElems) throws IOException {
    FixedSizeBitSet bitSet = new FixedSizeBitSet(numBloomBits);


    if (FileSystemHelper.getFS().exists(new Path(tapPath))) {
      Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), tapPath);
      TupleEntryIterator itr = tap.openForRead(CascadingUtil.get().getFlowProcess());
      while (itr.hasNext()) {
        TupleEntry cur = itr.next();
        long split = cur.getLong(0);
        FixedSizeBitSet curSet = new FixedSizeBitSet(splitSize, ((BytesWritable) cur.getObject(1)).getBytes());

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

  private static long getApproxDistinctKeysCount(JobConf conf, String partsDir) throws IOException, CardinalityMergeException {
    if (!FileSystemHelper.getFS().exists(new Path(partsDir))) {
      return 0;
    }


    Hfs approxCountsTap = new Hfs(new SequenceFile(new Fields("bytes")), partsDir);


    TupleEntryIterator in = approxCountsTap.openForRead(CascadingUtil.get().getFlowProcess());
    List<HyperLogLog> countParts = new LinkedList<HyperLogLog>();


    long totalSum = 0;

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("source1", ExampleFixtures.SOURCE_TAP_1);
    sources.put("source2", ExampleFixtures.SOURCE_TAP_2);


    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);


    Pipe source1 = new Pipe("source1");
    Pipe source2 = new Pipe("source2");


    Pipe joined = new BloomJoin(source1, new Fields("field1"), source2, new Fields("field3"));

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.SimpleFlowExample <output dir>");
      return;
    }


    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);


    Pipe source1 = new Pipe("source1");


    Pipe source2 = new Pipe("source2");

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.BloomJoinExampleWithoutCascadingUtil <output dir>");
      return;
    }


    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);


    Pipe source1 = new Pipe("source1");


    Pipe source2 = new Pipe("source2");

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.FlowWithCustomCascadingUtil <output dir>");
      return;
    }


    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);


    Pipe source1 = new Pipe("source1");


    Pipe source2 = new Pipe("source2");

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

  private Tap<JobConf, ?, ?> inputRhs;


  @Before
  public void setUp() throws IOException {


    inputLhs = new Hfs(new SequenceFile(new Fields("a", "b")), getTestRoot() + "/lhs");
    inputRhs = new Hfs(new SequenceFile(new Fields("c", "d")), getTestRoot() + "/rhs");


    TapHelper.writeToTap(inputLhs,
        new Tuple("1", "A"),
        new Tuple("1", "B"),
        new Tuple("1", "C"),

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

        new Tuple("2", "G"));
  }


  @Test
  public void checkLimits() throws IOException {
    Tap<JobConf, ?, ?> output = new Hfs(new SequenceFile(new Fields("a", "b", "d")), getTestRoot() + "/output");


    Pipe lhs = new Pipe("lhs");


    Pipe rhs = new Pipe("rhs");

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      new Fields("in"));


    Pipe pipe = new Pipe("batch-pipe");
    pipe = new Each(pipe, new AnnotateWithSizeOfCurrentBatch(4));


    Tap<JobConf, RecordReader, OutputCollector> dst = new Lfs(new SequenceFile(OUT_FIELD), getTestRoot()+"/out");


    Flow f = CascadingUtil.get().getFlowConnector().connect(src, dst, pipe);
    f.complete();


    TupleEntryIterator tupleEntryIterator = dst.openForRead(CascadingUtil.get().getFlowProcess());

View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

  private final String OUTPUT_PATH = getTestRoot() + "/output";


  @Test
  public void testSimpleCombiner() throws IOException {


    Hfs source = new Hfs(new SequenceFile(new Fields("key", "value")), INPUT_PATH);
    TupleEntryCollector tc = source.openForWrite(CascadingUtil.get().getFlowProcess());
    tc.add(new Tuple("k0", 1));
    tc.add(new Tuple("k0", 2));
    tc.add(new Tuple("k1", 1));
    tc.add(new Tuple("k1", -3));
    tc.add(new Tuple("k1", 10));
    tc.close();


    Tap sink = new Hfs(new SequenceFile(new Fields("key", "sum")), OUTPUT_PATH);


    Pipe pipe = new Pipe("pipe");
    pipe = Combiner.assembly(pipe, new SimpleAggregator(), new Fields("key"), new Fields("value"), new Fields("sum"));


    CascadingUtil.get().getFlowConnector().connect(source, sink, pipe).complete();

View Full Code Here

0 1 2 3 4

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.