Package cascading.scheme.hadoop

Examples of cascading.scheme.hadoop.SequenceFile
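cascading.scheme.hadoop.SequenceFile is Cascading's binary scheme for Hadoop sequence files: it stores each Tuple directly, under the Fields declared at construction time. As a primer for the excerpts below, here is a minimal round trip through a SequenceFile-backed Hfs tap. This is a sketch assuming Cascading 2.x on Hadoop; the path and field names are illustrative.

import org.apache.hadoop.mapred.JobConf;

import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.scheme.hadoop.SequenceFile;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;

public class SequenceFileRoundTrip {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf();

    // the scheme declares which fields the tap reads and writes
    Hfs tap = new Hfs(new SequenceFile(new Fields("id", "name")), "/tmp/seqfile-example");

    // write a tuple directly through the tap, outside of any flow
    TupleEntryCollector out = tap.openForWrite(new HadoopFlowProcess(conf));
    out.add(new Tuple(1, "alpha"));
    out.close();

    // read it back
    TupleEntryIterator in = tap.openForRead(new HadoopFlowProcess(conf));
    while (in.hasNext())
      System.out.println(in.next().getTuple());
    in.close();
  }
}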



  @Test
  public void testLfs() throws URISyntaxException, IOException
    {
    Tap tap = new Lfs( new SequenceFile( new Fields( "foo" ) ), "some/path" );

    String path = tap.getFullIdentifier( getPlatform().getFlowProcess() );
    assertTrue( "wrong scheme", new Path( path ).toUri().getScheme().equalsIgnoreCase( "file" ) );

    new Lfs( new SequenceFile( new Fields( "foo" ) ), "file:///some/path" );

    try
      {
      new Lfs( new SequenceFile( new Fields( "foo" ) ), "s3://localhost:5001/some/path" );
      fail( "not valid url" );
      }
    catch( Exception exception )
      {
      // expected: Lfs accepts only file: identifiers, so the s3: scheme must be rejected
      }
    }


  /**
   * Constructor TempHfs creates a new TempHfs instance.
   *
   * @param conf   of type Configuration
   * @param name   of type String
   * @param isNull of type boolean
   */
  public TempHfs( Configuration conf, String name, boolean isNull )
    {
    super( isNull ? new NullScheme() : new SequenceFile()
      {
      } ); // anonymous subclass: SequenceFile's no-arg constructor is protected
    this.name = name;
    this.stringPath = initTemporaryPath( conf, true );
    }

import java.io.IOException;

import cascading.scheme.hadoop.SequenceFile;
import cascading.tap.hadoop.Lfs;
import cascading.tuple.Fields;

public class StdoutTap extends Lfs {

    public StdoutTap() {
        // buffer all fields in a temporary SequenceFile; getTempDir() is a static helper in the full source
        super(new SequenceFile(Fields.ALL), getTempDir());
    }

public class MemorySinkTap extends Lfs {
    private List<Tuple> results;
    private Fields fields;

    public MemorySinkTap(List<Tuple> tuples, Fields fields) {
        // like StdoutTap above, backed by a SequenceFile in a temporary location;
        // the caller's list is presumably filled with the sunk tuples after the flow runs
        super(new SequenceFile(Fields.ALL), getTempDir());
        this.results = tuples;
        this.fields = fields;
    }

      JobConf conf = (JobConf) flowProcess.getConfigCopy();

      try {
        LOG.info("HLL counter found " + approxCounter.cardinality() + " distinct keys");

        Hfs tap = new Hfs(new SequenceFile(new Fields("bytes")), BloomProps.getApproxCountsDir(conf));
        TupleEntryCollector out = tap.openForWrite(new HadoopFlowProcess(conf));
        out.add(new Tuple(new BytesWritable(approxCounter.getBytes())));
        out.close();

      } catch (IOException e) {
        // wrap and rethrow, matching the pattern used elsewhere in this class
        throw new RuntimeException(e);
      }

      String partsRoot = BloomProps.getBloomFilterPartsDir(conf);
      maxHashes = BloomProps.getMaxBloomHashes(conf);
      minHashes = BloomProps.getMinBloomHashes(conf);

      for (int i = minHashes; i <= maxHashes; i++) {
        Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), partsRoot + "/" + i + "/");
        numHashesToCollector.put(i, tap.openForWrite(new HadoopFlowProcess(conf)));
      }

    } catch (IOException e) {
      throw new RuntimeException(e);
    }

  private static BloomFilter mergeBloomParts(String tapPath, long numBloomBits, long splitSize, int numBloomHashes, long numElems) throws IOException {
    FixedSizeBitSet bitSet = new FixedSizeBitSet(numBloomBits);

    if (FileSystemHelper.getFS().exists(new Path(tapPath))) {
      Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), tapPath);
      TupleEntryIterator itr = tap.openForRead(CascadingUtil.get().getFlowProcess());
      while (itr.hasNext()) {
        TupleEntry cur = itr.next();
        long split = cur.getLong(0);
        FixedSizeBitSet curSet = new FixedSizeBitSet(splitSize, ((BytesWritable) cur.getObject(1)).getBytes());

  private static long getApproxDistinctKeysCount(JobConf conf, String partsDir) throws IOException, CardinalityMergeException {
    if (!FileSystemHelper.getFS().exists(new Path(partsDir))) {
      return 0;
    }

    Hfs approxCountsTap = new Hfs(new SequenceFile(new Fields("bytes")), partsDir);

    TupleEntryIterator in = approxCountsTap.openForRead(CascadingUtil.get().getFlowProcess());
    List<HyperLogLog> countParts = new LinkedList<HyperLogLog>();

    long totalSum = 0;
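The excerpt cuts off before the stored counters are deserialized and merged. A hedged sketch of how the remainder might look, assuming the stream-lib HyperLogLog API implied by the CardinalityMergeException in the signature (imports: java.util.Arrays, com.clearspring.analytics.stream.cardinality.HyperLogLog and ICardinality); this is not the project's exact code and assumes at least one stored part:

    while (in.hasNext()) {
      BytesWritable bw = (BytesWritable) in.next().getObject(0);
      // BytesWritable.getBytes() may return a padded buffer, so trim to the real length
      byte[] serialized = Arrays.copyOf(bw.getBytes(), bw.getLength());
      countParts.add(HyperLogLog.Builder.build(serialized));
    }
    in.close();

    // fold all parts into one estimator and return its combined cardinality
    ICardinality merged = countParts.remove(0).merge(countParts.toArray(new ICardinality[0]));
    return merged.cardinality();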

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put("source1", ExampleFixtures.SOURCE_TAP_1);
    sources.put("source2", ExampleFixtures.SOURCE_TAP_2);

    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

    Pipe source1 = new Pipe("source1");
    Pipe source2 = new Pipe("source2");

    Pipe joined = new BloomJoin(source1, new Fields("field1"), source2, new Fields("field3"));
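The tail assembly above still has to be bound to its taps before anything runs. One way to finish it, as a minimal sketch assuming Cascading 2.x's HadoopFlowConnector rather than whatever helper the cascading_ext examples actually use (imports: cascading.flow.Flow, cascading.flow.hadoop.HadoopFlowConnector):

    // bind pipe names to taps and run the flow; a sketch, not the project's exact wiring
    Flow flow = new HadoopFlowConnector().connect("bloom-join-example", sources, sink, joined);
    flow.complete();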

      System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.SimpleFlowExample <output dir>");
      return;
    }

    String outputDir = args[0];
    Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

    Pipe source1 = new Pipe("source1");

    Pipe source2 = new Pipe("source2");
