Examples of SequenceFile


Examples of cascading.scheme.hadoop.SequenceFile

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "customerwritable" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "group" ), "inserted text as bytes", false ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "value" ), "inserted text as bytes", false ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "text" ), "inserted text as custom text", false ), Fields.ALL );
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Fields fields = new Fields( "num", "char", "group", "value", "num2", "char2", "group2", "value2" );
    Tap sink = new Hfs( new SequenceFile( fields ), getOutputPath( "/rawbyteskeyvalue/" + useDefaultComparator + "/" + secondarySortOnValue + "/" + ignoreSerializationToken + "/" + compositeGrouping ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new InsertTestText( new Fields( "group" ), "inserted text as bytes", true, 3, 4 ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertRawBytes( new Fields( "value" ), "inserted text as bytes", true ), Fields.ALL );
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

    pipe = new GroupBy( pipe, new Fields( "offset" ) );

    pipe = new Every( pipe, new Count(), new Fields( "offset", "count" ) );

    Fields sinkFields = new Fields( "offset", "count" ).applyTypes( Coercions.BIG_DECIMAL, long.class );
    Tap sink = new Hfs( new SequenceFile( sinkFields ), getOutputPath( "bigdecimal" ), SinkMode.REPLACE );

    Map<Object, Object> jobProperties = getProperties();

    TupleSerialization.addSerialization( jobProperties, BigDecimalSerialization.class.getName() );
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      {
      LOG.warn( "skipped Dfs tests, HDFS is unavailable on current platform" );
      return;
      }

    Tap tap = new Dfs( new SequenceFile( new Fields( "foo" ) ), "some/path" );

    String path = tap.getFullIdentifier( getPlatform().getFlowProcess() );
    assertTrue( "wrong scheme", new Path( path ).toUri().getScheme().equalsIgnoreCase( "hdfs" ) );

    new Dfs( new SequenceFile( new Fields( "foo" ) ), "hdfs://localhost:5001/some/path" );
    new Dfs( new SequenceFile( new Fields( "foo" ) ), new URI( "hdfs://localhost:5001/some/path" ) );

    try
      {
      new Dfs( new SequenceFile( new Fields( "foo" ) ), "s3://localhost:5001/some/path" );
      fail( "not valid url" );
      }
    catch( Exception exception )
      {
      }

    try
      {
      new Dfs( new SequenceFile( new Fields( "foo" ) ), new URI( "s3://localhost:5001/some/path" ) );
      fail( "not valid url" );
      }
    catch( Exception exception )
      {
      }
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

    }

  @Test
  public void testLfs() throws URISyntaxException, IOException
    {
    Tap tap = new Lfs( new SequenceFile( new Fields( "foo" ) ), "some/path" );

    String path = tap.getFullIdentifier( getPlatform().getFlowProcess() );
    assertTrue( "wrong scheme", new Path( path ).toUri().getScheme().equalsIgnoreCase( "file" ) );

    new Lfs( new SequenceFile( new Fields( "foo" ) ), "file:///some/path" );

    try
      {
      new Lfs( new SequenceFile( new Fields( "foo" ) ), "s3://localhost:5001/some/path" );
      fail( "not valid url" );
      }
    catch( Exception exception )
      {
      }
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

   * @param name   of type String
   * @param isNull of type boolean
   */
  public TempHfs( Configuration conf, String name, boolean isNull )
    {
    // When isNull is set, back this temporary tap with a NullScheme (writes nothing);
    // otherwise use an anonymous SequenceFile subclass — presumably because
    // SequenceFile's no-arg constructor is not public — TODO confirm.
    super( isNull ? new NullScheme() : new SequenceFile()
    {
    } );
    this.name = name;
    // Resolve and record the temporary working path derived from the job configuration.
    this.stringPath = initTemporaryPath( conf, true );
    }
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

import java.io.IOException;

public class StdoutTap extends Lfs {

    public StdoutTap() {
        // Capture all tuple fields into a SequenceFile under a temporary directory;
        // presumably the captured output is later echoed to stdout by the caller — TODO confirm.
        super(new SequenceFile(Fields.ALL), getTempDir());
    }
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

public class MemorySinkTap extends Lfs {
    private List<Tuple> results;
    private Fields fields;

    public MemorySinkTap(List<Tuple> tuples, Fields fields) {
        super(new SequenceFile(Fields.ALL), getTempDir());
        this.results = tuples;
        this.fields = fields;
    }
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      JobConf conf = (JobConf) flowProcess.getConfigCopy();

      try {
        LOG.info("HLL counter found " + approxCounter.cardinality() + " distinct keys");

        Hfs tap = new Hfs(new SequenceFile(new Fields("bytes")), BloomProps.getApproxCountsDir(conf));
        TupleEntryCollector out = tap.openForWrite(new HadoopFlowProcess(conf));
        out.add(new Tuple(new BytesWritable(approxCounter.getBytes())));
        out.close();

      } catch (IOException e) {
View Full Code Here

Examples of cascading.scheme.hadoop.SequenceFile

      String partsRoot = BloomProps.getBloomFilterPartsDir(conf);
      maxHashes = BloomProps.getMaxBloomHashes(conf);
      minHashes = BloomProps.getMinBloomHashes(conf);

      for (int i = minHashes; i <= maxHashes; i++) {
        Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), partsRoot + "/" + i + "/");
        numHashesToCollector.put(i, tap.openForWrite(new HadoopFlowProcess(conf)));
      }

    } catch (IOException e) {
      throw new RuntimeException(e);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., which is owned by Oracle, Inc. Contact coftware#gmail.com.