Examples of SequenceFile


Examples of cascading.scheme.SequenceFile

      input.add(t);
    }
    input.close();

    // Create flow to read from local file and insert into HBase.
    Tap source = new Hfs(new SequenceFile(inputFields), mHelper.manageTemporaryPath("input"));

    Pipe pipe = new Pipe("values");
    Fields keyFields = new Fields("num");
    Fields valueFields = new Fields("lower", "upper");
    Tap hBaseTap = new HBaseTap("testTable",
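The excerpt above breaks off inside the HBaseTap constructor. The sketch below shows one way the remaining wiring might look; it assumes the cascading-hbase HBaseScheme(keyFields, familyName, valueFields) constructor, a hypothetical column family named "cf", and a plain FlowConnector standing in for whatever the test harness actually provides.

    // Sketch only: finish the HBase sink and connect source -> pipe -> HBase.
    // "cf" is a hypothetical column family; the real test supplies its own scheme.
    Tap hBaseTap = new HBaseTap("testTable",
        new HBaseScheme(keyFields, "cf", valueFields));

    Flow hBaseFlow = new FlowConnector().connect(source, hBaseTap, pipe);
    hBaseFlow.complete();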

Examples of cascading.scheme.SequenceFile

    File inputFile = new File(inputPath);
    if (inputFile.exists()) {
      throw new CascadingException("Input file " + inputPath + " already exists.");
    }
    Tap inputTap = new Hfs(new SequenceFile(fields), inputPath, SinkMode.REPLACE);
    TupleEntryCollector collector = inputTap.openForWrite(getJobConf());
    return collector;
  }
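A minimal round-trip sketch of how a collector obtained this way is typically used; the field names and /tmp path are invented for illustration, and it relies on the Cascading 1.x Hfs openForWrite/openForRead(JobConf) signatures seen in the excerpt.

    void roundTrip() throws IOException {
      Fields fields = new Fields("num", "char");
      Tap tap = new Hfs(new SequenceFile(fields), "/tmp/seqfile-example", SinkMode.REPLACE);

      // Write a few tuples through the collector, then close it to flush the sequence file.
      TupleEntryCollector collector = tap.openForWrite(new JobConf());
      collector.add(new Tuple("1", "a"));
      collector.add(new Tuple("2", "b"));
      collector.close();

      // Read the same tuples back through the tap.
      TupleEntryIterator iterator = tap.openForRead(new JobConf());
      while (iterator.hasNext())
        System.out.println(iterator.next().getTuple());
      iterator.close();
    }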

Examples of cascading.scheme.SequenceFile

   */
  public Flow runFlow(Pipe[] tails, Map<String, Fields> inputs, Fields[] outputs,
                      String[] outputPaths) {
    Map<String, Tap> sources = new HashMap<String, Tap>();
    for (Map.Entry<String, Fields> input : inputs.entrySet()) {
      Tap tap = new Hfs(new SequenceFile(input.getValue()),
                        manageTemporaryPath(input.getKey()));
      sources.put(input.getKey(), tap);
    }

    if (tails.length != outputs.length) {
      System.err.println("size of tails should be same as outputs.");
      return null;
    }

    int i = 0;
    Map<String, Tap> sinks = new HashMap<String, Tap>();
    for (Fields output : outputs) {
      String sinkName = tails[i].getName();
      Tap sink = new Hfs(new SequenceFile(output), outputPaths[i]);
      sinks.put(sinkName, sink);
      i++;
    }

    Flow f = mFlowConnector.connect(sources, sinks, tails);
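A hedged sketch of how a helper like runFlow might be invoked; the pipe, field, and path names here are invented for illustration, and the caller is expected to complete() the returned flow.

    // Hypothetical caller: one source named "lines", one tail, one output path.
    Pipe head = new Pipe("lines");
    Pipe tail = new Each(head, new Fields("line"),
                         new RegexSplitter(new Fields("word"), "\\s+"));

    Map<String, Fields> inputs = new HashMap<String, Fields>();
    inputs.put("lines", new Fields("line"));

    Flow flow = runFlow(new Pipe[]{tail}, inputs,
                        new Fields[]{new Fields("word")},
                        new String[]{"/tmp/words-out"});
    if (flow != null)
      flow.complete();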

Examples of cascading.scheme.SequenceFile

    // a predefined pipe assembly that returns fields named "url" and "page"
    Pipe importPipe = new ImportCrawlDataAssembly( "import pipe" );

    // create the tap instances
    Tap localPagesSource = new Lfs( new TextLine(), inputPath );
    Tap importedPages = new Hfs( new SequenceFile( new Fields( "url", "page" ) ), pagesPath );

    // connect the pipe assembly to the tap instances
    Flow importPagesFlow = flowConnector.connect( "import pages", localPagesSource, importedPages, importPipe );

    // a predefined pipe assembly that splits the stream into two named "url pipe" and "word pipe"
    // these pipes could be retrieved via the getTails() method and added to new pipe instances
    SubAssembly wordCountPipe = new WordCountSplitAssembly( "wordcount pipe", "url pipe", "word pipe" );

    // create Hadoop sequence files to store the results of the counts
    Tap sinkUrl = new Hfs( new SequenceFile( new Fields( "url", "word", "count" ) ), urlsPath );
    Tap sinkWord = new Hfs( new SequenceFile( new Fields( "word", "count" ) ), wordsPath );

    // convenience method to bind multiple pipes and taps
    Map<String, Tap> sinks = Cascades.tapsMap( new String[]{"url pipe", "word pipe"}, Tap.taps( sinkUrl, sinkWord ) );

    // wordCountPipe will be recognized as an assembly and handled appropriately
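This sample usually finishes by binding the split assembly to both sinks and chaining the two flows into a cascade. A sketch of that wiring, assuming the FlowConnector overload that takes a single source tap plus a sink map and the standard CascadeConnector:

    // Bind the single source to both sinks by pipe name, then run both flows in order.
    Flow countFlow = flowConnector.connect( importedPages, sinks, wordCountPipe );

    Cascade cascade = new CascadeConnector().connect( importPagesFlow, countFlow );
    cascade.complete();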

Examples of cascading.scheme.hadoop.SequenceFile

        return new Hfs(scheme, path, SinkMode.REPLACE);
    }
   
    @Override
    protected Tap<?, ?, ?> makeSourceTap(Fields fields, String path) {
        return new Hfs(new SequenceFile(fields), path, SinkMode.REPLACE);
    }

Examples of cascading.scheme.hadoop.SequenceFile

  @Test
  public void testMerge2()
    {
    Tap source1 = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge1" );
    Tap source2 = new Hfs( new SequenceFile( new Fields( "offset", "line" ) ), "foo/merge2" );

    Tap sink = new Hfs( new TextLine(), "foo" );

    Pipe left = new Each( new Pipe( "left" ), new Fields( "line" ), new RegexFilter( ".*46.*" ) );
    Pipe right = new Each( new Pipe( "right" ), new Fields( "line" ), new RegexFilter( ".*192.*" ) );
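A sketch of how the two filtered branches might be merged and written to the sink, assuming a GroupBy over both pipes on the shared "line" field and a HadoopFlowConnector (this excerpt comes from the 2.x cascading.scheme.hadoop packages):

    // Merge the two branches by grouping them together on "line", then connect and run.
    Pipe merge = new GroupBy( "merge", Pipe.pipes( left, right ), new Fields( "line" ) );

    Map<String, Tap> sources = new HashMap<String, Tap>();
    sources.put( "left", source1 );
    sources.put( "right", source2 );

    Flow flow = new HadoopFlowConnector().connect( sources, sink, merge );
    flow.complete();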

Examples of cascading.scheme.hadoop.SequenceFile

    Map sources = new HashMap();

    sources.put( "lower", sourceLower );
    sources.put( "upper", sourceUpper );

    Scheme leftScheme = testTempReplaced ? new SequenceFile( new Fields( "num", "lower", "num2", "upper" ) ) : new TextLine( new Fields( "offset", "line" ), new Fields( "lower" ) );
    Tap sinkLeft = new Hfs( leftScheme, "/splitmiddle/left", SinkMode.REPLACE );

    Scheme rightScheme = testTempReplaced ? new SequenceFile( new Fields( "lower" ) ) : new TextLine( new Fields( "offset", "line" ), new Fields( "lower" ) );
    Tap sinkRight = new Hfs( rightScheme, "/splitmiddle/right", SinkMode.REPLACE );

    Map sinks = new HashMap();

    sinks.put( "left", sinkLeft );

Examples of cascading.scheme.hadoop.SequenceFile

    pipe = new Every( pipe, new Count(), new Fields( "ip", "count" ) );

    pipe = new Each( pipe, new InsertBoolean( new Fields( "boolean" ), false ), Fields.ALL );

    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "serialization" ), SinkMode.REPLACE );

    Map<Object, Object> jobProperties = getProperties();

    TupleSerialization.addSerializationToken( jobProperties, 1000, BooleanWritable.class.getName() );
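A short sketch of how those properties are typically handed to the flow connector so the registered token takes effect at runtime; HadoopFlowConnector and the source tap are assumptions, not part of the original test.

    // Build the flow with the properties carrying the serialization token so
    // BooleanWritable values inside tuples can cross the map/reduce boundary.
    Flow flow = new HadoopFlowConnector( jobProperties ).connect( source, sink, pipe );
    flow.complete();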

Examples of cascading.scheme.hadoop.SequenceFile

    bytes.setComparator( "bytes", new BytesComparator() );
    pipe = new GroupBy( pipe, bytes );

    pipe = new Every( pipe, new Count(), new Fields( "bytes", "count" ) );

    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "grouponbytes" ), SinkMode.REPLACE );

    Map<Object, Object> properties = getProperties();

    TupleSerializationProps.addSerialization( properties, BytesSerialization.class.getName() );

Examples of cascading.scheme.hadoop.SequenceFile

    sources.put( "upper", sourceUpper );

    Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // using null pos so all fields are written
    Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "writablekeyvalue" ), SinkMode.REPLACE );

    Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
    pipeLower = new Each( pipeLower, new InsertBytes( new Fields( "group" ), "inserted text as bytes" ), Fields.ALL );
    pipeLower = new Each( pipeLower, new InsertBytes( new Fields( "value" ), "inserted text as bytes" ), Fields.ALL );