Package cascading.tuple

Examples of cascading.tuple.TupleEntry


         */
        public FetchSetDatum drain() {
            if (!_queue.isEmpty()) {
                return removeFromQueue();
            } else if (safeHasNext()) {
                return new FetchSetDatum(new TupleEntry(_values.next()));
            } else {
                return null;
            }
        }
View Full Code Here


        super.cleanup(flowProcess, operationCall);
    }
   
  /**
   * Handles one group from the buffer call: logs the group, copies every
   * argument tuple into a DiskQueue of GroupedUrlDatum, then builds a
   * ProcessRobotsTask for the group and passes it to the executor.
   *
   * NOTE(review): this excerpt is cut off inside the try block, so the
   * matching catch/finally handling is not visible here.
   */
  @Override
  public void operate(FlowProcess flowProcess, BufferCall<NullContext> bufferCall) {
        TupleEntry group = bufferCall.getGroup();
        // First group field, read as a string; the variable name suggests it
        // holds protocol + domain — TODO confirm against the grouping key generator.
        String protocolAndDomain = group.getString(0);
        LOGGER.info("Processing tuple group: " + group);

        // Each iterator value is wrapped in a new TupleEntry before being queued.
        // NOTE(review): presumably a defensive copy because the iterator may
        // reuse its entry — confirm.
        DiskQueue<GroupedUrlDatum> urls = new DiskQueue<GroupedUrlDatum>(MAX_URLS_IN_MEMORY);
        Iterator<TupleEntry> values = bufferCall.getArgumentsIterator();
        while (values.hasNext()) {
            urls.add(new GroupedUrlDatum(new TupleEntry(values.next())));
        }
       
        try {
            // The task receives the queued URLs plus the call's output collector
            // and is submitted to the executor rather than run inline.
            Runnable doRobots = new ProcessRobotsTask(protocolAndDomain, _scorer, urls, _fetcher, _parser, bufferCall.getOutputCollector(), _flowProcess);
            _executor.execute(doRobots);
View Full Code Here

        }

        /**
         * Builds a FetchedDatum from the call's arguments and reads an extra
         * object from position _fieldPos of the same tuple entry.
         *
         * NOTE(review): this excerpt is cut off before the status is computed
         * and emitted, so only the setup is documented here.
         */
        @SuppressWarnings("rawtypes")
        @Override
        public void operate(FlowProcess process, FunctionCall<NullContext> funcCall) {
            TupleEntry entry = funcCall.getArguments();
            // The FetchedDatum is constructed directly over the argument entry (no copy here).
            FetchedDatum fd = new FetchedDatum(entry);
           
            // Get the fetch status that we hang on the end of the tuple,
            // after all of the FetchedDatum fields.
            Object result = entry.getObject(_fieldPos);
            StatusDatum status;
           
            // Note: Here we share the payload of the FetchedDatum with the
            // StatusDatum we're about to emit, but since we let go after we
            // emit, there shouldn't be an issue with this sharing.
View Full Code Here

    /**
     * Creates a datum by passing FIELDS to the superclass constructor.
     */
    public UrlDatum() {
        super(FIELDS);
    }

    /**
     * Creates a datum from another UrlDatum by wrapping the source's tuple
     * entry in a new TupleEntry and handing that to the superclass.
     *
     * NOTE(review): presumably the new TupleEntry is an independent copy, so
     * the two datums do not share state — confirm against the TupleEntry
     * copy-constructor documentation.
     *
     * @param datum the datum whose tuple entry is used as the source
     */
    public UrlDatum(UrlDatum datum) {
        super(new TupleEntry(datum.getTupleEntry()));
    }
View Full Code Here

    /**
     * Processes one group of scored URLs: validates the grouping key, resolves
     * the crawl delay, starts a fetch set on the policy, then feeds each
     * argument tuple to the policy and emits a FetchSetDatum whenever the
     * policy returns a non-null FetchSetInfo.
     *
     * NOTE(review): this excerpt is cut off inside the while loop, so whatever
     * follows the loop body is not visible here.
     *
     * @throws RuntimeException if the group key is a special key
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void operate(FlowProcess process, BufferCall<NullContext> buffCall) {
        Iterator<TupleEntry> values = buffCall.getArgumentsIterator();
        TupleEntry group = buffCall.getGroup();
       
        // Publish the iterator and reset the done flag in instance fields.
        // NOTE(review): presumably these back safeHasNext(), whose body is not
        // shown in this excerpt — confirm.
        _values = values;
        _iteratorDone = false;

        // <key> is the output of the IGroupingKeyGenerator used. This should
        // be <IP address>-<crawl delay in ms>
        String key = group.getString(0);

        // Special keys are rejected outright rather than processed.
        if (GroupingKey.isSpecialKey(key)) {
            throw new RuntimeException("Invalid grouping key: " + key);
        }

        // Fall back to the policy's default when the key carries no crawl delay.
        long crawlDelay = GroupingKey.getCrawlDelayFromKey(key);
        if (crawlDelay == BaseFetchJobPolicy.UNSET_CRAWL_DELAY) {
            crawlDelay = _policy.getDefaultCrawlDelay();
        }
       
        _policy.startFetchSet(key, crawlDelay);
       
        TupleEntryCollector collector = buffCall.getOutputCollector();

        PartitioningKey newKey = new PartitioningKey(key, _numReduceTasks);
       
        while (safeHasNext()) {
            // Each value is wrapped in a new TupleEntry before building the datum.
            ScoredUrlDatum scoredDatum = new ScoredUrlDatum(new TupleEntry(values.next()));
            FetchSetInfo setInfo = _policy.nextFetchSet(scoredDatum);
            if (setInfo != null) {
                // The third argument tells makeFetchSetDatum whether more values
                // remain; the emitted tuple is a clone of the result's tuple.
                FetchSetDatum result = makeFetchSetDatum(setInfo, newKey, safeHasNext());
                collector.add(BixoPlatform.clone(result.getTuple(), process));
            }
View Full Code Here

        super(outfields);
    }

    /**
     * Converts the argument tuple into an output tuple of strings: an item ID
     * looked up from the integer key, plus one ordered document per vector.
     * When the "joining" flow property equals "true", a second vector (read
     * from the fourth argument field) is converted as well.
     *
     * NOTE(review): this excerpt is cut off inside the try block, so how the
     * tuple is emitted and how exceptions are handled is not visible here.
     */
    public void operate( FlowProcess flowProcess, FunctionCall functionCall )
    {
        TupleEntry arguments = functionCall.getArguments();
        // The key is read as an int from the first declared argument field.
        int key = arguments.getInteger(arguments.getFields().get(0));
        try {
            String doJoinString = (String)flowProcess.getProperty("joining");
            // Reverse lookup: rowIndex is inverted and queried with the key rendered as a string.
            String itemIDString = rowIndex.inverse().get(String.valueOf(key));
            Vector va = ((VectorWritable)arguments.getObject(arguments.getFields().get(1))).get();
            String vaDoc = createOrderedDoc(va, itemIndex);
            Tuple tuple;
            // NOTE(review): if the "joining" property is unset and getProperty
            // returns null, this equals call would throw NullPointerException;
            // "true".equals(doJoinString) would avoid that — confirm intent.
            if(doJoinString.equals("true")){
                // Second vector comes from field index 3 (index 2 is not read here).
                Vector vb = ((VectorWritable)arguments.getObject(arguments.getFields().get(3))).get();
                String vbDoc = createOrderedDoc(vb, itemIndex);
                tuple = new Tuple(itemIDString, vaDoc, vbDoc);
            } else { // not joining, just converting to CSV
                tuple = new Tuple(itemIDString, vaDoc);
            }
View Full Code Here

  }

  /**
   * Builds a Put keyed by the string form of the outgoing entry's key field
   * and adds one cell per selected value field: family from familyNames[i],
   * qualifier from the field name, value from the field's string form.
   *
   * NOTE(review): this excerpt is cut off inside the outer loop, so how the
   * Put is handed to the output collector is not visible here.
   *
   * @throws IOException declared by the signature; the visible portion does not show where it arises
   */
  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
      throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    OutputCollector outputCollector = sinkCall.getOutput();
    // Row key: first element of the key-field selection, converted to bytes.
    Tuple key = tupleEntry.selectTuple(keyField);
    byte[] keyBytes = Bytes.toBytes(key.getString(0));
    Put put = new Put(keyBytes);

    // valueFields[i] and familyNames[i] are used in parallel by index.
    for (int i = 0; i < valueFields.length; i++) {
      Fields fieldSelector = valueFields[i];
      TupleEntry values = tupleEntry.selectEntry(fieldSelector);
     
      for (int j = 0; j < values.getFields().size(); j++) {
        Fields fields = values.getFields();
        Tuple tuple = values.getTuple();

        // A null field value is passed through as a null byte array rather than converted.
        String value = tuple.getString(j);
        byte[] asBytes = value == null ? null : Bytes.toBytes(value);
        put.add(Bytes.toBytes(familyNames[i]), Bytes.toBytes((String) fields.get(j)), asBytes);
      }
View Full Code Here

    }

    @Override
    public void sink( FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall ) throws IOException {
        // it's ok to use NULL here so the collector does not write anything
        TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
        OutputCollector outputCollector = sinkCall.getOutput();
        if( updateBy != null )
        {
            Tuple allValues = tupleEntry.selectTuple( updateValueFields );
            Tuple updateValues = tupleEntry.selectTuple( updateByFields );

            allValues = cleanTuple( allValues );

            TupleRecord key = new TupleRecord( allValues );

            if( updateValues.equals( updateIfTuple ) )
                outputCollector.collect( key, null );
            else
                outputCollector.collect( key, key );

            return;
        }

        Tuple result = tupleEntry.selectTuple( getSinkFields() );

        result = cleanTuple( result );

        outputCollector.collect( new TupleRecord( result ), null );
    }
View Full Code Here

    super( 5, fieldDeclaration );
    }

  /**
   * Reads two (uid, token count) pairs and a common count from argument
   * positions 0-4, computes a similarity value from the three counts via
   * calcSimilarity, and starts building the result tuple with the first uid.
   *
   * NOTE(review): this excerpt is cut off after the first add, so the rest of
   * the result tuple and its emission are not visible here.
   */
  public void operate( FlowProcess flowProcess, FunctionCall functionCall )
    {
    TupleEntry argument = functionCall.getArguments();

    // Positional reads: 0/1 describe the first uid, 2/3 the second, 4 is the common count.
    String uid1 = argument.getString( 0 );
    int token_count1 = argument.getInteger( 1 );
    String uid2 = argument.getString( 2 );
    int token_count2 = argument.getInteger( 3 );
    int common = argument.getInteger( 4 );

    double similarity = calcSimilarity( token_count1, token_count2, common );

    Tuple result = new Tuple();
    result.add( uid1 );
View Full Code Here

    }

    /**
     * Takes the first object of the outgoing entry and derives a string key
     * for it; when the structure is a DefaultPailStructure the key comes from
     * getCategory(obj).
     *
     * NOTE(review): this excerpt is cut off at the start of the else branch,
     * so the alternative key derivation and the actual write are not visible.
     *
     * @throws IOException declared by the signature; the visible portion does not show where it arises
     */
    @Override
    public void sink(FlowProcess<JobConf> process, SinkCall<Object[], OutputCollector> sinkCall)
        throws IOException {
      TupleEntry tuple = sinkCall.getOutgoingEntry();

      // Only position 0 of the outgoing entry is read in the visible portion.
      Object obj = tuple.getObject(0);
      String key;
      //a hack since byte[] isn't natively handled by hadoop
      if (getStructure() instanceof DefaultPailStructure) {
        key = getCategory(obj);
      } else {
View Full Code Here

TOP

Related Classes of cascading.tuple.TupleEntry

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.