Package cascading.tuple

Examples of cascading.tuple.TupleEntry


                entry.setObject(field, object);
            }
        }

        static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
            TupleEntry entry = sinkCall.getOutgoingEntry();
            Fields fields = entry.getFields();
            Tuple tuple = entry.getTuple();

            if (fields.hasTypes()) {
                Type types[] = new Type[fields.size()];
                for (int index = 0; index < fields.size(); index++) {
                    Type type = fields.getType(index);
                    if (type instanceof CoercibleType<?>) {
                        types[index] = String.class;
                    }
                    else {
                        types[index] = type;
                    }
                }

                tuple = entry.getCoercedTuple(types);
            }
            return tuple;
        }
View Full Code Here


        if (!sourceCall.getInput().next(context[0], context[1])) {
            return false;
        }

        TupleEntry entry = sourceCall.getIncomingEntry();
        Map data = (Map) context[1];
        FieldAlias alias = (FieldAlias) context[2];

        if (entry.getFields().isDefined()) {
            // lookup using writables
            Text lookupKey = new Text();
            // TODO: it's worth benchmarking whether using an index/offset yields significantly better performance
            for (Comparable<?> field : entry.getFields()) {
                if (IS_ES_10) {
                    // check for multi-level alias
                    Object result = data;
                    for (String level : StringUtils.tokenize(alias.toES(field.toString()), ".")) {
                        lookupKey.set(level);
                        result = ((Map) result).get(lookupKey);
                        if (result == null) {
                            break;
                        }
                    }
                    CascadingUtils.setObject(entry, field, result);
                }
                else {
                    lookupKey.set(alias.toES(field.toString()));
                    CascadingUtils.setObject(entry, field, data.get(lookupKey));
                }
            }
        }
        else {
            // no definition means no coercion
            List<Object> elements = Tuple.elements(entry.getTuple());
            elements.clear();
            elements.addAll(data.values());
        }

        return true;
View Full Code Here

        if (!query.hasNext()) {
            return false;
        }

        TupleEntry entry = sourceCall.getIncomingEntry();
        Map<String, ?> data = (Map<String, ?>) query.next()[1];
        FieldAlias alias = (FieldAlias) sourceCall.getContext()[0];

        if (entry.getFields().isDefined()) {
            // lookup using writables
            // TODO: it's worth benchmarking whether using an index/offset yields significantly better performance
            for (Comparable<?> field : entry.getFields()) {
                if (IS_ES_10) {
                    Object result = data;
                    // check for multi-level alias
                    for (String level : StringUtils.tokenize(alias.toES(field.toString()), ".")) {
                        result = ((Map) result).get(level);
                        if (result == null) {
                            break;
                        }
                    }
                    entry.setObject(field, result);
                }
                else {
                    //NB: coercion should be applied automatically by the TupleEntry
                    entry.setObject(field, data.get(alias.toES(field.toString())));
                }
            }
        }
        else {
            // no definition means no coercion
            List<Object> elements = Tuple.elements(entry.getTuple());
            elements.clear();
            elements.addAll(data.values());
        }
        return true;
    }
View Full Code Here

  }

  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall)
      throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    OutputCollector outputCollector = sinkCall.getOutput();
    Tuple key = tupleEntry.selectTuple(keyField);
    ImmutableBytesWritable keyBytes = (ImmutableBytesWritable) key.getObject(0);
    Put put = new Put(keyBytes.get());

    for (int i = 0; i < valueFields.length; i++) {
      Fields fieldSelector = valueFields[i];
      TupleEntry values = tupleEntry.selectEntry(fieldSelector);

      for (int j = 0; j < values.getFields().size(); j++) {
        Fields fields = values.getFields();
        Tuple tuple = values.getTuple();

        ImmutableBytesWritable valueBytes = (ImmutableBytesWritable) tuple.getObject(j);
        put.add(Bytes.toBytes(familyNames[i]), Bytes.toBytes((String) fields.get(j)), valueBytes.get());
      }
    }
View Full Code Here

    }

    @Override
    public void sink( FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall ) throws IOException {
        // it's ok to use NULL here so the collector does not write anything
        TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
        OutputCollector outputCollector = sinkCall.getOutput();
        if( updateBy != null )
        {
            Tuple allValues = tupleEntry.selectTuple( updateValueFields );
            Tuple updateValues = tupleEntry.selectTuple( updateByFields );

            allValues = cleanTuple( allValues );

            TupleRecord key = new TupleRecord( allValues );

            if( updateValues.equals( updateIfTuple ) )
                outputCollector.collect( key, null );
            else
                outputCollector.collect( key, key );

            return;
        }

        Tuple result = tupleEntry.selectTuple( getSinkFields() );

        result = cleanTuple( result );

        outputCollector.collect( new TupleRecord( result ), null );
    }
View Full Code Here

        LOGGER.info("Analyzing: " +  CrawlConfig.STATUS_SUBDIR_NAME);
        UrlStatus[] statusValues = UrlStatus.values();
        int[] statusCounts = new int[statusValues.length];
        int totalEntries = 0;
        while (iter.hasNext()) {
            TupleEntry entry = iter.next();
            totalEntries += 1;
   
            String statusLine = entry.getString("line");
            String[] pieces = statusLine.split("\t");
            int pos = StatusDatum.FIELDS.getPos(StatusDatum.STATUS_FN);
            UrlStatus status = UrlStatus.valueOf(pieces[pos]);
            statusCounts[status.ordinal()] += 1;
        }
View Full Code Here

        int unfetchedUrls = 0;
        LOGGER.info("Analyzing: " +  CrawlConfig.CRAWLDB_SUBDIR_NAME);

        while (iter.hasNext()) {
            TupleEntry entry = iter.next();
            totalEntries += 1;
           
            CrawlDbDatum datum = new CrawlDbDatum(entry);
            if (exportDb) {
                LOGGER.info(datum.toString());
View Full Code Here

    public MboxSplitterFunction() {
        super(FetchedDatum.FIELDS);
    }

    public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) {
        TupleEntry arguments = functionCall.getArguments();
        FetchedDatum fetchedDatum = new FetchedDatum(arguments.getTuple());
       
        // Now, if the FetchedDatum mime-type is application/mbox, we want to split it into N FetchedDatum
        // tuples, one per mail message.
        if (fetchedDatum.getContentType().equals(MBOX_MIME_TYPE)) {
          splitIntoEmails(fetchedDatum, functionCall.getOutputCollector());
View Full Code Here

          outputCollector.add(makeNewTupleEntry(fetchedDatum, email));
        }
  }
 
  private TupleEntry makeNewTupleEntry(FetchedDatum fetchedDatum, StringBuilder email) {
      FetchedDatum newDatum = new FetchedDatum(new TupleEntry(fetchedDatum.getTupleEntry()));
      newDatum.setContent(new ContentBytes(safeGetAsciiBytes(email.toString())));
      return newDatum.getTupleEntry();
  }
View Full Code Here

        Tap testSink = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), resultsPath);
        TupleEntryIterator reader = testSink.openForRead(platform.makeFlowProcess());
        int count = 0;
        long latest = 0;
        while (reader.hasNext()) {
            TupleEntry next = reader.next();
            UrlDatum datum = new UrlDatum(next);
            latest = (Long) datum.getPayloadValue(CrawlDbDatum.LAST_FETCHED_FIELD);
            count++;
        }
       
View Full Code Here

TOP

Related Classes of cascading.tuple.TupleEntry

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.