Package org.apache.crunch.types

Examples of org.apache.crunch.types.PTypeFamily


        }));
    return new FirstElementPObject<S>(minCollect);
  }

  public static <K, V> PTable<K, Collection<V>> collectValues(PTable<K, V> collect) {
    PTypeFamily tf = collect.getTypeFamily();
    final PType<V> valueType = collect.getValueType();
    return collect.groupByKey().parallelDo("collect",
        new MapValuesFn<K, Iterable<V>, Collection<V>>() {

          @Override
          public void initialize() {
            valueType.initialize(getConfiguration());
          }

          public Collection<V> map(Iterable<V> values) {
            List<V> collected = Lists.newArrayList();
            for (V value : values) {
              collected.add(valueType.getDetachedValue(value));
            }
            return collected;
          }
        }, tf.tableOf(collect.getKeyType(), tf.collections(collect.getValueType())));
  }
View Full Code Here


   * Co-groups the two {@link PTable} arguments.
   *
   * @return a {@code PTable} representing the co-grouped tables.
   */
  public static <K, U, V> PTable<K, Pair<Collection<U>, Collection<V>>> cogroup(PTable<K, U> left, PTable<K, V> right) {
    PTypeFamily ptf = left.getTypeFamily();
    PType<K> keyType = left.getPTableType().getKeyType();
    PType<U> leftType = left.getPTableType().getValueType();
    PType<V> rightType = right.getPTableType().getValueType();
    PType<Pair<U, V>> itype = ptf.pairs(leftType, rightType);

    PTable<K, Pair<U, V>> cgLeft = left.parallelDo("coGroupTag1", new CogroupFn1<K, U, V>(),
        ptf.tableOf(keyType, itype));
    PTable<K, Pair<U, V>> cgRight = right.parallelDo("coGroupTag2", new CogroupFn2<K, U, V>(),
        ptf.tableOf(keyType, itype));

    PTable<K, Pair<U, V>> both = cgLeft.union(cgRight);

    PType<Pair<Collection<U>, Collection<V>>> otype = ptf.pairs(ptf.collections(leftType), ptf.collections(rightType));
    return both.groupByKey().parallelDo("cogroup",
        new PostGroupFn<K, U, V>(leftType, rightType), ptf.tableOf(keyType, otype));
  }
View Full Code Here

      init();
    }
   
    private void init() {
      List<PType> pt = ptype.getSubTypes();
      PTypeFamily ptf = ptype.getFamily();
      if (cols.length == 1) {
        byFn = new SingleKeyFn(cols[0]);
        keyPType = pt.get(cols[0]);
      } else {
        TupleFactory tf = null;
        switch (cols.length) {
        case 2:
          tf = TupleFactory.PAIR;
          keyPType = ptf.pairs(pt.get(cols[0]), pt.get(cols[1]));
          break;
        case 3:
          tf = TupleFactory.TUPLE3;
          keyPType = ptf.triples(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]));
          break;
        case 4:
          tf = TupleFactory.TUPLE4;
          keyPType = ptf.quads(pt.get(cols[0]), pt.get(cols[1]), pt.get(cols[2]), pt.get(cols[3]));
          break;
        default:
          PType[] pts = new PType[cols.length];
          for (int i = 0; i < pts.length; i++) {
            pts[i] = pt.get(cols[i]);
          }
          tf = TupleFactory.TUPLEN;
          keyPType = (PType<Object>) (PType<?>) ptf.tuples(pts);
        }
       
        if (ptf == AvroTypeFamily.getInstance()) {
          Schema s = createOrderedTupleSchema(keyPType, columnOrder);
          keyPType = (PType<Object>) (PType<?>) Avros.generics(s);
View Full Code Here

    pipeline = new SparkPipeline("local", "pagerank");
  }

  @Test
  public void testAvroReflects() throws Exception {
    PTypeFamily tf = AvroTypeFamily.getInstance();
    PType<PageRankData> prType = Avros.reflects(PageRankData.class);
    String urlInput = tmpDir.copyResourceFileName("urls.txt");
    run(pipeline, urlInput, prType, tf);
    pipeline.done();
  }
View Full Code Here

    pipeline.done();
  }

  @Test
  public void testWritablesJSON() throws Exception {
    PTypeFamily tf = WritableTypeFamily.getInstance();
    PType<PageRankData> prType = PTypes.jsonString(PageRankData.class, tf);
    String urlInput = tmpDir.copyResourceFileName("urls.txt");
    run(pipeline, urlInput, prType, tf);
    pipeline.done();
  }
View Full Code Here

    run(pipeline, urlInput, prType, tf);
    pipeline.done();
  }

  public static PTable<String, PageRankData> pageRank(PTable<String, PageRankData> input, final float d) {
    PTypeFamily ptf = input.getTypeFamily();
    PTable<String, Float> outbound = input.parallelDo(new DoFn<Pair<String, PageRankData>, Pair<String, Float>>() {
      @Override
      public void process(Pair<String, PageRankData> input, Emitter<Pair<String, Float>> emitter) {
        PageRankData prd = input.second();
        for (String link : prd.urls) {
          emitter.emit(Pair.of(link, prd.propagatedScore()));
        }
      }
    }, ptf.tableOf(ptf.strings(), ptf.floats()));

    return input.cogroup(outbound).mapValues(
        new MapFn<Pair<Collection<PageRankData>, Collection<Float>>, PageRankData>() {
          @Override
          public PageRankData map(Pair<Collection<PageRankData>, Collection<Float>> input) {
View Full Code Here

    collection.getPipeline().getConfiguration().set(BloomFilterFn.CRUNCH_FILTER_NAME, collection.getName());
    return new FirstElementPObject<BloomFilter>(createFilterTable(collection, filterFn).values());
  }

  private static <T> PTable<String, BloomFilter> createFilterTable(PCollection<T> collection, BloomFilterFn<T> filterFn) {
    PTypeFamily tf = collection.getTypeFamily();
    PTable<String, BloomFilter> table = collection.parallelDo(filterFn,
        tf.tableOf(tf.strings(), Writables.writables(BloomFilter.class)));
    return table.groupByKey(1).combineValues(new BloomFilterAggregator());
  }
View Full Code Here

   * @param keyType The {@code PType} for the key of the SequenceFile entry
   * @param valueType The {@code PType} for the value of the SequenceFile entry
   * @return A new {@code TableSource<K, V>} instance
   */
  public static <K, V> TableSource<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) {
    PTypeFamily ptf = keyType.getFamily();
    return new SeqFileTableSource<K, V>(path, ptf.tableOf(keyType, valueType));
  }
View Full Code Here

   * @param keyType The {@code PType} for the key of the SequenceFile entry
   * @param valueType The {@code PType} for the value of the SequenceFile entry
   * @return A new {@code TableSource<K, V>} instance
   */
  public static <K, V> TableSource<K, V> sequenceFile(List<Path> paths, PType<K> keyType, PType<V> valueType) {
    PTypeFamily ptf = keyType.getFamily();
    return new SeqFileTableSource<K, V>(paths, ptf.tableOf(keyType, valueType));
  }
View Full Code Here

  Collection<V3>,
  Collection<V4>> {

    public static <V1, V2, V3, V4> PType<Tuple4.Collect<V1, V2, V3, V4>> derived(PType<V1> first,
        PType<V2> second, PType<V3> third, PType<V4> fourth) {
      PTypeFamily tf = first.getFamily();
      PType<Tuple4<Collection<V1>, Collection<V2>, Collection<V3>, Collection<V4>>> pt =
          tf.quads(
              tf.collections(first),
              tf.collections(second),
              tf.collections(third),
              tf.collections(fourth));
      Object clazz = Tuple4.Collect.class;
      return tf.derived((Class<Tuple4.Collect<V1, V2, V3, V4>>) clazz,
          new MapFn<Tuple4<Collection<V1>, Collection<V2>, Collection<V3>, Collection<V4>>,
          Collect<V1, V2, V3, V4>>() {
        @Override
        public Collect<V1, V2, V3, V4> map(
            Tuple4<Collection<V1>, Collection<V2>, Collection<V3>, Collection<V4>> in) {
View Full Code Here

TOP

Related Classes of org.apache.crunch.types.PTypeFamily

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.