Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Schema


  public static Schema getOutputCountSchema() {
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("topic", Type.INT));
    fields.add(Field.create("totalcount", Type.INT));
    return new Schema("outputcount", fields);
  }
View Full Code Here


    // Declare the fields of the "my_schema" schema: three STRING fields
    // (location, date, hashtag) followed by an INT count.
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("location", Type.STRING));
    fields.add(Field.create("date", Type.STRING));
    fields.add(Field.create("hashtag", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("my_schema", fields);

    // Register the schema as the job's intermediate schema, then group by
    // (location, date, hashtag) and order by the same three fields, ascending.
    // NOTE(review): `conf` is defined outside this excerpt.
    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("location", "date", "hashtag");
    mr.setOrderBy(new OrderBy().add("location", Order.ASC).add("date", Order.ASC).add("hashtag", Order.ASC));
View Full Code Here

    fields.add(Field.create("user", Type.STRING));
    fields.add(Field.create("feature", Type.STRING));
    fields.add(Field.create("all",Type.BOOLEAN));
    fields.add(Field.create("clicks", Type.INT));

    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("user", "all", "feature");
    mr.setOrderBy(new OrderBy().add("user", Order.ASC).add("all", Order.DESC).add("feature", Order.ASC));
View Full Code Here

    // Declare a four-field schema named "schema": INT, STRING, LONG and DOUBLE fields.
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("intField", Type.INT));
    fields.add(Field.create("strField", Type.STRING));
    fields.add(Field.create("longField", Type.LONG));
    fields.add(Field.create("doubleField", Type.DOUBLE));
    Schema schema = new Schema("schema", fields);

    // Build the job (second constructor argument is the string "Pangool Secondary Sort")
    // and register the schema as its intermediate schema.
    // NOTE(review): `conf` is defined outside this excerpt.
    TupleMRBuilder mr = new TupleMRBuilder(conf, "Pangool Secondary Sort");
    mr.addIntermediateSchema(schema);
    // Group by (intField, strField) only.
    mr.setGroupByFields("intField", "strField");
    // The order-by lists the two group-by fields plus longField; presumably this
    // yields a secondary sort on longField within each group — confirm against
    // the Pangool documentation.
    mr.setOrderBy(new OrderBy().add("intField", Order.ASC).add("strField", Order.ASC).add("longField", Order.ASC));
View Full Code Here

    // Fields of the "urlMap" schema: two STRING fields, url and canonicalUrl.
    List<Field> urlMapFields = new ArrayList<Field>();
    urlMapFields.add(Field.create("url",Type.STRING));
    urlMapFields.add(Field.create("canonicalUrl",Type.STRING));

    // Build the job with two intermediate schemas ("urlMap" and "urlRegister"),
    // grouped by the "url" field.
    // NOTE(review): `conf`, `urlRegisterFields`, `input1`, `input2`, `output`,
    // `Handler`, `UrlMapProcessor` and `UrlProcessor` are declared outside this
    // excerpt; "urlRegister" presumably also contains a "url" field since it is
    // the group-by field — verify against the full source.
    TupleMRBuilder mr = new TupleMRBuilder(conf,"Pangool Url Resolution");
    mr.addIntermediateSchema(new Schema("urlMap", urlMapFields));
    mr.addIntermediateSchema(new Schema("urlRegister", urlRegisterFields));
    mr.setGroupByFields("url");
    mr.setTupleReducer(new Handler());
    // Output goes to `output` through a TextOutputFormat wrapper, with Text and
    // NullWritable passed as the output classes.
    mr.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class, NullWritable.class);
    // Two text inputs, each paired with its own processor instance.
    mr.addInput(new Path(input1), new HadoopInputFormat(TextInputFormat.class), new UrlMapProcessor());
    mr.addInput(new Path(input2), new HadoopInputFormat(TextInputFormat.class), new UrlProcessor());
View Full Code Here

    List<Field> fields = new ArrayList<Field>();
    // The schema has 3 fields: word, topicId and count
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("topic", Type.INT));
    fields.add(Field.create("count", Type.INT));
    return new Schema("schema", fields);
  }
View Full Code Here

    // Configure schema, sort and group by
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("first", Type.INT));
    fields.add(Field.create("second", Type.INT));

    return new Schema("my_schema", fields);
  }
View Full Code Here

    fields.add(Field.create("user", Type.STRING));
    fields.add(Field.create("feature", Type.STRING));
    fields.add(Field.create("all", Type.BOOLEAN));
    fields.add(Field.create("clicks", Type.INT));

    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("user", "all", "feature");
    mr.setOrderBy(new OrderBy().add("user", Order.ASC).add("all", Order.DESC).add("feature", Order.ASC));
View Full Code Here

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("location", Type.STRING));
    fields.add(Field.create("date", Type.STRING));
    fields.add(Field.create("hashtag", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("location", "date", "hashtag");
    mr.setOrderBy(new OrderBy().add("location", Order.ASC).add("date", Order.ASC)
View Full Code Here

    // Read the iteration limit from the "gol.iterations" configuration key;
    // 1000 is passed as the second argument (presumably the fallback when the
    // key is unset — confirm against the type of `conf`).
    final int iterations = conf.getInt("gol.iterations", 1000);
    // Log the run parameters. NOTE(review): `maxX` and `maxY` are defined
    // outside this excerpt.
    Log.info("using parameters: maxX grid: " + maxX + " maxY grid: " + maxY + " max #iterations: "
        + iterations);

    // Define the intermediate schema: a pair of ints
    final Schema schema = new Schema("minMax", Fields.parse("min:int, max:int"));

    // Group by both fields, but pass only "min" as the custom partition field.
    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("min", "max");
    job.setCustomPartitionFields("min");
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.