Package eu.stratosphere.api.java.record.io

Examples of eu.stratosphere.api.java.record.io.TextInputFormat


    // parse job parameters
    int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output    = (args.length > 2 ? args[2] : "");

    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");
    MapOperator mapper = MapOperator.builder(new TokenizeLine())
      .input(source)
      .name("Tokenize Lines")
      .build();
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
View Full Code Here


    checkWordCountWithSortedSink(false);
  }
 
  private void checkWordCountWithSortedSink(boolean estimates) {
    try {
      FileDataSource sourceNode = new FileDataSource(new TextInputFormat(), IN_FILE, "Input Lines");
      MapOperator mapNode = MapOperator.builder(new TokenizeLine())
        .input(sourceNode)
        .name("Tokenize Lines")
        .build();
      ReduceOperator reduceNode = ReduceOperator.builder(new CountWords(), StringValue.class, 0)
View Full Code Here

    return toParameterList(config1);
  }
 
  static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
   
    FileDataSource source = new FileDataSource(new TextInputFormat(), input, "Input Lines");
    source.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");
    MapOperator mapper = MapOperator.builder(new TokenizeLine())
      .input(source)
      .name("Tokenize Lines")
      .build();
View Full Code Here

  public Plan getPlan(int numSubTasks, String dataInput, String output) {


    // input is {word, count} pair
    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    //do a selection using cached file
    MapOperator mapper = MapOperator.builder(new TokenizeLine())
      .input(source)
      .name("Tokenize Lines")
View Full Code Here

  public Plan getPlan(String... args) {
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines");

    MapOperator mapper = MapOperator.builder(new TokenizeLine()).input(source).name("Tokenize Lines").build();
   
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0).input(mapper)
        .name("Count Words").build();
View Full Code Here

TOP

Related Classes of eu.stratosphere.api.java.record.io.TextInputFormat

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.