Package org.apache.spark.sql.api.java

Examples of org.apache.spark.sql.api.java.JavaSchemaRDD


    }
  }

  @Test
  public void testBasicRead() throws Exception {
    JavaSchemaRDD schemaRDD = artistsAsSchemaRDD();
    assertTrue(schemaRDD.count() > 300);
    schemaRDD.registerTempTable("datfile");
    System.out.println(schemaRDD.schemaString());
    assertEquals(5, schemaRDD.take(5).size());
    JavaSchemaRDD results = sqc
        .sql("SELECT name FROM datfile WHERE id >=1 AND id <=10");
    assertEquals(10, schemaRDD.take(10).size());
  }
View Full Code Here


    assertEquals(10, schemaRDD.take(10).size());
  }

  @Test
  public void testEsSchemaRDD1Write() throws Exception {
    JavaSchemaRDD schemaRDD = artistsAsSchemaRDD();

    String target = "sparksql-test/scala-basic-write";
    JavaEsSparkSQL.saveToEs(schemaRDD, target);
    assertTrue(RestUtils.exists(target));
    assertThat(RestUtils.get(target + "/_search?"), containsString("345"));
View Full Code Here

    assertThat(RestUtils.get(target + "/_search?"), containsString("345"));
  }

  @Test
  public void testEsSchemaRDD1WriteWithId() throws Exception {
    JavaSchemaRDD schemaRDD = artistsAsSchemaRDD();

    String target = "sparksql-test/scala-basic-write-id-mapping";
    JavaEsSparkSQL.saveToEs(schemaRDD, target, ImmutableMap.of(ES_MAPPING_ID, "id"));
    assertTrue(RestUtils.exists(target));
    assertThat(RestUtils.get(target + "/_search?"), containsString("345"));
View Full Code Here

  @Test
  public void testEsSchemaRDD2Read() throws Exception {
    String target = "sparksql-test/scala-basic-write";

    JavaSchemaRDD schemaRDD = JavaEsSparkSQL.esRDD(sqc, target);
    assertTrue(schemaRDD.count() > 300);
    String schema = schemaRDD.schemaString();
    assertTrue(schema.contains("id: long"));
    assertTrue(schema.contains("name: string"));
    assertTrue(schema.contains("pictures: string"));
    assertTrue(schema.contains("time: long"));
    assertTrue(schema.contains("url: string"));

    // schemaRDD.take(5).foreach(println)

    schemaRDD.registerTempTable("basicRead");
    JavaSchemaRDD nameRDD = sqc.sql("SELECT name FROM basicRead WHERE id >= 1 AND id <=10");
    assertEquals(10, nameRDD.count());

  }
View Full Code Here

          return person;
        }
      });

    // Apply a schema to an RDD of Java Beans and register it as a table.
    JavaSchemaRDD schemaPeople = sqlCtx.applySchema(people, Person.class);
    schemaPeople.registerAsTable("people");

    // SQL can be run over RDDs that have been registered as tables.
    JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");

    // The results of SQL queries are SchemaRDDs and support all the normal RDD operations.
    // The columns of a row in the result can be accessed by ordinal.
    List<String> teenagerNames = teenagers.map(new Function<Row, String>() {
      public String call(Row row) {
        return "Name: " + row.getString(0);
      }
    }).collect();

    // JavaSchemaRDDs can be saved as parquet files, maintaining the schema information.
    schemaPeople.saveAsParquetFile("people.parquet");

    // Read in the parquet file created above.  Parquet files are self-describing so the schema is preserved.
    // The result of loading a parquet file is also a JavaSchemaRDD.
    JavaSchemaRDD parquetFile = sqlCtx.parquetFile("people.parquet");

    //Parquet files can also be registered as tables and then used in SQL statements.
    parquetFile.registerAsTable("parquetFile");
    JavaSchemaRDD teenagers2 = sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
  }
View Full Code Here

          return person;
        }
      });

    // Apply a schema to an RDD of Java Beans and register it as a table.
    JavaSchemaRDD schemaPeople = sqlCtx.applySchema(people, Person.class);
    schemaPeople.registerTempTable("people");

    // SQL can be run over RDDs that have been registered as tables.
    JavaSchemaRDD teenagers = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");

    // The results of SQL queries are SchemaRDDs and support all the normal RDD operations.
    // The columns of a row in the result can be accessed by ordinal.
    List<String> teenagerNames = teenagers.map(new Function<Row, String>() {
      @Override
      public String call(Row row) {
        return "Name: " + row.getString(0);
      }
    }).collect();
    for (String name: teenagerNames) {
      System.out.println(name);
    }

    System.out.println("=== Data source: Parquet File ===");
    // JavaSchemaRDDs can be saved as parquet files, maintaining the schema information.
    schemaPeople.saveAsParquetFile("people.parquet");

    // Read in the parquet file created above.
    // Parquet files are self-describing so the schema is preserved.
    // The result of loading a parquet file is also a JavaSchemaRDD.
    JavaSchemaRDD parquetFile = sqlCtx.parquetFile("people.parquet");

    //Parquet files can also be registered as tables and then used in SQL statements.
    parquetFile.registerTempTable("parquetFile");
    JavaSchemaRDD teenagers2 =
      sqlCtx.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
    teenagerNames = teenagers2.map(new Function<Row, String>() {
      @Override
      public String call(Row row) {
          return "Name: " + row.getString(0);
      }
    }).collect();
    for (String name: teenagerNames) {
      System.out.println(name);
    }

    System.out.println("=== Data source: JSON Dataset ===");
    // A JSON dataset is pointed to by path.
    // The path can be either a single text file or a directory storing text files.
    String path = "examples/src/main/resources/people.json";
    // Create a JavaSchemaRDD from the file(s) pointed to by path
    JavaSchemaRDD peopleFromJsonFile = sqlCtx.jsonFile(path);

    // Because the schema of a JSON dataset is automatically inferred, to write queries,
    // it is better to take a look at what the schema is.
    peopleFromJsonFile.printSchema();
    // The schema of people is ...
    // root
    //  |-- age: IntegerType
    //  |-- name: StringType

    // Register this JavaSchemaRDD as a table.
    peopleFromJsonFile.registerTempTable("people");

    // SQL statements can be run by using the sql methods provided by sqlCtx.
    JavaSchemaRDD teenagers3 = sqlCtx.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");

    // The results of SQL queries are JavaSchemaRDDs and support all the normal RDD operations.
    // The columns of a row in the result can be accessed by ordinal.
    teenagerNames = teenagers3.map(new Function<Row, String>() {
      @Override
      public String call(Row row) { return "Name: " + row.getString(0); }
    }).collect();
    for (String name: teenagerNames) {
      System.out.println(name);
    }

    // Alternatively, a JavaSchemaRDD can be created for a JSON dataset represented by
    // an RDD[String] storing one JSON object per string.
    List<String> jsonData = Arrays.asList(
          "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
    JavaRDD<String> anotherPeopleRDD = ctx.parallelize(jsonData);
    JavaSchemaRDD peopleFromJsonRDD = sqlCtx.jsonRDD(anotherPeopleRDD);

    // Take a look at the schema of this new JavaSchemaRDD.
    peopleFromJsonRDD.printSchema();
    // The schema of anotherPeople is ...
    // root
    //  |-- address: StructType
    //  |    |-- city: StringType
    //  |    |-- state: StringType
    //  |-- name: StringType

    peopleFromJsonRDD.registerTempTable("people2");

    JavaSchemaRDD peopleWithCity = sqlCtx.sql("SELECT name, address.city FROM people2");
    List<String> nameAndCity = peopleWithCity.map(new Function<Row, String>() {
      @Override
      public String call(Row row) {
        return "Name: " + row.getString(0) + ", City: " + row.getString(1);
      }
    }).collect();
View Full Code Here

TOP

Related Classes of org.apache.spark.sql.api.java.JavaSchemaRDD

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.