/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.vector.complex.writer;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import io.netty.buffer.DrillBuf;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

import com.google.common.io.Files;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.common.expression.PathSegment;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.util.FileUtils;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.memory.TopLevelAllocator;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.exec.record.RecordBatchLoader;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.rpc.user.QueryResultBatch;
import org.apache.drill.exec.vector.IntVector;
import org.apache.drill.exec.vector.NullableBigIntVector;
import org.apache.drill.exec.vector.NullableIntVector;
import org.apache.drill.exec.vector.RepeatedBigIntVector;
import org.apache.drill.exec.vector.ValueVector;
import org.apache.drill.exec.vector.complex.MapVector;
import org.apache.drill.exec.vector.complex.fn.JsonReader;
import org.apache.drill.exec.vector.complex.fn.JsonReaderWithState;
import org.apache.drill.exec.vector.complex.fn.JsonWriter;
import org.apache.drill.exec.vector.complex.fn.ReaderJSONRecordSplitter;
import org.apache.drill.exec.vector.complex.impl.ComplexWriterImpl;
import org.apache.drill.exec.vector.complex.reader.FieldReader;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;

public class TestJsonReader extends BaseTestQuery {
  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestJsonReader.class);

  private static BufferAllocator allocator;
  private static final boolean VERBOSE_DEBUG = true;

  @BeforeClass
  public static void setupAllocator(){
    allocator = new TopLevelAllocator();
  }

  @AfterClass
  public static void destroyAllocator(){
    allocator.close();
  }

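  /**
   * Runs each query in {@code queries} against the JSON resource at {@code filename},
   * printing the source data, query text, and results when {@link #VERBOSE_DEBUG} is set,
   * and asserts that each query returns the row count at the same index in
   * {@code rowCounts}.
   */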
  public void runTestsOnFile(String filename, UserBitShared.QueryType queryType, String[] queries, long[] rowCounts) throws Exception {
    if (VERBOSE_DEBUG) {
      System.out.println("===================");
      System.out.println("source data in json");
      System.out.println("===================");
      System.out.println(Files.toString(FileUtils.getResourceAsFile(filename), Charsets.UTF_8));
    }

    int i = 0;
    for (String query : queries) {
      if (VERBOSE_DEBUG) {
        System.out.println("=====");
        System.out.println("query");
        System.out.println("=====");
        System.out.println(query);
        System.out.println("======");
        System.out.println("result");
        System.out.println("======");
      }
      int rowCount = testRunAndPrint(queryType, query);
      assertEquals(rowCounts[i], rowCount);
      System.out.println();
      i++;
    }
  }

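  // Regression test for a vector fill bug: reads a single-column file long enough
  // (13512 records) to exercise vector allocation across batches.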
  @Test
  public void testSingleColumnRead_vector_fill_bug() throws Exception {
    String[] queries = {"select * from cp.`/store/json/single_column_long_file.json`"};
    long[] rowCounts = {13512};
    String filename = "/store/json/single_column_long_file.json";
    runTestsOnFile(filename, UserBitShared.QueryType.SQL, queries, rowCounts);
  }

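  // Selecting a column that does not exist in the file should still produce one
  // (null) row per record rather than failing or returning an empty result.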
  @Test
  public void testNonExistentColumnReadAlone() throws Exception {
    String[] queries = {"select non_existent_column from cp.`/store/json/single_column_long_file.json`"};
    long[] rowCounts = {13512};
    String filename = "/store/json/single_column_long_file.json";
    runTestsOnFile(filename, UserBitShared.QueryType.SQL, queries, rowCounts);
  }

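  // With store.json.all_text_mode enabled, a column that changes type from int to
  // string mid-file can be read as text without a schema-change failure.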
  @Test
  public void testAllTextMode() throws Exception {
    test("alter system set `store.json.all_text_mode` = true");
    String[] queries = {"select * from cp.`/store/json/schema_change_int_to_string.json`"};
    long[] rowCounts = {3};
    String filename = "/store/json/schema_change_int_to_string.json";
    runTestsOnFile(filename, UserBitShared.QueryType.SQL, queries, rowCounts);
  }

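  // A star query over complex (nested) data should surface all three top-level
  // fields; the values themselves are checked in testExistentColumns().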
  @Test
  public void readComplexWithStar() throws Exception {
    List<QueryResultBatch> results = testSqlWithResults("select * from cp.`/store/json/test_complex_read_with_star.json`");
    assertEquals(2, results.size());

    RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());
    QueryResultBatch batch = results.get(0);

    assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
    assertEquals(3, batchLoader.getSchema().getFieldCount());
    testExistentColumns(batchLoader, batch);

    batch.release();
    batchLoader.clear();
  }

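  // A null appearing where a list is expected should be tolerated in all-text mode.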
  @Test
  public void testNullWhereListExpected() throws Exception {
    test("alter system set `store.json.all_text_mode` = true");
    String[] queries = {"select * from cp.`/store/json/null_where_list_expected.json`"};
    long[] rowCounts = {3};
    String filename = "/store/json/null_where_list_expected.json";
    runTestsOnFile(filename, UserBitShared.QueryType.SQL, queries, rowCounts);
  }

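  // A null appearing where a map is expected should likewise be tolerated in all-text mode.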
  @Test
  public void testNullWhereMapExpected() throws Exception {
    test("alter system set `store.json.all_text_mode` = true");
    String[] queries = {"select * from cp.`/store/json/null_where_map_expected.json`"};
    long[] rowCounts = {3};
    String filename = "/store/json/null_where_map_expected.json";
    runTestsOnFile(filename, UserBitShared.QueryType.SQL, queries, rowCounts);
  }

  // The project pushdown rule correctly adds the projected columns to the scan, but it does not remove the
  // redundant project operator after the scan. This test runs a physical plan generated from one of the other
  // tests to ensure that the scan alone filters out the correct data.
  @Test
  public void testProjectPushdown() throws Exception {
    String[] queries = {Files.toString(FileUtils.getResourceAsFile("/store/json/project_pushdown_json_physical_plan.json"), Charsets.UTF_8)};
    long[] rowCounts = {3};
    String filename = "/store/json/schema_change_int_to_string.json";
    test("alter system set `store.json.all_text_mode` = false");
    runTestsOnFile(filename, UserBitShared.QueryType.PHYSICAL, queries, rowCounts);

    List<QueryResultBatch> results = testPhysicalWithResults(queries[0]);
    assertEquals(2, results.size());
    // "`field_1`", "`field_3`.`inner_1`", "`field_3`.`inner_2`", "`field_4`.`inner_1`"

    RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());
    QueryResultBatch batch = results.get(0);
    assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));
    assertEquals(5, batchLoader.getSchema().getFieldCount());
    testExistentColumns(batchLoader, batch);

    VectorWrapper<?> vw = batchLoader.getValueAccessorById(
        NullableIntVector.class, //
        batchLoader.getValueVectorId(SchemaPath.getCompoundPath("non_existent_at_root")).getFieldIds() //
    );
    assertNull(vw.getValueVector().getAccessor().getObject(0));
    assertNull(vw.getValueVector().getAccessor().getObject(1));
    assertNull(vw.getValueVector().getAccessor().getObject(2));

    vw = batchLoader.getValueAccessorById(
        NullableIntVector.class, //
        batchLoader.getValueVectorId(SchemaPath.getCompoundPath("non_existent", "nested","field")).getFieldIds() //
    );
    assertNull(vw.getValueVector().getAccessor().getObject(0));
    assertNull(vw.getValueVector().getAccessor().getObject(1));
    assertNull(vw.getValueVector().getAccessor().getObject(2));

    vw.getValueVector().clear();
    batch.release();
    batchLoader.clear();
  }

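  /**
   * Verifies the columns that do exist in the test file: field_1 is a repeated
   * bigint, field_3.inner_1 and field_3.inner_2 are integer columns that are null
   * where a record omits them, and field_4.inner_1 is a repeated bigint list.
   */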
  private void testExistentColumns(RecordBatchLoader batchLoader, QueryResultBatch batch) throws SchemaChangeException {

    assertTrue(batchLoader.load(batch.getHeader().getDef(), batch.getData()));

    VectorWrapper<?> vw = batchLoader.getValueAccessorById(
        RepeatedBigIntVector.class, //
        batchLoader.getValueVectorId(SchemaPath.getCompoundPath("field_1")).getFieldIds() //
    );
    assertEquals("[1]", vw.getValueVector().getAccessor().getObject(0).toString());
    assertEquals("[5]", vw.getValueVector().getAccessor().getObject(1).toString());
    assertEquals("[5,10,15]", vw.getValueVector().getAccessor().getObject(2).toString());

    vw = batchLoader.getValueAccessorById(
        IntVector.class, //
        batchLoader.getValueVectorId(SchemaPath.getCompoundPath("field_3", "inner_1")).getFieldIds() //
    );
    assertNull(vw.getValueVector().getAccessor().getObject(0));
    assertEquals(2L, vw.getValueVector().getAccessor().getObject(1));
    assertEquals(5L, vw.getValueVector().getAccessor().getObject(2));

    vw = batchLoader.getValueAccessorById(
        IntVector.class, //
        batchLoader.getValueVectorId(SchemaPath.getCompoundPath("field_3", "inner_2")).getFieldIds() //
    );
    assertNull(vw.getValueVector().getAccessor().getObject(0));
    assertNull(vw.getValueVector().getAccessor().getObject(1));
    assertEquals(3L, vw.getValueVector().getAccessor().getObject(2));

    vw = batchLoader.getValueAccessorById(
        RepeatedBigIntVector.class, //
        batchLoader.getValueVectorId(SchemaPath.getCompoundPath("field_4", "inner_1")).getFieldIds() //
    );
    assertEquals("[]", vw.getValueVector().getAccessor().getObject(0).toString());
    assertEquals("[1,2,3]", vw.getValueVector().getAccessor().getObject(1).toString());
    assertEquals("[4,5,6]", vw.getValueVector().getAccessor().getObject(2).toString());
  }

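  /**
   * Exercises the low-level JSON read/write path directly: a compound JSON string
   * is pushed through a {@link JsonReaderWithState} into a {@link ComplexWriterImpl},
   * batch sizes are tracked as the writer runs out of space and is reallocated, and
   * the vectors are then read back through a {@link JsonWriter} and spot-checked.
   */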
  @Test
  public void testReader() throws Exception{
    final int repeatSize = 10;

    String simple = " { \"b\": \"hello\", \"c\": \"goodbye\"}\n " +
        "{ \"b\": \"yellow\", \"c\": \"red\", \"p\":" +
    "{ \"integer\" : 2001, \n" +
        "  \"float\"   : 1.2,\n" +
        "  \"x\": {\n" +
        "    \"y\": \"friends\",\n" +
        "    \"z\": \"enemies\"\n" +
        "  },\n" +
        "  \"z\": [\n" +
        "    {\"orange\" : \"black\" },\n" +
        "    {\"pink\" : \"purple\" }\n" +
        "  ]\n" +
        "  \n" +
        "}\n }";

    String compound = simple;
    for (int i = 0; i < repeatSize; i++) compound += simple;

//    simple = "{ \"integer\" : 2001, \n" +
//        "  \"float\"   : 1.2\n" +
//        "}\n" +
//        "{ \"integer\" : -2002,\n" +
//        "  \"float\"   : -1.2 \n" +
//        "}";
    MapVector v = new MapVector("", allocator);
    ComplexWriterImpl writer = new ComplexWriterImpl("col", v);
    writer.allocate();

    DrillBuf buffer = allocator.buffer(255);
    JsonReaderWithState jsonReader = new JsonReaderWithState(new ReaderJSONRecordSplitter(compound), buffer,
        GroupScan.ALL_COLUMNS, false);
    int i = 0;
    List<Integer> batchSizes = Lists.newArrayList();

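    // Write records until the input is exhausted. WRITE_SUCCEED advances to the
    // next position; NO_MORE ends the loop; WRITE_FAILED means the current vectors
    // are full, so the batch size is recorded and the writer is reallocated before
    // retrying the record that failed.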
    outside: while(true){
      writer.setPosition(i);
      switch(jsonReader.write(writer)){
      case WRITE_SUCCEED:
        i++;
        break;
      case NO_MORE:
        batchSizes.add(i);
        System.out.println("no more records - main loop");
        break outside;

      case WRITE_FAILED:
        System.out.println("==== hit bounds at " + i);
        //writer.setValueCounts(i - 1);
        batchSizes.add(i);
        i = 0;
        writer.allocate();
        writer.reset();

        switch(jsonReader.write(writer)){
        case NO_MORE:
          System.out.println("no more records - new alloc loop.");
          break outside;
        case WRITE_FAILED:
          throw new RuntimeException("Failure while trying to write.");
        case WRITE_SUCCEED:
          i++;
        }
      }
    }

    int total = 0;
    int lastRecordCount = 0;
    for(Integer records : batchSizes){
      total += records;
      lastRecordCount = records;
    }


    ObjectWriter ow = new ObjectMapper().writer().withDefaultPrettyPrinter();

    ow.writeValueAsString(v.getAccessor().getObject(0));
    ow.writeValueAsString(v.getAccessor().getObject(1));
    FieldReader reader = v.get("col", MapVector.class).getAccessor().getReader();

    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    JsonWriter jsonWriter = new JsonWriter(stream, true);

    reader.setPosition(0);
    jsonWriter.write(reader);
    reader.setPosition(1);
    jsonWriter.write(reader);
    System.out.print("Json Read: ");
    System.out.println(new String(stream.toByteArray(), Charsets.UTF_8));
//    System.out.println(compound);

    System.out.println("Total Records Written " + batchSizes);

    reader.setPosition(lastRecordCount - 2);
    assertEquals("goodbye", reader.reader("c").readText().toString());
    reader.setPosition(lastRecordCount - 1);
    assertEquals("red", reader.reader("c").readText().toString());
    assertEquals((repeatSize+1) * 2, total);

    writer.clear();
    buffer.release();
  }
}