Source Code of org.kitesdk.cli.commands.TestCSVImportCommandCluster

/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kitesdk.cli.commands;

import com.beust.jcommander.internal.Lists;
import com.beust.jcommander.internal.Sets;
import com.google.common.io.Files;
import java.io.BufferedWriter;
import java.io.File;
import java.io.OutputStreamWriter;
import java.util.Set;
import java.util.concurrent.Callable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.kitesdk.cli.TestUtil;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.MiniDFSTest;
import org.kitesdk.data.TestHelpers;
import org.kitesdk.data.URIBuilder;
import org.kitesdk.data.spi.filesystem.DatasetTestUtilities;
import org.slf4j.Logger;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;

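/**
 * Integration tests for {@link CSVImportCommand}, run against the mini
 * HDFS cluster provided by {@link MiniDFSTest}. Covers imports from a
 * local file, an HDFS file, a local directory, an HDFS directory, and
 * the failure case of an empty HDFS directory.
 */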
public class TestCSVImportCommandCluster extends MiniDFSTest {

  private static String sample;
  private static String avsc;
  private static String datasetName;
  private static String repoURI;

  private Logger console = null;
  private CSVImportCommand command;
  private Dataset<GenericData.Record> dataset;

  private static final Set<GenericData.Record> expected = Sets.newHashSet();

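  /**
   * Writes a sample CSV file, infers an Avro schema from it, and builds
   * the set of records that each import test expects to find.
   */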
  @BeforeClass
  public static void createCSVSchemaAndSample() throws Exception {
    sample = "target/users.csv";
    avsc = "target/user.avsc";
    datasetName = "users";
    repoURI = "repo:" + getDFS().getUri().toString() + "/tmp/data";

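    // write a sample CSV with a header row and two user records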
    BufferedWriter writer = Files.newWriter(
        new File(sample), CSVSchemaCommand.SCHEMA_CHARSET);
    writer.append("id,username,email\n");
    writer.append("1,test,test@example.com\n");
    writer.append("2,user,user@example.com\n");
    writer.close();

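    // infer an Avro schema from the sample, naming the record class "User"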
    TestUtil.run("csv-schema", sample, "-o", avsc, "--class", "User");

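    // build the expected records against the inferred schema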
    GenericRecordBuilder builder = new GenericRecordBuilder(
        new Schema.Parser().parse(new File(avsc)));
    builder.set("id", 1l);
    builder.set("username", "test");
    builder.set("email", "test@example.com");
    expected.add(builder.build());
    builder.set("id", 2l);
    builder.set("username", "user");
    builder.set("email", "user@example.com");
    expected.add(builder.build());
  }

  @Before
  public void setup() throws Exception {
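    // create a fresh dataset in the HDFS repository before each test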
    TestUtil.run("create", datasetName, "-r", repoURI, "-s", avsc);

    this.dataset = Datasets.load(URIBuilder.build(repoURI, "default", datasetName),
        GenericData.Record.class);
    this.console = mock(Logger.class);
    this.command = new CSVImportCommand(console);
    // set the test repository information
    command.repoURI = repoURI;
    // set up the configuration as it would be with a cluster
    Configuration conf = getConfiguration();
    conf.setBoolean("kite.testing", true);
    command.setConf(conf);
  }

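  /** Deletes the dataset after each test so the next one starts clean. */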
  @After
  public void removeData() throws Exception {
    TestUtil.run("delete", datasetName, "-r", repoURI);
  }

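  /** Imports the local sample CSV file into the dataset. */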
  @Test
  public void testBasicImport() throws Exception {
    command.targets = Lists.newArrayList(sample, datasetName);
    command.run();
    Assert.assertEquals("Should contain expected records",
        expected, DatasetTestUtilities.materialize(dataset));
    verify(console).info("Added {} records to \"{}\"", 2l, datasetName);
    verifyNoMoreInteractions(console);
  }

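  /** Copies the sample CSV into HDFS and imports it from there. */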
  @Test
  public void testHDFSBasicImport() throws Exception {
    Path hdfsSample = getDFS().makeQualified(new Path("hdfs:/tmp/" + sample));
    getDFS().copyFromLocalFile(new Path(sample), hdfsSample);
    command.targets = Lists.newArrayList(hdfsSample.toString(), datasetName);
    command.run();
    Assert.assertEquals("Should contain expected records",
        expected, DatasetTestUtilities.materialize(dataset));
    verify(console).info("Added {} records to \"{}\"", 2l, datasetName);
    verifyNoMoreInteractions(console);
  }

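  /** Imports all CSV files found in a local directory. */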
  @Test
  public void testDirectoryImport() throws Exception {
    new File("target/sample").mkdir();
    BufferedWriter writer = Files.newWriter(
        new File("target/sample/one.csv"), CSVSchemaCommand.SCHEMA_CHARSET);
    writer.append("id,username,email\n");
    writer.append("1,test,test@example.com\n");
    writer.close();

    writer = Files.newWriter(
        new File("target/sample/two.csv"), CSVSchemaCommand.SCHEMA_CHARSET);
    writer.append("id,username,email\n");
    writer.append("2,user,user@example.com\n");
    writer.close();

    command.targets = Lists.newArrayList("target/sample", datasetName);
    command.run();
    Assert.assertEquals("Should contain expected records",
        expected, DatasetTestUtilities.materialize(dataset));
    verify(console).info("Added {} records to \"{}\"", 2l, datasetName);
    verifyNoMoreInteractions(console);
  }

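  /** Imports all CSV files found in an HDFS directory. */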
  @Test
  public void testHDFSDirectoryImport() throws Exception {
    Path hdfsSample = getDFS().makeQualified(new Path("hdfs:/tmp/sample"));
    getDFS().mkdirs(hdfsSample);

    FSDataOutputStream one = getDFS().create(
        new Path(hdfsSample, "one.csv"), true /* overwrite */ );
    OutputStreamWriter writer = new OutputStreamWriter(one, "utf8");
    writer.write("id,username,email\n");
    writer.write("1,test,test@example.com\n");
    writer.close();

    FSDataOutputStream two = getDFS().create(
        new Path(hdfsSample, "two.csv"), true /* overwrite */ );
    writer = new OutputStreamWriter(two, "utf8");
    writer.append("id,username,email\n");
    writer.append("2,user,user@example.com\n");
    writer.close();

    command.targets = Lists.newArrayList(hdfsSample.toString(), datasetName);
    command.run();
    Assert.assertEquals("Should contain expected records",
        expected, DatasetTestUtilities.materialize(dataset));
    verify(console).info("Added {} records to \"{}\"", 2l, datasetName);
    verifyNoMoreInteractions(console);
  }

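  /** Importing from an empty HDFS directory should fail, not silently succeed. */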
  @Test
  public void testEmptyHDFSSampleDirectory() throws Exception {
    String hdfsSample = "hdfs:/tmp/emptyDir";
    getDFS().mkdirs(new Path(hdfsSample));

    command.targets = Lists.newArrayList(hdfsSample, datasetName);
    TestHelpers.assertThrows("Should complain about no data files",
        IllegalArgumentException.class, new Callable<Object>() {
          @Override
          public Object call() throws Exception {
            command.run();
            return null;
          }
        });
    verifyNoMoreInteractions(console);
  }

}