package com.socrata.datasync.utilities;
import com.socrata.api.Soda2Producer;
import com.socrata.api.SodaDdl;
import com.socrata.datasync.PortUtility;
import com.socrata.datasync.PublishMethod;
import com.socrata.datasync.TestBase;
import com.socrata.datasync.publishers.Soda2Publisher;
import com.socrata.exceptions.LongRunningQueryException;
import com.socrata.exceptions.SodaError;
import com.socrata.model.importer.Column;
import com.socrata.model.importer.Dataset;
import com.socrata.model.importer.DatasetInfo;
import com.socrata.model.soql.SoqlQuery;
import junit.framework.TestCase;
import org.junit.Before;
import org.junit.Test;
import test.model.UnitTestDataset;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
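// Exercises PortUtility's schema and contents porting against live test datasets.
// Connection factories (createSodaDdl, createProducer) and helpers such as
// getTotalRows are inherited from TestBase.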
public class PortUtilityTest extends TestBase {
static SodaDdl sourceDdl;
static SodaDdl sinkDdl;
static Soda2Producer sourceProducer;
static Soda2Producer sinkProducer;
@Before
public void setUpTest() throws IOException {
sourceDdl = createSodaDdl();
sinkDdl = createSodaDdl();
sourceProducer = createProducer();
sinkProducer = createProducer();
}
@Test
public void testPortSchema() throws SodaError, InterruptedException {
// Perform the test operation, saving the String return value.
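// (An empty destination name keeps the source dataset's title; the final flag
// requests the New Backend, so false ports to the old backend.)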
String newDatasetID = PortUtility.portSchema(sourceDdl, sinkDdl, UNITTEST_DATASET_ID, "", false);
validatePortedSchema(newDatasetID);
}
@Test
public void testPortSchemaNewBackend() throws SodaError, InterruptedException {
// TODO remove conditional check
// only necessary because the New Backend is enabled on staging only
if(testOnStaging) {
// Perform the test operation, saving the String return value.
String newDatasetID = PortUtility.portSchema(sourceDdl, sinkDdl, UNITTEST_DATASET_ID, "", true);
System.out.println("New Backend dataset: " + newDatasetID);
// TODO validate that metadata denoting New Backend is true by hitting:
// https://opendata.test-socrata.com/api/views/s5r5-8cth.json
//DatasetInfo sinkMeta = sinkDdl.loadDatasetInfo(newDatasetID);
validatePortedSchema(newDatasetID);
}
}
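// Asserts that the ported dataset's basic metadata and column schema match the
// source's, then deletes the ported copy.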
private void validatePortedSchema(final String newDatasetID) throws SodaError, InterruptedException {
// Grab the necessary objects for testing.
DatasetInfo sourceMeta = sourceDdl.loadDatasetInfo(UNITTEST_DATASET_ID);
DatasetInfo sinkMeta = sinkDdl.loadDatasetInfo(newDatasetID);
try {
// First, test the metadata (just the basics) via DatasetInfo.
TestCase.assertEquals(sourceMeta.getViewType(), sinkMeta.getViewType());
TestCase.assertEquals(sourceMeta.getName(), sinkMeta.getName());
TestCase.assertEquals(sourceMeta.getDescription(), sinkMeta.getDescription());
TestCase.assertEquals(sourceMeta.getCategory(), sinkMeta.getCategory());
TestCase.assertEquals(sourceMeta.getTags(), sinkMeta.getTags());
TestCase.assertEquals(sourceMeta.getRights(), sinkMeta.getRights());
// Next, test the schema by grabbing Dataset and Column objects from DatasetInfo.
Dataset sourceSchema = (Dataset) sourceMeta;
Dataset sinkSchema = (Dataset) sinkMeta;
List<Column> sourceColumns = sourceSchema.getColumns();
List<Column> sinkColumns = sinkSchema.getColumns();
TestCase.assertEquals(sourceColumns.size(), sinkColumns.size());
for (int i = 0; i < sourceColumns.size(); i++) {
TestCase.assertEquals(sourceColumns.get(i).getName(), sinkColumns.get(i).getName());
TestCase.assertEquals(sourceColumns.get(i).getFieldName(), sinkColumns.get(i).getFieldName());
TestCase.assertEquals(sourceColumns.get(i).getDescription(), sinkColumns.get(i).getDescription());
TestCase.assertEquals(sourceColumns.get(i).getDataTypeName(), sinkColumns.get(i).getDataTypeName());
TestCase.assertEquals(sourceColumns.get(i).getFlags(), sinkColumns.get(i).getFlags());
//TestCase.assertEquals(sourceColumns.get(i).getPosition(), sinkColumns.get(i).getPosition());
}
} finally {
sinkDdl.deleteDataset(newDatasetID);
}
}
@Test
public void testPortSchemaRenameDataset() throws SodaError, InterruptedException {
String destinationDatasetName = "New Dataset";
// Perform the test operation, saving the String return value.
String newDatasetID = PortUtility.portSchema(sourceDdl, sinkDdl, UNITTEST_DATASET_ID, destinationDatasetName, false);
// Grab the necessary objects for testing.
DatasetInfo sourceMeta = sourceDdl.loadDatasetInfo(UNITTEST_DATASET_ID);
DatasetInfo sinkMeta = sinkDdl.loadDatasetInfo(newDatasetID);
try {
// Test the metadata (just the basics) via DatasetInfo.
TestCase.assertEquals(destinationDatasetName, sinkMeta.getName());
} finally {
sinkDdl.deleteDataset(newDatasetID);
}
}
@Test
public void testPublishDataset() throws SodaError, InterruptedException {
// Port a dataset's schema and confirm that it is unpublished by default.
String unpublishedID = PortUtility.portSchema(sourceDdl, sinkDdl, UNITTEST_DATASET_ID, "", false);
DatasetInfo unpublished = sinkDdl.loadDatasetInfo(unpublishedID);
TestCase.assertEquals("unpublished", unpublished.getPublicationStage());
// Perform the test operation. Confirm the dataset is published afterwards.
String publishedID = PortUtility.publishDataset(sinkDdl, unpublishedID);
DatasetInfo sink = sinkDdl.loadDatasetInfo(publishedID);
TestCase.assertEquals("published", sink.getPublicationStage());
sinkDdl.deleteDataset(publishedID);
}
@Test
public void testPortContentsUpsert() throws SodaError, InterruptedException, LongRunningQueryException, IOException {
// Query for the rows of the source dataset.
List<UnitTestDataset> sourceRows = sourceProducer.query(UNITTEST_DATASET_ID, SoqlQuery.SELECT_ALL, UnitTestDataset.LIST_TYPE);
// Port a dataset's schema to get an empty copy to test with.
String newDatasetID = PortUtility.portSchema(sourceDdl, sinkDdl, UNITTEST_DATASET_ID, "", false);
// Query for the rows (well, lack thereof) of the sink dataset.
List<UnitTestDataset> sinkRows = sinkProducer.query(newDatasetID, SoqlQuery.SELECT_ALL, UnitTestDataset.LIST_TYPE);
TestCase.assertEquals(0, sinkRows.size());
try {
// Perform the test operation.
PortUtility.portContents(sourceProducer, sinkProducer, UNITTEST_DATASET_ID, newDatasetID, PublishMethod.upsert);
// Query for the rows of the sink again.
sinkRows = sinkProducer.query(newDatasetID, SoqlQuery.SELECT_ALL, UnitTestDataset.LIST_TYPE);
TestCase.assertEquals(sourceRows.size(), sinkRows.size());
for (int i = 0; i < sourceRows.size(); i++) {
TestCase.assertEquals(sourceRows.get(i).getId(), sinkRows.get(i).getId());
TestCase.assertEquals(sourceRows.get(i).getName(), sinkRows.get(i).getName());
TestCase.assertEquals(sourceRows.get(i).getAnother_name(), sinkRows.get(i).getAnother_name());
TestCase.assertEquals(sourceRows.get(i).getDate(), sinkRows.get(i).getDate());
}
} finally {
sinkDdl.deleteDataset(newDatasetID);
}
}
@Test
public void testPortContentsReplace() throws SodaError, InterruptedException, IOException, LongRunningQueryException {
// Reset the source dataset to a known three-row state (in case a previous run changed it).
File threeRowsFile = new File("src/test/resources/datasync_unit_test_three_rows.csv");
Soda2Publisher.replaceNew(sourceProducer, sourceDdl, UNITTEST_DATASET_ID, threeRowsFile, true);
// Query for the rows of the source dataset.
int sourceTotalRows = getTotalRows(UNITTEST_DATASET_ID);
// Use one of the integration-test CSVs to reset the sink dataset to two rows.
File twoRowsFile = new File("src/test/resources/datasync_unit_test_two_rows.csv");
Soda2Publisher.replaceNew(sinkProducer, sinkDdl, UNITTEST_PORT_RESULT_DATASET_ID, twoRowsFile, true);
// Query for the rows of the sink dataset
int sinkTotalRows = getTotalRows(UNITTEST_PORT_RESULT_DATASET_ID);
// Sanity check before proceeding: the two datasets must start with different
// row counts, or the replace below would prove nothing.
TestCase.assertEquals(2, sinkTotalRows);
TestCase.assertFalse(sourceTotalRows == sinkTotalRows);
// Perform the test operation...
PortUtility.portContents(sourceProducer, sinkProducer,
UNITTEST_DATASET_ID, UNITTEST_PORT_RESULT_DATASET_ID, PublishMethod.replace);
// Grab both datasets' contents
List<UnitTestDataset> sourceRows = sourceProducer.query(UNITTEST_DATASET_ID, SoqlQuery.SELECT_ALL, UnitTestDataset.LIST_TYPE);
List<UnitTestDataset> sinkRows = sinkProducer.query(UNITTEST_PORT_RESULT_DATASET_ID, SoqlQuery.SELECT_ALL, UnitTestDataset.LIST_TYPE);
// Run tests that ensure that the sink dataset's contents have been completely replaced
// and are now identical to the source dataset's contents.
TestCase.assertEquals(sourceRows.size(), sinkRows.size());
for (int i = 0; i < sourceRows.size(); i++) {
TestCase.assertEquals(sourceRows.get(i).getId(), sinkRows.get(i).getId());
TestCase.assertEquals(sourceRows.get(i).getName(), sinkRows.get(i).getName());
TestCase.assertEquals(sourceRows.get(i).getAnother_name(), sinkRows.get(i).getAnother_name());
TestCase.assertEquals(sourceRows.get(i).getDate(), sinkRows.get(i).getDate());
}
}
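// adaptSchemaForAggregates should strip grouping-aggregate info from a column's
// formatting and fold the aggregate name into its field name, leaving all other
// columns untouched.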
@Test
public void testSchemaAdaptation() {
// set up original dataset schema
String groupingKey = "grouping_aggregate";
Dataset schema = new Dataset();
Column colNoFormatting = new Column();
colNoFormatting.setFieldName("col_without_formatting");
Column colIgnorableFormatting = new Column();
colIgnorableFormatting.setFieldName("col_with_ignorable_formatting");
Map<String, String> ignorableFormatting = new HashMap<String,String>();
ignorableFormatting.put("drill_down", "true");
colIgnorableFormatting.setFormat(ignorableFormatting);
Column colAggregatedFormatting = new Column();
colAggregatedFormatting.setFieldName("col_with_aggregated_formatting");
Map<String, String> aggregateFormatting = new HashMap<String,String>();
aggregateFormatting.put(groupingKey, "count");
colAggregatedFormatting.setFormat(aggregateFormatting);
List<Column> columns = new ArrayList<Column>();
columns.add(colNoFormatting);
columns.add(colIgnorableFormatting);
columns.add(colAggregatedFormatting);
schema.setColumns(columns);
// edit schema
PortUtility.adaptSchemaForAggregates(schema);
// test that edits are as expected
List<Column> editedColumns = schema.getColumns();
Column col1 = editedColumns.get(0);
Column col2 = editedColumns.get(1);
Column col3 = editedColumns.get(2);
TestCase.assertNotNull(col1);
TestCase.assertNotNull(col2);
TestCase.assertNotNull(col3);
// columns without formatting should be left alone
TestCase.assertEquals(colNoFormatting, col1);
// columns with ignorable formatting should be left alone
TestCase.assertEquals(colIgnorableFormatting, col2);
// columns with aggregation info in their formatting should have that info removed
// from the format map and the aggregate name prepended to the field name
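// e.g. {fieldName: "col_with_aggregated_formatting", format: {grouping_aggregate: "count"}}
// becomes {fieldName: "count_col_with_aggregated_formatting", format: {}}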
TestCase.assertFalse(col3.getFormat().containsKey(groupingKey));
TestCase.assertEquals("count_col_with_aggregated_formatting", col3.getFieldName());
}
}