Package org.apache.pig.piggybank.test.storage

Source Code of org.apache.pig.piggybank.test.storage.TestMultiStorageCompression

/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
* NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/

package org.apache.pig.piggybank.test.storage;

import static org.apache.pig.ExecType.LOCAL;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.test.Util;

public class TestMultiStorageCompression extends TestCase {

   private static String patternString = "(\\d+)!+(\\w+)~+(\\w+)";
   public static ArrayList<String[]> data = new ArrayList<String[]>();
   static {
      data.add(new String[] { "f1,a,123" });
      data.add(new String[] { "f2,b,234" });
      data.add(new String[] { "f3,c,345" });
      data.add(new String[] { "f4,d,567" });
   }

   public void testMultiStorageShouldSupportBz2() throws Exception {

      String type = "bz2";
      List<String> filesToDelete = new ArrayList<String>();

      String tmpDir = System.getProperty("java.io.tmpdir");
      String outputPath = tmpDir + File.separator + "output001." + type;

      filesToDelete.add(outputPath);

      try {
         runQuery(outputPath, type);
         verifyResults(type, filesToDelete, outputPath);
      } finally {
         cleanUpDirs(filesToDelete);
      }
   }

   public void testMultiStorageShouldSupportGz() throws Exception {

      String type = "gz";
      List<String> filesToDelete = new ArrayList<String>();

      String tmpDir = System.getProperty("java.io.tmpdir");
      String outputPath = tmpDir + File.separator + "output001." + type;

      filesToDelete.add(outputPath);

      try {
         runQuery(outputPath, type);
         verifyResults(type, filesToDelete, outputPath);
      } finally {
         cleanUpDirs(filesToDelete);
      }
   }

   private void cleanUpDirs(List<String> filesToDelete) {
      // Delete files recursively
      Collections.reverse(filesToDelete);
      for (String string : filesToDelete)
         new File(string).delete();
   }


   private void verifyResults(String type, List<String> filesToDelete,
         String outputPath) throws IOException, FileNotFoundException {
      // Verify the output
      File outputDir = new File(outputPath);
      List<String> indexFolders = Arrays.asList(outputDir.list());

      // Assert whether all keys are present
      assertTrue(indexFolders.contains("f1." + type));
      assertTrue(indexFolders.contains("f2." + type));
      assertTrue(indexFolders.contains("f3." + type));
      assertTrue(indexFolders.contains("f4." + type));

      // Sort so that assertions are easy
      Collections.sort(indexFolders);

      for (int i = 0; i < indexFolders.size(); i++) {

         String indexFolder = indexFolders.get(i);
         if (indexFolder.startsWith("._SUCCESS")||indexFolder.startsWith("_SUCCESS"))
             continue;
         String topFolder = outputPath + File.separator + indexFolder;
         File indexFolderFile = new File(topFolder);
         filesToDelete.add(topFolder);
         String[] list = indexFolderFile.list();
         for (String outputFile : list) {

            String file = topFolder + File.separator + outputFile;
            filesToDelete.add(file);

            // Skip off any file starting with .
            if (outputFile.startsWith("."))
               continue;

            // Try to read the records using the codec
            CompressionCodec codec = null;


            // Use the codec according to the test case
            if (type.equals("bz2")) {
               codec = new BZip2Codec();
            } else if (type.equals("gz")) {
               codec = new GzipCodec();
            }
            if(codec instanceof Configurable) {
                ((Configurable)codec).setConf(new Configuration());
            }

            CompressionInputStream createInputStream = codec
                  .createInputStream(new FileInputStream(file));
            int b;
            StringBuffer sb = new StringBuffer();
            while ((b = createInputStream.read()) != -1) {
               sb.append((char) b);
            }
            createInputStream.close();

            // Assert for the number of fields and keys.
            String[] fields = sb.toString().split("\\t");
            assertEquals(3, fields.length);
            String id = indexFolder.substring(1,2);
            assertEquals("f" + id, fields[0]);

         }

      }
   }

   private void runQuery(String outputPath, String compressionType)
         throws Exception, ExecException, IOException, FrontendException {

      // create a data file
      String filename = TestHelper.createTempFile(data, "");
      PigServer pig = new PigServer(LOCAL);
      filename = filename.replace("\\", "\\\\");
      patternString = patternString.replace("\\", "\\\\");
      String query = "A = LOAD '" + Util.encodeEscape(filename)
            + "' USING PigStorage(',') as (a,b,c);";

      String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
            + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('"
            + Util.encodeEscape(outputPath) + "','0', '" + compressionType + "', '\\t');";

      // Run Pig
      pig.setBatchOn();
      pig.registerQuery(query);
      pig.registerQuery(query2);

      pig.executeBatch();
   }


}
TOP

Related Classes of org.apache.pig.piggybank.test.storage.TestMultiStorageCompression

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.