Package org.apache.pig.builtin

Examples of org.apache.pig.builtin.PigStorage


    @Test
    public void testLFPig() throws Exception {
        Util.createInputFile(cluster, "input.txt", new String[]
            {"this:is:delimited:by:a:colon\n"});
        int arity1 = 6;
        LoadFunc lf = new PigStorage(":");
        LoadFunc p1 = new ReadToEndLoader(lf, ConfigurationUtil.
            toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f1 = p1.getNext();
        assertTrue(f1.size() == arity1);
        Util.deleteFile(cluster, "input.txt");

        int LOOP_COUNT = 100;
        String[] input = new String[LOOP_COUNT * LOOP_COUNT];
        int n = 0;
        for (int i = 0; i < LOOP_COUNT; i++) {
            for (int j = 0; j < LOOP_COUNT; j++) {
                input[n++] = (i + "\t" + i + "\t" + j % 2);
            }
        }
        Util.createInputFile(cluster, "input.txt", input);

        LoadFunc p15 = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
            toConfiguration(cluster.getProperties()), "input.txt", 0);

        int count = 0;
        while (true) {
            Tuple f15 = p15.getNext();
            if (f15 == null)
                break;
            count++;
            assertEquals(3, f15.size());
        }
        assertEquals(LOOP_COUNT * LOOP_COUNT, count);
        Util.deleteFile(cluster, "input.txt");

        String input2 = ":this:has:a:leading:colon\n";
        int arity2 = 6;
        Util.createInputFile(cluster, "input.txt", new String[] {input2});
        LoadFunc p2 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
            toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f2 = p2.getNext();
        assertTrue(f2.size() == arity2);
        Util.deleteFile(cluster, "input.txt");

        String input3 = "this:has:a:trailing:colon:\n";
        int arity3 = 6;
        Util.createInputFile(cluster, "input.txt", new String[] {input2});
        LoadFunc p3 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
            toConfiguration(cluster.getProperties()), "input.txt", 0);
        Tuple f3 = p3.getNext();
        assertTrue(f3.size() == arity3);
        Util.deleteFile(cluster, "input.txt");
    }
View Full Code Here


        String query = "a = load 'testSFPig-input.txt';" +
                "store a into '" + outputLocation + "';";
        mrPigServer.setBatchOn();
        Util.registerMultiLineQuery(mrPigServer, query);
        mrPigServer.executeBatch();
        LoadFunc lfunc = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
            toConfiguration(cluster.getProperties()), outputLocation, 0);
        Tuple f2 = lfunc.getNext();
        Util.deleteFile(cluster, "testSFPig-input.txt");

        Util.deleteFile(cluster, outputLocation);
View Full Code Here

    @Test
    public void testGetInputSizeFromFs() throws Exception {
        long size = 2L * 1024 * 1024 * 1024;
        Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
                new org.apache.hadoop.mapreduce.Job(CONF)));

        Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF,
                Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
                new org.apache.hadoop.mapreduce.Job(CONF)));

        Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF,
                Lists.newArrayList(
                        createPOLoadWithSize(size, new PigStorage()),
                        createPOLoadWithSize(size, new PigStorageWithStatistics())),
                        new org.apache.hadoop.mapreduce.Job(CONF)));

        // Negative test - PIG-3754
        POLoad poLoad = createPOLoadWithSize(size, new PigStorage());
        poLoad.setLFile(new FileSpec("hbase://users", null));

        Assert.assertEquals(-1, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF,
                Collections.singletonList(poLoad),
View Full Code Here

    @Test
    public void testPigStorageSchemaSearch() throws Exception {
        String globtestdir = "build/test/tmpglobbingdata/";
        ResourceSchema testSchema = new ResourceSchema(Utils.parseSchema("a0:chararray"));
        PigStorage pigStorage = new PigStorage();
        pigContext.connect();
        try{
            Util.deleteDirectory(new File(datadir));
            pig.mkdirs(globtestdir+"a");
            pig.mkdirs(globtestdir+"a/a0");
            putInputFile(globtestdir+"a/a0/input");
            pig.mkdirs(globtestdir+"a/b0");
            putInputFile(globtestdir+"a/b0/input");
            pig.mkdirs(globtestdir+"b");
        } catch (IOException e) {};

        // if schema file is not found, schema is null
        ResourceSchema schema = pigStorage.getSchema(globtestdir, new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(schema==null);

        // if .pig_schema is in the input directory
        putSchemaFile(globtestdir+"a/a0/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"a/a0", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/a0/.pig_schema").delete();

        // .pig_schema in one of globStatus returned directory
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/.pig_schema").delete();

        putSchemaFile(globtestdir+"b/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"b/.pig_schema").delete();

        // if .pig_schema is deep in the globbing, it will not get used
        putSchemaFile(globtestdir+"a/a0/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(schema==null);
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/a0/.pig_schema").delete();
        new File(globtestdir+"a/.pig_schema").delete();

        pigStorage = new PigStorage("\t", "-schema");
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"{a,b}", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
    }
View Full Code Here

        Tuple t = new DefaultTuple();
        t.append(inpDB);
        proj.attachInput(t);
        Result res = st.store();
        assertEquals(POStatus.STATUS_EOP, res.returnStatus);
        PigStorage ps = new PigStorage(":");
       
        int size = 0;
        BufferedReader br = new BufferedReader(new FileReader("/tmp/storeTest.txt"));
        for(String line=br.readLine();line!=null;line=br.readLine()){
            String[] flds = line.split(":",-1);
            t = new DefaultTuple();
            t.append(flds[0].compareTo("")!=0 ? ps.bytesToBag(flds[0].getBytes()) : null);
            t.append(flds[1].compareTo("")!=0 ? ps.bytesToCharArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.bytesToDouble(flds[3].getBytes()) : null);
            t.append(flds[4].compareTo("")!=0 ? ps.bytesToFloat(flds[4].getBytes()) : null);
            t.append(flds[5].compareTo("")!=0 ? ps.bytesToInteger(flds[5].getBytes()) : null);
            t.append(flds[6].compareTo("")!=0 ? ps.bytesToLong(flds[6].getBytes()) : null);
            t.append(flds[7].compareTo("")!=0 ? ps.bytesToMap(flds[7].getBytes()) : null);
            t.append(flds[8].compareTo("")!=0 ? ps.bytesToTuple(flds[8].getBytes()) : null);
           
            assertEquals(true, TestHelper.bagContains(inpDB, t));
            ++size;
        }
        assertEquals(true, size==inpDB.size());
View Full Code Here

        Tuple t = new DefaultTuple();
        t.append(inpDB);
        proj.attachInput(t);
        Result res = st.store();
        assertEquals(POStatus.STATUS_EOP, res.returnStatus);
        PigStorage ps = new PigStorage(":");
       
        int size = 0;
        BufferedReader br = new BufferedReader(new FileReader("/tmp/storeTest.txt"));
        for(String line=br.readLine();line!=null;line=br.readLine()){
            System.err.println("Complex data: ");
            System.err.println(line);
            String[] flds = line.split(":",-1);
            t = new DefaultTuple();
            t.append(flds[0].compareTo("")!=0 ? ps.bytesToBag(flds[0].getBytes()) : null);
            t.append(flds[1].compareTo("")!=0 ? ps.bytesToCharArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.bytesToDouble(flds[3].getBytes()) : null);
            t.append(flds[4].compareTo("")!=0 ? ps.bytesToFloat(flds[4].getBytes()) : null);
            t.append(flds[5].compareTo("")!=0 ? ps.bytesToInteger(flds[5].getBytes()) : null);
            t.append(flds[6].compareTo("")!=0 ? ps.bytesToLong(flds[6].getBytes()) : null);
            t.append(flds[7].compareTo("")!=0 ? ps.bytesToMap(flds[7].getBytes()) : null);
            t.append(flds[8].compareTo("")!=0 ? ps.bytesToTuple(flds[8].getBytes()) : null);
            t.append(flds[9].compareTo("")!=0 ? ps.bytesToCharArray(flds[9].getBytes()) : null);
           
            assertTrue(inputTuple.equals(t));
            ++size;
        }
        FileLocalizer.delete(fSpec.getFileName(), pc);
View Full Code Here

            Path outputDir, String name, Progressable progress)
            throws IOException {
        StoreFunc store;
        String storeFunc = job.get("pig.storeFunc", "");
        if (storeFunc.length() == 0) {
            store = new PigStorage();
        } else {
            try {
                store = (StoreFunc) PigContext
                        .instantiateFuncFromSpec(storeFunc);
            } catch (Exception e) {
View Full Code Here

    @Test
    public void testGetInputSizeFromFs() throws Exception {
        long size = 2L * 1024 * 1024 * 1024;
        Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF, Lists.newArrayList(createPOLoadWithSize(size, new PigStorage())),
                new org.apache.hadoop.mapreduce.Job(CONF)));

        Assert.assertEquals(size, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF,
                Lists.newArrayList(createPOLoadWithSize(size, new PigStorageWithStatistics())),
                new org.apache.hadoop.mapreduce.Job(CONF)));

        Assert.assertEquals(size * 2, InputSizeReducerEstimator.getTotalInputFileSize(
                CONF,
                Lists.newArrayList(
                        createPOLoadWithSize(size, new PigStorage()),
                        createPOLoadWithSize(size, new PigStorageWithStatistics())),
                new org.apache.hadoop.mapreduce.Job(CONF)));
    }
View Full Code Here

   
    @Test
    public void testPigStorageSchemaSearch() throws Exception {
        String globtestdir = "build/test/tmpglobbingdata/";
        ResourceSchema testSchema = new ResourceSchema(Utils.parseSchema("a0:chararray"));
        PigStorage pigStorage = new PigStorage();
        pigContext.connect();
        try{
            Util.deleteDirectory(new File(datadir));
            pig.mkdirs(globtestdir+"a");
            pig.mkdirs(globtestdir+"a/a0");
            putInputFile(globtestdir+"a/a0/input");
            pig.mkdirs(globtestdir+"a/b0");
            putInputFile(globtestdir+"a/b0/input");
            pig.mkdirs(globtestdir+"b");
        } catch (IOException e) {};
       
        // if schema file is not found, schema is null
        ResourceSchema schema = pigStorage.getSchema(globtestdir, new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(schema==null);
       
        // if .pig_schema is in the input directory
        putSchemaFile(globtestdir+"a/a0/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"a/a0", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/a0/.pig_schema").delete();
       
        // .pig_schema in one of globStatus returned directory
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/.pig_schema").delete();
       
        putSchemaFile(globtestdir+"b/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"b/.pig_schema").delete();
       
        // if .pig_schema is deep in the globbing, it will not get used
        putSchemaFile(globtestdir+"a/a0/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(schema==null);
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/a0/.pig_schema").delete();
        new File(globtestdir+"a/.pig_schema").delete();
       
        pigStorage = new PigStorage("\t", "-schema");
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"{a,b}", new Job(ConfigurationUtil.toConfiguration(pigContext.getProperties())));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
    }
View Full Code Here

    @SuppressWarnings("deprecation")
    @Test
    public void testPigStorageSchemaSearch() throws Exception {
        String globtestdir = "build/test/tmpglobbingdata/";
        ResourceSchema testSchema = new ResourceSchema(Utils.parseSchema("a0:chararray"));
        PigStorage pigStorage = new PigStorage();
        pigContext.connect();
        try{
            Util.deleteDirectory(new File(datadir));
            pig.mkdirs(globtestdir+"a");
            pig.mkdirs(globtestdir+"a/a0");
            putInputFile(globtestdir+"a/a0/input");
            pig.mkdirs(globtestdir+"a/b0");
            putInputFile(globtestdir+"a/b0/input");
            pig.mkdirs(globtestdir+"b");
        } catch (IOException e) {};

        Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());
        // if schema file is not found, schema is null
        ResourceSchema schema = pigStorage.getSchema(globtestdir, new Job(conf));
        Assert.assertTrue(schema==null);

        // if .pig_schema is in the input directory
        putSchemaFile(globtestdir+"a/a0/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"a/a0", new Job(conf));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/a0/.pig_schema").delete();

        // .pig_schema in one of globStatus returned directory
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(conf));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/.pig_schema").delete();

        putSchemaFile(globtestdir+"b/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(conf));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"b/.pig_schema").delete();

        // if .pig_schema is deep in the globbing, it will not get used
        putSchemaFile(globtestdir+"a/a0/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(conf));
        Assert.assertTrue(schema==null);
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"*", new Job(conf));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
        new File(globtestdir+"a/a0/.pig_schema").delete();
        new File(globtestdir+"a/.pig_schema").delete();

        pigStorage = new PigStorage("\t", "-schema");
        putSchemaFile(globtestdir+"a/.pig_schema", testSchema);
        schema = pigStorage.getSchema(globtestdir+"{a,b}", new Job(conf));
        Assert.assertTrue(ResourceSchema.equals(schema, testSchema));
    }
View Full Code Here

TOP

Related Classes of org.apache.pig.builtin.PigStorage

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.