Package org.apache.pig.impl.io

Examples of org.apache.pig.impl.io.FileSpec


            }
           
        }
       
        private void setUpHashTable() throws IOException {
            FileSpec replFile = new FileSpec(repl,new FuncSpec(PigStorage.class.getName()+"()"));
            POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);
            PigContext pc = new PigContext(ExecType.MAPREDUCE,ConfigurationUtil.toProperties(PigMapReduce.sJobConfInternal.get()));
            try {
                pc.connect();
           
View Full Code Here


    public void visit(LOSplit loSplit) throws FrontendException {
        String scope = DEFAULT_SCOPE;
        POSplit physOp = new POSplit(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)), loSplit.getRequestedParallelisam());
        physOp.setAlias(loSplit.getAlias());
        FileSpec splStrFile;
        try {
            splStrFile = new FileSpec(FileLocalizer.getTemporaryPath(pc).toString(),new FuncSpec(Utils.getTmpFileCompressorName(pc)));
        } catch (IOException e1) {
            byte errSrc = pc.getErrorSource();
            int errCode = 0;
            switch(errSrc) {
            case PigException.BUG:
View Full Code Here

        // Since nulls are smaller then anything else, if its in data, key with null value
        // should be very first entry of index.

        // First create a loader to read index.
        POLoad ld = new POLoad(new OperatorKey(this.mKey.scope,NodeIdGenerator.getGenerator().getNextNodeId(this.mKey.scope)),
                new FileSpec(indexFileName, idxFuncSpec));

        // Index file is distributed through Distributed Cache to all mappers. So, read it locally.
        Properties props = ConfigurationUtil.getLocalFSProperties();
        ld.setPc(new PigContext(ExecType.LOCAL, props));
View Full Code Here

        // the keys are sent in a tuple. If there is really only
        // 1 join key, it would be the first field of the tuple. If
        // there are multiple Join keys, the tuple itself represents
        // the join key
        Object firstLeftKey = (keys.size() == 1 ? keys.get(0): keys);
        POLoad ld = new POLoad(genKey(), new FileSpec(indexFile, new FuncSpec(indexFileLoadFuncSpec)));
               
        Properties props = ConfigurationUtil.getLocalFSProperties();
        PigContext pc = new PigContext(ExecType.LOCAL, props);
        ld.setPc(pc);
        index = new LinkedList<Tuple>();
View Full Code Here

    protected void visit(LOSplit split) throws VisitorException {
        String scope = split.getOperatorKey().scope;
        PhysicalOperator physOp = new POSplit(new OperatorKey(scope, nodeGen
                .getNextNodeId(scope)), split.getRequestedParallelism());
        physOp.setAlias(split.getAlias());
        FileSpec splStrFile;
        try {
            splStrFile = new FileSpec(FileLocalizer.getTemporaryPath(pc).toString(),new FuncSpec(Utils.getTmpFileCompressorName(pc)));
        } catch (IOException e1) {
            byte errSrc = pc.getErrorSource();
            int errCode = 0;
            switch(errSrc) {
            case PigException.BUG:
View Full Code Here

            LOG.warn("unable to get inputs of the job");
            return;
        }
       
        if (loads.size() == 1) {
            FileSpec fsp = loads.get(0);
            if (!PigStatsUtil.isTempFile(fsp.getFileName())) {
                long records = mapInputRecords;      
                InputStats is = new InputStats(fsp.getFileName(),
                        hdfsBytesRead, records, (state == JobState.SUCCESS));             
                is.setConf(conf);
                if (isSampler()) is.markSampleInput();
                if (isIndexer()) is.markIndexerInput();
                inputs.add(is);               
            }
        } else {
            // check for self-join (duplicated input file names)
            HashMap<String, Integer> dupmap = new HashMap<String, Integer>();
            for (FileSpec fsp : loads) {
                String name = PigStatsUtil.getMultiInputsCounterName(fsp.getFileName());
                if (name == null) continue;
                if (dupmap.containsKey(name)) {
                    int n = dupmap.get(name);
                    dupmap.put(name, (n+1));
                } else {
                    dupmap.put(name, 1);
                }               
            }
            for (FileSpec fsp : loads) {
                if (PigStatsUtil.isTempFile(fsp.getFileName())) continue;
                addOneInputStats(fsp.getFileName(), dupmap);
            }
        }           
    }
View Full Code Here

        Map<String, PhysicalOperator> leafMap = new HashMap<String, PhysicalOperator>();
        for (PhysicalOperator physOp : plan.getLeaves()) {
            log.info(physOp);
            if (physOp instanceof POStore) {
                FileSpec spec = ((POStore) physOp).getSFile();
                if (spec != null)
                    leafMap.put(spec.toString(), physOp);
            }
        }
        try {
            PigStats stats = launcher.launchPig(plan, jobName, pigContext);
View Full Code Here

    public static FileSpec checkLeafIsStore(
            PhysicalPlan plan,
            PigContext pigContext) throws ExecException {
        try {
            PhysicalOperator leaf = plan.getLeaves().get(0);
            FileSpec spec = null;
            if(!(leaf instanceof POStore)){
                String scope = leaf.getOperatorKey().getScope();
                POStore str = new POStore(new OperatorKey(scope,
                    NodeIdGenerator.getGenerator().getNextNodeId(scope)));
                spec = new FileSpec(FileLocalizer.getTemporaryPath(
                    pigContext).toString(),
                    new FuncSpec(Utils.getTmpFileCompressorName(pigContext)));
                str.setSFile(spec);
                plan.addAsLeaf(str);
            } else{
View Full Code Here

          LogicalSchema aschema = new LogicalSchema();
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          LOLoad A = new LOLoad(new FileSpec("bla", new FuncSpec("PigStorage", "\t")), aschema, lp, null);
          A.setAlias("A");
          lp.add(A);
         
          // B = load
          LogicalSchema bschema = new LogicalSchema();
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          LOLoad B = new LOLoad(new FileSpec("morebla", new FuncSpec("PigStorage", "\t")), bschema, lp, null);
          B.setAlias("B");
          lp.add(B);
         
          // C = join
          LogicalSchema cschema = new LogicalSchema();
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          cschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          MultiMap<Integer, LogicalExpressionPlan> mm =
                new MultiMap<Integer, LogicalExpressionPlan>();
          LogicalExpressionPlan aprojplan = new LogicalExpressionPlan();
          LOJoin C = new LOJoin(lp, mm, JOINTYPE.HASH, new boolean[] {true, true});
          new ProjectExpression(aprojplan, 0, 0, C);
          LogicalExpressionPlan bprojplan = new LogicalExpressionPlan();
          new ProjectExpression(bprojplan, 1, 0, C);
          mm.put(0, aprojplan);
          mm.put(1, bprojplan);
         
          C.setAlias("C");
          lp.add(C);
          lp.connect(A, C);
          lp.connect(B, C);
       
          // D = filter
          LogicalExpressionPlan filterPlan = new LogicalExpressionPlan();
          LOFilter D = new LOFilter(lp, filterPlan);
          ProjectExpression fx = new ProjectExpression(filterPlan, 0, 0, D);
          ConstantExpression fc0 = new ConstantExpression(filterPlan, new Integer(0), new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          EqualExpression eq1 = new EqualExpression(filterPlan, fx, fc0);
          ProjectExpression fanotherx = new ProjectExpression(filterPlan, 0, 0, D);
          ProjectExpression fa = new ProjectExpression(filterPlan, 0, 2, D);
          EqualExpression eq2 = new EqualExpression(filterPlan, fanotherx, fa);
          AndExpression and1 = new AndExpression(filterPlan, eq1, eq2);
          ProjectExpression fb = new ProjectExpression(filterPlan, 0, 3, D);
          ConstantExpression fc1 = new ConstantExpression(filterPlan, new Integer(1),new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          EqualExpression eq3 = new EqualExpression(filterPlan, fb, fc1);
          AndExpression and2 = new AndExpression(filterPlan, and1, eq3);
          ProjectExpression fanotherb = new ProjectExpression(filterPlan, 0, 3, D);
          ProjectExpression fy = new ProjectExpression(filterPlan, 0, 1, D);
          EqualExpression eq4 = new EqualExpression(filterPlan, fy, fanotherb);
          new AndExpression(filterPlan, and2, eq4);
         
          D.setAlias("D");
          // Connect D to B, since the transform has happened.
          lp.add(D);
          lp.connect(C, D);
        }
       
        System.out.println(lp);
        LogicalPlanOptimizer optimizer = new LogicalPlanOptimizer(lp, 500, null);
        optimizer.optimize();
       
        LogicalPlan expected = new LogicalPlan();
        {
            // A = load
          LogicalSchema aschema = new LogicalSchema();
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "x", null, DataType.BYTEARRAY));
          aschema.addField(new LogicalSchema.LogicalFieldSchema(
              "y", null, DataType.BYTEARRAY));
          LOLoad A = new LOLoad(new FileSpec("bla", new FuncSpec("PigStorage", "\t")), aschema, expected, null);
          expected.add(A);
         
          // DA = filter
          LogicalExpressionPlan DAfilterPlan = new LogicalExpressionPlan();
          LOFilter DA = new LOFilter(expected, DAfilterPlan);
          ProjectExpression fx = new ProjectExpression(DAfilterPlan, 0, 0, DA);
          fx.neverUseForRealSetFieldSchema(new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          ConstantExpression fc0 = new ConstantExpression(DAfilterPlan, new Integer(0), new LogicalFieldSchema(null, null, DataType.BYTEARRAY));
          new EqualExpression(DAfilterPlan, fx, fc0);
         
          DA.neverUseForRealSetSchema(aschema);
          expected.add(DA);
          expected.connect(A, DA);
         
          // B = load
          LogicalSchema bschema = new LogicalSchema();
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "a", null, DataType.BYTEARRAY));
          bschema.addField(new LogicalSchema.LogicalFieldSchema(
              "b", null, DataType.BYTEARRAY));
          LOLoad B = new LOLoad(new FileSpec("morebla", new FuncSpec("PigStorage", "\t")), bschema, expected, null);
          expected.add(B);
         
          // DB = filter
          LogicalExpressionPlan DBfilterPlan = new LogicalExpressionPlan();
            LOFilter DB = new LOFilter(expected, DBfilterPlan);
View Full Code Here

            }
           
        }
       
        private void setUpHashTable() throws IOException {
            FileSpec replFile = new FileSpec(repl,new FuncSpec(PigStorage.class.getName()+"()"));
            POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);
            PigContext pc = new PigContext(ExecType.MAPREDUCE,ConfigurationUtil.toProperties(PigMapReduce.sJobConf));
            try {
                pc.connect();
           
View Full Code Here

TOP

Related Classes of org.apache.pig.impl.io.FileSpec

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.