/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.ExecType;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.local.executionengine.physicalLayer.LocalLogToPhyTranslationVisitor;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.LODefine;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder;
import org.apache.pig.impl.logicalLayer.PlanSetter;
import org.apache.pig.impl.logicalLayer.validators.LogicalPlanValidationExecutor;
import org.apache.pig.impl.plan.CompilationMessageCollector;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.pen.util.FunctionalLogicalOptimizer;
import org.junit.Test;
/**
 * Tests the SPLIT operator using a {@link PigContext} created with
 * {@link ExecType#LOCAL}.
 *
 * <p>The test writes a file of random single-digit numbers, builds a logical
 * plan for a three-way split over that file, translates it to a physical plan
 * with {@link LocalLogToPhyTranslationVisitor}, and verifies that every leaf
 * of the physical plan produces exactly one of the values selected by the
 * split conditions.
 */
public class TestLocalPOSplit extends TestCase {

    Random r = new Random();

    Log log = LogFactory.getLog(getClass());

    PigContext pigContext = new PigContext(ExecType.LOCAL, new Properties());

    // State objects passed to LogicalPlanBuilder.parse() by buildPlan().
    Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>();

    Map<OperatorKey, LogicalOperator> logicalOpTable = new HashMap<OperatorKey, LogicalOperator>();

    Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>();

    Map<String, String> fileNameMap = new HashMap<String, String>();

    /**
     * Splits a file of random digits three ways (values 2, 9 and 7) and checks
     * that each leaf of the resulting physical plan emits a single, distinct
     * value.
     *
     * @throws IOException if the input file cannot be created or written
     * @throws VisitorException if the logical plan cannot be translated
     * @throws ExecException if connecting, pulling tuples or reading a field fails
     */
    @Test
    public void testSplit() throws IOException, VisitorException, ExecException {
        pigContext.connect();
        File datFile = writeRandomDigits();

        String query = "split (load '" + Util.encodeEscape(datFile.getAbsolutePath())
                + "') into a if $0 == 2, b if $0 == 9, c if $0 == 7 ;";
        LogicalPlan plan = buildPlan(query);
        PhysicalPlan pp = buildPhysicalPlan(plan);

        DataBag[] bag = new DataBag[pp.getLeaves().size()];
        for (int i = 0; i < bag.length; i++) {
            bag[i] = BagFactory.getInstance().newDefaultBag();
        }

        // Drain every leaf into its own bag.
        for (int i = 0; i < bag.length; i++) {
            Tuple t = null;
            for (Result res = pp.getLeaves().get(i).getNext(t);
                    res.returnStatus != POStatus.STATUS_EOP;
                    res = pp.getLeaves().get(i).getNext(t)) {
                if (res.returnStatus == POStatus.STATUS_OK) {
                    bag[i].add((Tuple) res.result);
                } else if (res.returnStatus == POStatus.STATUS_ERR) {
                    // Without this the loop would only end on STATUS_EOP and
                    // could spin forever on a leaf that keeps reporting errors.
                    fail("Leaf " + i + " returned an error status: " + res.result);
                }
            }
        }

        // Depending on how the "maps" in the physical plan are built, the
        // leaves can come back in a different order between runs. So for each
        // leaf we check that
        // 1) its first value is one of the expected ones and was not produced
        //    by an earlier leaf, and
        // 2) all remaining tuples from that leaf carry the same value.
        Map<DataByteArray, Boolean> seen = new HashMap<DataByteArray, Boolean>();
        seen.put(new DataByteArray("7".getBytes()), false);
        seen.put(new DataByteArray("9".getBytes()), false);
        seen.put(new DataByteArray("2".getBytes()), false);

        for (int i = 0; i < bag.length; i++) {
            DataByteArray firstValue = null;
            Iterator<Tuple> it = bag[i].iterator();
            if (it.hasNext()) {
                Tuple t = it.next();
                System.out.println(t);
                firstValue = (DataByteArray) t.get(0);
                // Look the value up explicitly: unboxing a missing entry would
                // surface as a NullPointerException instead of a test failure.
                Boolean alreadySeen = seen.get(firstValue);
                assertNotNull("Leaf " + i + " produced unexpected value " + firstValue,
                        alreadySeen);
                assertFalse("Value " + firstValue + " was produced by more than one leaf",
                        alreadySeen.booleanValue());
                seen.put(firstValue, true);
            }
            while (it.hasNext()) {
                Tuple t = it.next();
                System.out.println(t);
                assertEquals(firstValue, t.get(0));
            }
        }
    }

    /**
     * Creates a temporary file containing 100 lines, each holding one random
     * integer in the range 0-9.
     *
     * <p>The stream is closed even if a write fails, and the file is
     * registered for deletion when the JVM exits.
     *
     * @return the file that was written
     * @throws IOException if the file cannot be created or written
     */
    private File writeRandomDigits() throws IOException {
        File datFile = File.createTempFile("tempA", ".dat");
        datFile.deleteOnExit();
        FileOutputStream dat = new FileOutputStream(datFile);
        try {
            for (int i = 0; i < 100; i++) {
                String str = r.nextInt(10) + "\n";
                dat.write(str.getBytes());
            }
        } finally {
            dat.close();
        }
        return datFile;
    }

    /**
     * Translates a logical plan into a physical plan using
     * {@link LocalLogToPhyTranslationVisitor} configured with this test's
     * {@link PigContext}.
     *
     * @param lp the logical plan to translate
     * @return the physical plan produced by the visitor
     * @throws VisitorException if the visitor fails while walking the plan
     */
    public PhysicalPlan buildPhysicalPlan(LogicalPlan lp)
            throws VisitorException {
        LocalLogToPhyTranslationVisitor visitor = new LocalLogToPhyTranslationVisitor(
                lp);
        visitor.setPigContext(pigContext);
        visitor.visit();
        return visitor.getPhysicalPlan();
    }

    /**
     * Builds a logical plan for {@code query} using the class loader that
     * loaded {@link LogicalPlanBuilder}.
     *
     * @param query the Pig Latin statement to parse
     * @return the parsed and refined logical plan
     */
    public LogicalPlan buildPlan(String query) {
        return buildPlan(query, LogicalPlanBuilder.class.getClassLoader());
    }

    /**
     * Parses {@code query} into a logical plan, checks that every root is a
     * load or define operator, prints the roots and their direct successors
     * to {@code System.err}, and runs the plan through
     * {@link #refineLogicalPlan(LogicalPlan)}.
     *
     * <p>Any exception raised while building the plan is logged with its
     * stack trace and then reported as a test failure.
     *
     * @param query the Pig Latin statement to parse
     * @param cldr the class loader to install on {@link LogicalPlanBuilder}
     * @return the refined logical plan
     */
    public LogicalPlan buildPlan(String query, ClassLoader cldr) {
        LogicalPlanBuilder.classloader = cldr;
        LogicalPlanBuilder builder = new LogicalPlanBuilder(pigContext);
        try {
            LogicalPlan lp = builder.parse("Test-Plan-Builder", query, aliases,
                    logicalOpTable, aliasOp, fileNameMap);

            for (LogicalOperator op : lp.getRoots()) {
                if (!(op instanceof LOLoad) && !(op instanceof LODefine)) {
                    fail("Cannot have a root that is not the load or define operator. Found "
                            + op.getClass().getName() + " -- " + query);
                }
            }

            System.err.println("Query: " + query);
            // Only the top-level roots and their direct children are printed;
            // a recursive walk would be needed to dump the whole tree.
            for (LogicalOperator op : lp.getRoots()) {
                System.err.println("Logical Plan Root: "
                        + op.getClass().getName() + " object " + op);
                List<LogicalOperator> successors = lp.getSuccessors(op);
                if (successors != null) {
                    for (LogicalOperator lop : successors) {
                        System.err.println("Successor: "
                                + lop.getClass().getName() + " object " + lop);
                    }
                }
            }

            lp = refineLogicalPlan(lp);
            assertNotNull(lp);
            return lp;
        } catch (IOException e) {
            // Log with the throwable so the stack trace is not lost.
            log.error("IOException while building plan for query: " + query, e);
            fail("IOException: " + e.getMessage());
        } catch (Exception e) {
            log.error("Exception while building plan for query: " + query, e);
            fail(e.getClass().getName() + ": " + e.getMessage() + " -- "
                    + query);
        }
        // Not reached at runtime: fail() always throws. Required because the
        // compiler cannot tell that the catch blocks never complete normally.
        return null;
    }

    /**
     * Runs {@link PlanSetter}, {@link LogicalPlanValidationExecutor} and
     * {@link FunctionalLogicalOptimizer} over {@code plan}.
     *
     * <p>Failures in any of these steps are logged with their stack trace and
     * the plan is still returned, so this method is best-effort.
     *
     * @param plan the plan to refine
     * @return the same {@code plan} instance that was passed in
     */
    private LogicalPlan refineLogicalPlan(LogicalPlan plan) {
        PlanSetter ps = new PlanSetter(plan);
        try {
            ps.visit();
        } catch (VisitorException e) {
            log.error("PlanSetter failed on the logical plan", e);
        }

        // Run the plan through the validator and the optimizer.
        CompilationMessageCollector collector = new CompilationMessageCollector();
        try {
            LogicalPlanValidationExecutor validator = new LogicalPlanValidationExecutor(
                    plan, pigContext);
            validator.validate(plan, collector);
            FunctionalLogicalOptimizer optimizer = new FunctionalLogicalOptimizer(
                    plan);
            optimizer.optimize();
        } catch (FrontendException fe) {
            // Logged rather than silently dropped, so a validation or
            // optimization problem is visible in the test output.
            log.error("Validation or optimization of the logical plan failed", fe);
        }
        return plan;
    }
}