result.outputNames.add(EMPTY_STRING);
}
}
private void classifyExpr(NamedExpression ex, RecordBatch incoming, ClassifierResult result) {
NameSegment expr = ((SchemaPath)ex.getExpr()).getRootSegment();
NameSegment ref = ex.getRef().getRootSegment();
boolean exprHasPrefix = expr.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
boolean refHasPrefix = ref.getPath().contains(StarColumnHelper.PREFIX_DELIMITER);
boolean exprIsStar = expr.getPath().equals(StarColumnHelper.STAR_COLUMN);
boolean refContainsStar = ref.getPath().contains(StarColumnHelper.STAR_COLUMN);
boolean exprContainsStar = expr.getPath().contains(StarColumnHelper.STAR_COLUMN);
boolean refEndsWithStar = ref.getPath().endsWith(StarColumnHelper.STAR_COLUMN);
String exprPrefix = EMPTY_STRING;
String exprSuffix = expr.getPath();
if (exprHasPrefix) {
// get the prefix of the expr
String[] exprComponents = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
assert(exprComponents.length == 2);
exprPrefix = exprComponents[0];
exprSuffix = exprComponents[1];
result.prefix = exprPrefix;
}
if (exprContainsStar) {
result.isStar = true;
Integer value = (Integer) result.prefixMap.get(exprPrefix);
if (value == null) {
Integer n = 1;
result.prefixMap.put(exprPrefix, n);
} else {
Integer n = value + 1;
result.prefixMap.put(exprPrefix, n);
}
}
int incomingSchemaSize = incoming.getSchema().getFieldCount();
// for debugging..
// if (incomingSchemaSize > 9) {
// assert false;
// }
// input is '*' and output is 'prefix_*'
if (exprIsStar && refHasPrefix && refEndsWithStar) {
String[] components = ref.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
assert(components.length == 2);
String prefix = components[0];
result.outputNames = Lists.newArrayList();
for(VectorWrapper<?> wrapper : incoming) {
ValueVector vvIn = wrapper.getValueVector();
String name = vvIn.getField().getPath().getRootSegment().getPath();
// add the prefix to the incoming column name
String newName = prefix + StarColumnHelper.PREFIX_DELIMITER + name;
addToResultMaps(newName, result, false);
}
}
// input and output are the same
else if (expr.getPath().equals(ref.getPath())) {
if (exprContainsStar && exprHasPrefix) {
assert exprPrefix != null;
int k = 0;
result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
for (int j=0; j < incomingSchemaSize; j++) {
result.outputNames.add(EMPTY_STRING); // initialize
}
for (VectorWrapper<?> wrapper : incoming) {
ValueVector vvIn = wrapper.getValueVector();
String incomingName = vvIn.getField().getPath().getRootSegment().getPath();
// get the prefix of the name
String[] nameComponents = incomingName.split(StarColumnHelper.PREFIX_DELIMITER, 2);
// if incoming valuevector does not have a prefix, ignore it since this expression is not referencing it
if (nameComponents.length <= 1) {
k++;
continue;
}
String namePrefix = nameComponents[0];
if (exprPrefix.equals(namePrefix)) {
String newName = incomingName;
if (!result.outputMap.containsKey(newName)) {
result.outputNames.set(k, newName);
result.outputMap.put(newName, newName);
}
}
k++;
}
} else {
result.outputNames = Lists.newArrayList();
if (exprContainsStar) {
for (VectorWrapper<?> wrapper : incoming) {
ValueVector vvIn = wrapper.getValueVector();
String incomingName = vvIn.getField().getPath().getRootSegment().getPath();
if (refContainsStar) {
addToResultMaps(incomingName, result, true); // allow dups since this is likely top-level project
} else {
addToResultMaps(incomingName, result, false);
}
}
} else {
String newName = expr.getPath();
if (!refHasPrefix && !exprHasPrefix) {
addToResultMaps(newName, result, true); // allow dups since this is likely top-level project
} else {
addToResultMaps(newName, result, false);
}
}
}
}
// only the output has prefix
else if (!exprHasPrefix && refHasPrefix) {
result.outputNames = Lists.newArrayList();
String newName = ref.getPath();
addToResultMaps(newName, result, false);
}
// input has prefix but output does not
else if (exprHasPrefix && !refHasPrefix) {
int k = 0;
result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
for (int j=0; j < incomingSchemaSize; j++) {
result.outputNames.add(EMPTY_STRING); // initialize
}
for (VectorWrapper<?> wrapper : incoming) {
ValueVector vvIn = wrapper.getValueVector();
String name = vvIn.getField().getPath().getRootSegment().getPath();
String[] components = name.split(StarColumnHelper.PREFIX_DELIMITER, 2);
if (components.length <= 1) {
k++;
continue;
}
String namePrefix = components[0];
String nameSuffix = components[1];
if (exprPrefix.equals(namePrefix)) {
if (refContainsStar) {
// remove the prefix from the incoming column names
String newName = getUniqueName(nameSuffix, result); // for top level we need to make names unique
result.outputNames.set(k, newName);
} else if (exprSuffix.equals(nameSuffix)) {
// example: ref: $f1, expr: T0<PREFIX><column_name>
String newName = ref.getPath();
result.outputNames.set(k, newName);
}
} else {
result.outputNames.add(EMPTY_STRING);
}
k++;
}
}
// input and output have prefixes although they could be different...
else if (exprHasPrefix && refHasPrefix) {
String[] input = expr.getPath().split(StarColumnHelper.PREFIX_DELIMITER, 2);
assert(input.length == 2);
assert false : "Unexpected project expression or reference"; // not handled yet
}
else {
// if the incoming schema's column name matches the expression name of the Project,
// then we just want to pick the ref name as the output column name
result.outputNames = Lists.newArrayListWithCapacity(incomingSchemaSize);
for (int j=0; j < incomingSchemaSize; j++) {
result.outputNames.add(EMPTY_STRING); // initialize
}
int k = 0;
for (VectorWrapper<?> wrapper : incoming) {
ValueVector vvIn = wrapper.getValueVector();
String incomingName = vvIn.getField().getPath().getRootSegment().getPath();
if (expr.getPath().equals(incomingName)) {
String newName = ref.getPath();
if (!result.outputMap.containsKey(newName)) {
result.outputNames.set(k, newName);
result.outputMap.put(newName, newName);
}
}