/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.morphline.api;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Assert;
import org.junit.Test;
import org.kitesdk.morphline.base.FieldExpression;
import com.google.common.base.Joiner;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
/**
 * Tests for morphline field expressions of the form {@code @{fieldName}}.
 *
 * <p>{@link #testExpression()} exercises {@link FieldExpression} itself. The other tests run
 * against the small regex-based helpers defined at the bottom of this class
 * ({@code resolveExpressionSimple} and {@code resolveReference}).
 */
@SuppressWarnings("unchecked")
public class FieldExpressionTest extends Assert {

  // example: @{first_name}
  private static final Pattern PATTERN = Pattern.compile("@\\{.*?\\}");

  /** Checks the java.util.regex behavior that the helpers in this class rely on. */
  @Test
  public void testSimplePatterns() throws Exception {
    // column ruler for the string below:
    //              012345678901234567890123456789
    String input = "Mr. @{first_name} age: @{age}";
    Matcher refs = Pattern.compile("@\\{(.*?)\\}").matcher(input);

    assertNextReference(refs, "first_name", 6);
    assertNextReference(refs, "age", 25);
    assertFalse(refs.find());

    // matches() must consume the entire input, unlike find()
    assertTrue(Pattern.matches("foo", "foo"));
    assertFalse(Pattern.matches("foo", "barfoo"));
    assertTrue(Pattern.matches(".*foo", "barfoo"));
  }

  /** Checks the string-joining helper and the single-reference helper on a small record. */
  @Test
  public void testSimpleExpression() throws Exception {
    Record record = newBasicRecord();

    assertEquals("foo", resolveExpressionSimple("foo", record));
    assertEquals("", resolveExpressionSimple("", record));
    assertEquals("Nadja", resolveExpressionSimple("@{first_name}", record));
    assertEquals("Ms. Nadja", resolveExpressionSimple("Ms. @{first_name}", record));
    assertEquals(
        "Ms. Nadja is 8 years old.",
        resolveExpressionSimple("Ms. @{first_name} is @{age} years old.", record));

    assertReferenceResolution(record);
  }

  /** Checks {@link FieldExpression#evaluate} for literals, single and multi-valued fields. */
  @Test
  public void testExpression() throws Exception {
    Record record = newBasicRecord();
    record.getFields().put("likes", "pembo");
    record.getFields().put("likes", "shanti");
    record.getFields().put("hates", "brothers");
    record.getFields().put("hates", "milk");

    // literals and single references
    assertEquals(Arrays.asList("foo"), resolveExpression("foo", record));
    assertEquals(Arrays.asList(""), resolveExpression("", record));
    assertEquals(Arrays.asList("Nadja"), resolveExpression("@{first_name}", record));
    assertEquals(Arrays.asList("pembo", "shanti"), resolveExpression("@{likes}", record));
    assertEquals(Arrays.asList("Ms. Nadja"), resolveExpression("Ms. @{first_name}", record));

    // a reference to a missing field yields no results at all
    assertEquals(Arrays.asList(), resolveExpression("Ms. @{nonExistingField}", record));
    assertEquals(Arrays.asList(), resolveExpression("@{nonExistingField}", record));

    assertEquals(
        Arrays.asList("Ms. Nadja is 8 years old."),
        resolveExpression("Ms. @{first_name} is @{age} years old.", record));

    // a multi-valued field produces one result per value
    assertEquals(
        Arrays.asList(
            "is one years",
            "is 2 years",
            "is three years"),
        resolveExpression("is @{tags} years", record));
    assertEquals(
        Arrays.asList(
            "Ms. Nadja is one years old.",
            "Ms. Nadja is 2 years old.",
            "Ms. Nadja is three years old."),
        resolveExpression("Ms. @{first_name} is @{tags} years old.", record));

    // two multi-valued fields produce every combination, leftmost field varying slowest
    assertEquals(
        Arrays.asList(
            "She likes pembo and hates brothers.",
            "She likes pembo and hates milk.",
            "She likes shanti and hates brothers.",
            "She likes shanti and hates milk."),
        resolveExpression("She likes @{likes} and hates @{hates}.", record));

    assertReferenceResolution(record);
  }

  /** Builds a record with first_name=Nadja, age=8 and tags=[one, 2, three]. */
  private Record newBasicRecord() {
    Record record = new Record();
    record.getFields().put("first_name", "Nadja");
    record.getFields().put("age", 8);
    record.getFields().put("tags", "one");
    record.getFields().put("tags", 2);
    record.getFields().put("tags", "three");
    return record;
  }

  /**
   * Advances {@code refs} to its next match and asserts that capture group 1 holds
   * {@code expectedName} starting at offset {@code expectedStart}.
   */
  private void assertNextReference(Matcher refs, String expectedName, int expectedStart) {
    assertTrue(refs.find());
    assertEquals(expectedName, refs.group(1));
    assertEquals(expectedStart, refs.start(1));
    assertEquals(expectedStart + expectedName.length(), refs.end(1));
  }

  /**
   * Asserts that {@code resolveReference} returns the values of first_name and tags, and that
   * it rejects a bare field name that lacks the {@code @{...}} wrapper.
   */
  private void assertReferenceResolution(Record record) {
    assertEquals(Arrays.asList("Nadja"), resolveReference("@{first_name}", record));
    assertEquals(Arrays.asList("one", 2, "three"), resolveReference("@{tags}", record));
    try {
      resolveReference("first_name", record);
      fail();
    } catch (MorphlineCompilationException expected) {
      // expected: "first_name" is not wrapped in @{...}
    }
  }

  /** Evaluates {@code expr} against {@code record} using {@link FieldExpression}. */
  private List resolveExpression(String expr, Record record) {
    FieldExpression expression = new FieldExpression(expr, null);
    return expression.evaluate(record);
  }

  /** Same as the three-argument overload, using an empty config. */
  private List resolveReference(String expr, Record record) {
    Config emptyConfig = ConfigFactory.empty();
    return resolveReference(expr, record, emptyConfig);
  }

  /**
   * Returns the values of the field named by {@code reference}.
   *
   * @param reference a string that must match {@code PATTERN} in its entirety
   * @param config passed to the exception that is thrown on a malformed reference
   * @throws MorphlineCompilationException if {@code reference} does not match {@code PATTERN}
   */
  // TODO: optimize by not using regexes
  private List resolveReference(String reference, Record record, Config config) {
    if (PATTERN.matcher(reference).matches()) {
      // strip the leading "@{" and the trailing "}" to get the field name
      String fieldName = reference.substring("@{".length(), reference.length() - "}".length());
      return record.getFields().get(fieldName);
    }
    throw new MorphlineCompilationException("Invalid variable reference", config);
  }

  /**
   * Replaces every {@code @{name}} occurrence in {@code expr} with the values of that field
   * joined by a single space, and returns the resulting string. Text outside of references is
   * copied through unchanged.
   */
  // TODO: optimize by not using regexes
  private String resolveExpressionSimple(String expr, Record record) {
    StringBuilder out = new StringBuilder();
    Matcher refs = PATTERN.matcher(expr);
    int cursor = 0; // index of the first character not yet copied to out
    while (refs.find()) {
      // literal text between the previous reference and this one
      out.append(expr, cursor, refs.start());

      String token = refs.group();
      String fieldName = token.substring("@{".length(), token.length() - "}".length());
      if (fieldName.isEmpty()) {
        out.append(record.toString()); // @{} means dump string representation of entire record
      } else {
        List values = record.getFields().get(fieldName);
        Joiner.on(" ").appendTo(out, values);
      }
      cursor = refs.end();
    }
    // trailing literal text after the last reference
    out.append(expr, cursor, expr.length());
    return out.toString();
  }
}