/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.cdk.morphline.hadoop.sequencefile;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.cdk.morphline.api.AbstractMorphlineTest;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Fields;
import com.google.common.io.Closeables;
public class ReadSequenceFileTest extends AbstractMorphlineTest {
private static final Logger LOGGER = LoggerFactory.getLogger(ReadSequenceFileTest.class);
/**
* Test that Solr queries on a parsed SequenceFile document
* return the expected content and fields. Don't pass
* in our own parser via the context.
*/
@Test
public void testSequenceFileContentSimple() throws Exception {
morphline = createMorphline("test-morphlines/sequenceFileMorphlineSimple");
String path = RESOURCES_DIR;
File sequenceFile = new File(path, "testSequenceFileContentSimple.seq");
int numRecords = 5;
HashMap<String, Record> expected = createTextSequenceFile(sequenceFile, numRecords);
InputStream in = new FileInputStream(sequenceFile.getAbsolutePath());
Record record = new Record();
record.put(Fields.ATTACHMENT_BODY, in);
startSession();
assertEquals(1, collector.getNumStartEvents());
assertTrue(morphline.process(record));
assertTrue(areFieldsEqual(expected, collector.getRecords()));
}
/**
* return a mapping of expected keys -> records
*/
private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
HashMap<String, Record> map = new HashMap<String, Record>();
SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
SequenceFile.CompressionType.NONE, null, metadata);
for (int i = 0; i < numRecords; ++i) {
Text key = new Text("key" + i);
Text value = new Text("value" + i);
writer.append(key, value);
Record record = new Record();
record.put("key", key);
record.put("value", value);
map.put(key.toString(), record);
}
} finally {
Closeables.closeQuietly(writer);
}
return map;
}
/**
* Test that Solr queries on a parsed SequenceFile document
* return the expected content and fields.
*/
@Test
public void testSequenceFileContentCustomParsers() throws Exception {
morphline = createMorphline("test-morphlines/sequenceFileMorphlineSimple");
String path = RESOURCES_DIR;
File sequenceFile = new File(path, "testSequenceFileContentCustomParsers.seq");
int numRecords = 10;
HashMap<String, Record> expected = createTextSequenceFile(sequenceFile, numRecords);
InputStream in = new FileInputStream(sequenceFile.getAbsolutePath());
Record record = new Record();
record.put(Fields.ATTACHMENT_BODY, in);
startSession();
assertEquals(1, collector.getNumStartEvents());
assertTrue(morphline.process(record));
assertTrue(areFieldsEqual(expected, collector.getRecords()));
}
/**
* return a mapping of expected keys -> records
*/
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
HashMap<String, Record> map = new HashMap<String, Record>();
SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(new Configuration(), out, Text.class, ParseTextMyWritableBuilder.MyWritable.class,
SequenceFile.CompressionType.NONE, null, metadata);
for (int i = 0; i < numRecords; ++i) {
Text key = new Text("key" + i);
ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value", i);
writer.append(key, value);
Record record = new Record();
record.put("key", key);
record.put("value", value);
map.put(key.toString(), record);
}
} finally {
Closeables.closeQuietly(writer);
}
return map;
}
private TreeMap<Text, Text> getMetadataForSequenceFile() {
TreeMap<Text, Text> metadata = new TreeMap<Text, Text>();
metadata.put(new Text("license"), new Text("Apache"));
metadata.put(new Text("year"), new Text("2013"));
return metadata;
}
private boolean areRecordFieldsEqual(Record record1, Record record2, List<String> fieldsToCheck) {
for(String field : fieldsToCheck) {
if (!record1.get(field).equals(record2.get(field))) {
return false;
}
}
return true;
}
private boolean areFieldsEqual(HashMap<String, Record> expected, List<Record> actual) {
if (expected.size() != actual.size()) {
return false;
}
for (Record current : actual) {
String key = current.getFirstValue("key").toString();
Record currentExpected = expected.get(key);
if (!areRecordFieldsEqual(current, currentExpected, Arrays.asList(new String[]{"key", "value"}))) {
return false;
}
}
return true;
}
}