/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.pact.runtime.sort;
import java.io.IOException;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.NoSuchElementException;
import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.api.java.record.functions.ReduceFunction;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.services.iomanager.IOManager;
import eu.stratosphere.nephele.services.memorymanager.MemoryManager;
import eu.stratosphere.nephele.services.memorymanager.spi.DefaultMemoryManager;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordComparator;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordSerializerFactory;
import eu.stratosphere.pact.runtime.test.util.DummyInvokable;
import eu.stratosphere.pact.runtime.test.util.TestData;
import eu.stratosphere.pact.runtime.test.util.TestData.Generator.KeyMode;
import eu.stratosphere.pact.runtime.test.util.TestData.Generator.ValueMode;
import eu.stratosphere.pact.runtime.test.util.TestData.Key;
import eu.stratosphere.pact.runtime.util.KeyGroupedIterator;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;
import eu.stratosphere.util.LogUtils;
import eu.stratosphere.util.MutableObjectIterator;
public class CombiningUnilateralSortMergerITCase {
private static final Log LOG = LogFactory.getLog(CombiningUnilateralSortMergerITCase.class);
private static final long SEED = 649180756312423613L;
private static final int KEY_MAX = 1000;
private static final int VALUE_LENGTH = 118;
private static final int NUM_PAIRS = 50000;
public static final int MEMORY_SIZE = 1024 * 1024 * 256;
private final AbstractTask parentTask = new DummyInvokable();
private IOManager ioManager;
private MemoryManager memoryManager;
private TypeSerializerFactory<Record> serializerFactory;
private TypeComparator<Record> comparator;
@BeforeClass
public static void setup() {
LogUtils.initializeDefaultTestConsoleLogger();
}
@SuppressWarnings("unchecked")
@Before
public void beforeTest() {
this.memoryManager = new DefaultMemoryManager(MEMORY_SIZE);
this.ioManager = new IOManager();
this.serializerFactory = RecordSerializerFactory.get();
this.comparator = new RecordComparator(new int[] {0}, new Class[] {TestData.Key.class});
}
@After
public void afterTest() {
this.ioManager.shutdown();
if (!this.ioManager.isProperlyShutDown()) {
Assert.fail("I/O Manager was not properly shut down.");
}
if (this.memoryManager != null) {
Assert.assertTrue("Memory leak: not all segments have been returned to the memory manager.",
this.memoryManager.verifyEmpty());
this.memoryManager.shutdown();
this.memoryManager = null;
}
}
@Test
public void testCombine() throws Exception
{
int noKeys = 100;
int noKeyCnt = 10000;
MockRecordReader reader = new MockRecordReader();
LOG.debug("initializing sortmerger");
TestCountCombiner comb = new TestCountCombiner();
Sorter<Record> merger = new CombiningUnilateralSortMerger<Record>(comb,
this.memoryManager, this.ioManager, reader, this.parentTask, this.serializerFactory, this.comparator,
64 * 1024 * 1024, 64, 0.7f);
final Record rec = new Record();
rec.setField(1, new IntValue(1));
final TestData.Key key = new TestData.Key();
for (int i = 0; i < noKeyCnt; i++) {
for (int j = 0; j < noKeys; j++) {
key.setKey(j);
rec.setField(0, key);
reader.emit(rec);
}
}
reader.close();
MutableObjectIterator<Record> iterator = merger.getIterator();
Iterator<Integer> result = getReducingIterator(iterator, serializerFactory.getSerializer(), comparator.duplicate());
while (result.hasNext()) {
Assert.assertEquals(noKeyCnt, result.next().intValue());
}
merger.close();
// if the combiner was opened, it must have been closed
Assert.assertTrue(comb.opened == comb.closed);
}
@Test
public void testCombineSpilling() throws Exception {
int noKeys = 100;
int noKeyCnt = 10000;
MockRecordReader reader = new MockRecordReader();
LOG.debug("initializing sortmerger");
TestCountCombiner comb = new TestCountCombiner();
Sorter<Record> merger = new CombiningUnilateralSortMerger<Record>(comb,
this.memoryManager, this.ioManager, reader, this.parentTask, this.serializerFactory, this.comparator,
3 * 1024 * 1024, 64, 0.005f);
final Record rec = new Record();
rec.setField(1, new IntValue(1));
final TestData.Key key = new TestData.Key();
for (int i = 0; i < noKeyCnt; i++) {
for (int j = 0; j < noKeys; j++) {
key.setKey(j);
rec.setField(0, key);
reader.emit(rec);
}
}
reader.close();
MutableObjectIterator<Record> iterator = merger.getIterator();
Iterator<Integer> result = getReducingIterator(iterator, serializerFactory.getSerializer(), comparator.duplicate());
while (result.hasNext()) {
Assert.assertEquals(noKeyCnt, result.next().intValue());
}
merger.close();
// if the combiner was opened, it must have been closed
Assert.assertTrue(comb.opened == comb.closed);
}
@Test
public void testSortAndValidate() throws Exception
{
final Hashtable<TestData.Key, Integer> countTable = new Hashtable<TestData.Key, Integer>(KEY_MAX);
for (int i = 1; i <= KEY_MAX; i++) {
countTable.put(new TestData.Key(i), new Integer(0));
}
// comparator
final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();
// reader
MockRecordReader reader = new MockRecordReader();
// merge iterator
LOG.debug("initializing sortmerger");
TestCountCombiner2 comb = new TestCountCombiner2();
Sorter<Record> merger = new CombiningUnilateralSortMerger<Record>(comb,
this.memoryManager, this.ioManager, reader, this.parentTask, this.serializerFactory, this.comparator,
64 * 1024 * 1024, 2, 0.7f);
// emit data
LOG.debug("emitting data");
TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
Record rec = new Record();
final TestData.Value value = new TestData.Value("1");
for (int i = 0; i < NUM_PAIRS; i++) {
Assert.assertTrue((rec = generator.next(rec)) != null);
final TestData.Key key = rec.getField(0, TestData.Key.class);
rec.setField(1, value);
reader.emit(rec);
countTable.put(new TestData.Key(key.getKey()), countTable.get(key) + 1);
}
reader.close();
rec = null;
// check order
MutableObjectIterator<Record> iterator = merger.getIterator();
LOG.debug("checking results");
Record rec1 = new Record();
Record rec2 = new Record();
Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
countTable.put(new TestData.Key(rec1.getField(0, TestData.Key.class).getKey()), countTable.get(rec1.getField(0, TestData.Key.class)) - (Integer.parseInt(rec1.getField(1, TestData.Value.class).toString())));
while ((rec2 = iterator.next(rec2)) != null) {
final Key k1 = rec1.getField(0, TestData.Key.class);
final Key k2 = rec2.getField(0, TestData.Key.class);
Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);
countTable.put(new TestData.Key(k2.getKey()), countTable.get(k2) - (Integer.parseInt(rec2.getField(1, TestData.Value.class).toString())));
Record tmp = rec1;
rec1 = rec2;
k1.setKey(k2.getKey());
rec2 = tmp;
}
for (Integer cnt : countTable.values()) {
Assert.assertTrue(cnt == 0);
}
merger.close();
// if the combiner was opened, it must have been closed
Assert.assertTrue(comb.opened == comb.closed);
}
// --------------------------------------------------------------------------------------------
public static class TestCountCombiner extends ReduceFunction {
private static final long serialVersionUID = 1L;
private final IntValue count = new IntValue();
public volatile boolean opened = false;
public volatile boolean closed = false;
@Override
public void combine(Iterator<Record> values, Collector<Record> out) {
Record rec = null;
int cnt = 0;
while (values.hasNext()) {
rec = values.next();
cnt += rec.getField(1, IntValue.class).getValue();
}
this.count.setValue(cnt);
rec.setField(1, this.count);
out.collect(rec);
}
@Override
public void reduce(Iterator<Record> values, Collector<Record> out) {}
@Override
public void open(Configuration parameters) throws Exception {
opened = true;
}
@Override
public void close() throws Exception {
closed = true;
}
}
public static class TestCountCombiner2 extends ReduceFunction {
private static final long serialVersionUID = 1L;
public volatile boolean opened = false;
public volatile boolean closed = false;
@Override
public void combine(Iterator<Record> values, Collector<Record> out) {
Record rec = null;
int cnt = 0;
while (values.hasNext()) {
rec = values.next();
cnt += Integer.parseInt(rec.getField(1, TestData.Value.class).toString());
}
out.collect(new Record(rec.getField(0, Key.class), new TestData.Value(cnt + "")));
}
@Override
public void reduce(Iterator<Record> values, Collector<Record> out) {
// yo, nothing, mon
}
@Override
public void open(Configuration parameters) throws Exception {
opened = true;
}
@Override
public void close() throws Exception {
closed = true;
}
}
private static Iterator<Integer> getReducingIterator(MutableObjectIterator<Record> data, TypeSerializer<Record> serializer, TypeComparator<Record> comparator) {
final KeyGroupedIterator<Record> groupIter = new KeyGroupedIterator<Record>(data, serializer, comparator);
return new Iterator<Integer>() {
private boolean hasNext = false;
@Override
public boolean hasNext() {
if (hasNext) {
return true;
}
try {
hasNext = groupIter.nextKey();
} catch (IOException e) {
throw new RuntimeException(e);
}
return hasNext;
}
@Override
public Integer next() {
if (hasNext()) {
hasNext = false;
Iterator<Record> values = groupIter.getValues();
Record rec = null;
int cnt = 0;
while (values.hasNext()) {
rec = values.next();
cnt += rec.getField(1, IntValue.class).getValue();
}
return cnt;
} else {
throw new NoSuchElementException();
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
}