Package de.jungblut.nlp.mr

Source Code of de.jungblut.nlp.mr.TfIdfCalculatorJobTest

package de.jungblut.nlp.mr;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.Before;
import org.junit.Test;

import de.jungblut.nlp.mr.TfIdfCalculatorJob.DocumentVectorizerReducer;
import de.jungblut.writable.VectorWritable;

public class TfIdfCalculatorJobTest {

  ReduceDriver<Text, TextIntIntIntWritable, Text, VectorWritable> reduceDriver;

  @Before
  public void setUp() {
    DocumentVectorizerReducer reducer = new DocumentVectorizerReducer();
    reduceDriver = ReduceDriver.newReduceDriver(reducer);
    reduceDriver.getConfiguration().setInt(
        TfIdfCalculatorJob.NUMBER_OF_DOCUMENTS_KEY, 20);
    reduceDriver.getConfiguration().setInt(
        TfIdfCalculatorJob.NUMBER_OF_TOKENS_KEY, 3);
  }

  @Test
  public void testReducer() throws IOException {

    reduceDriver.setInput(new Text("ID2012"), Arrays
        .asList(new TextIntIntIntWritable(new Text("this"), new IntWritable(4),
            new IntWritable(2), new IntWritable(1))));

    List<Pair<Text, VectorWritable>> res = reduceDriver.run();
    assertEquals(1, res.size());

    Pair<Text, VectorWritable> pair = res.get(0);
    assertEquals("ID2012", pair.getFirst().toString());
    assertEquals(3.2188758248682006d, pair.getSecond().getVector().get(1), 1e-4);
  }

  @Test
  public void testWordVectorizerReducer() throws IOException {
    reduceDriver.getConfiguration().set(
        TfIdfCalculatorJob.WORD_COUNT_OUTPUT_KEY, "true");
    reduceDriver.setInput(new Text("ID2012"), Arrays
        .asList(new TextIntIntIntWritable(new Text("this"), new IntWritable(4),
            new IntWritable(2), new IntWritable(1))));

    List<Pair<Text, VectorWritable>> res = reduceDriver.run();
    assertEquals(1, res.size());

    Pair<Text, VectorWritable> pair = res.get(0);
    assertEquals("ID2012", pair.getFirst().toString());
    assertEquals(2, pair.getSecond().getVector().get(1), 1e-4);
  }
}
TOP

Related Classes of de.jungblut.nlp.mr.TfIdfCalculatorJobTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.