Package com.atlantbh.nutch.index.alternativedataflow.flow

Source Code of com.atlantbh.nutch.index.alternativedataflow.flow.CsvDataFlowTest

package com.atlantbh.nutch.index.alternativedataflow.flow;

import static org.mockito.Mockito.*;
import static org.junit.Assert.*;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.xml.namespace.QName;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.BlockJUnit4ClassRunner;
import org.powermock.modules.junit4.legacy.PowerMockRunner;

import com.atlantbh.nutch.index.alternativedataflow.conf.Entry;
import com.atlantbh.nutch.index.alternativedataflow.conf.Field;

@RunWith(BlockJUnit4ClassRunner.class)
public class CsvDataFlowTest {

  private CsvDataFlow csvDataFlow;

  // Test data
  private static final String[] testStringArray = { "testđžš1", "te,,st2", "te\"st3" };
  private static final Float[] testFloatArray = { 1.2f, 2.3f };
  private static final String[] testDateArray = { "15.03.2001""21.08.2003"};
  private static String preCreatedCsv;
 
  // Need to run
  private static final List<Entry> entryList = new LinkedList<Entry>();
  private static final Metadata metadata1 = new Metadata();
  private static final Metadata metadata2 = new Metadata();
  private static final Metadata metadata3 = new Metadata();
  private static File tempDir;

  @Before
  public void init() throws IOException {
    csvDataFlow = new CsvDataFlow();
   
    // Initialize metadata
    metadata1.add("testString", testStringArray[0]);
    metadata1.add("testFloat", String.valueOf(testFloatArray[0]));
    metadata1.add("testDate", testDateArray[0]);
    metadata2.add("testString", testStringArray[1]);
    metadata2.add("testFloat", String.valueOf(testFloatArray[1]));
    metadata2.add("testDate", testDateArray[1]);
    metadata3.add("testString", testStringArray[2]);
   
    // Create temp directory
    tempDir = File.createTempFile("test", null);
    tempDir.delete();
    tempDir.mkdir();
   
    // Precreated CSV to match against
    InputStream preCreatedCsvInputStream = CsvDataFlowTest.class.getResourceAsStream("example.csv");
    byte[] preCreatedCsvData = new byte[preCreatedCsvInputStream.available()];
    preCreatedCsvInputStream.read(preCreatedCsvData);
    preCreatedCsv = new String(preCreatedCsvData);
   
    // Initialize entry list
    Map<QName, String> parameterMap = new HashMap<QName, String>();
    parameterMap.put(CsvDataFlow.NAME, "test");
    parameterMap.put(CsvDataFlow.DESTINATION, tempDir.getAbsolutePath());
    parameterMap.put(CsvDataFlow.ADD_FIELD_NAMES_TO_FIRST_LINE, "true");
    parameterMap.put(CsvDataFlow.APPEND_TIMESTAMP, "true");
   
    Entry entry = new Entry("CSV");
    entry.setParameterMap(parameterMap);
    entry.setFieldList(Arrays.asList(new Field[] {new Field("testString"), new Field("testFloat"), new Field("testDate")}));
    entryList.add(entry);
  }

  @Test
  public void testProcessData() throws IOException {

    // Prepare data
    NutchDocument nutchDocumentIn = new NutchDocument();
    CrawlDatum crawDatum = new CrawlDatum();
    Parse parse = mock(Parse.class);
    Configuration configuration = mock(Configuration.class);
   
    ParseData parseData = new ParseData();
   
    parseData.setParseMeta(metadata1);
    when(parse.getData()).thenReturn(parseData);
   
    csvDataFlow.init(configuration, entryList);
    csvDataFlow.processData(nutchDocumentIn, parse, null, crawDatum, null);
   
    parseData.setParseMeta(metadata2);
    csvDataFlow.processData(nutchDocumentIn, parse, null, crawDatum, null);
 
    parseData.setParseMeta(metadata3);
    csvDataFlow.processData(nutchDocumentIn, parse, null, crawDatum, null);
   
    File[] files = tempDir.listFiles();
    InputStream createdCsvInputStream = new FileInputStream(files[0]);
    byte[] createdCsvData = new byte[createdCsvInputStream.available()];
    createdCsvInputStream.read(createdCsvData);
    String createdCsv = new String(createdCsvData);
   
    assertEquals("Something went wrong with the creation of the csv file!", 1, files.length);
    assertEquals("CSV is not as predicted!", preCreatedCsv, createdCsv);
   
    csvDataFlow.destroy();
  }

}
TOP

Related Classes of com.atlantbh.nutch.index.alternativedataflow.flow.CsvDataFlowTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.