Package com.atlantbh.nutch.index.alternativedataflow

Source Code of com.atlantbh.nutch.index.alternativedataflow.AlternativeDataFlowIndexingFilterTest

package com.atlantbh.nutch.index.alternativedataflow;

import static org.junit.Assert.assertEquals;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import javax.xml.namespace.QName;

import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.BlockJUnit4ClassRunner;

import com.atlantbh.nutch.index.alternativedataflow.conf.Entry;
import com.atlantbh.nutch.index.alternativedataflow.conf.Field;
import com.atlantbh.nutch.index.alternativedataflow.flow.CsvDataFlow;
import com.atlantbh.nutch.index.alternativedataflow.flow.CsvDataFlowTest;

@RunWith(BlockJUnit4ClassRunner.class)
public class AlternativeDataFlowIndexingFilterTest {

 
  private AlternativeDataFlowIndexingFilter alternativeDataFlowIndexingFilter;

  // Test data
  private static final String[] testStringArray = { "test1", "te,,st2", "te\"st3" };
  private static final Float[] testFloatArray = { 1.2f, 2.3f };
  private static final String[] testDateArray = { "15.03.2001""21.08.2003"};
  private static String preCreatedCsv;
 
  // Need to run
  private static final Metadata metadata1 = new Metadata();
  private static final Metadata metadata2 = new Metadata();
  private static final Metadata metadata3 = new Metadata();
  private static File tempDir;

  @Before
  public void init() throws IOException {
    alternativeDataFlowIndexingFilter = new AlternativeDataFlowIndexingFilter();
   
    // Initialize metadata
    metadata1.add("testString", testStringArray[0]);
    metadata1.add("testFloat", String.valueOf(testFloatArray[0]));
    metadata1.add("testDate", testDateArray[0]);
    metadata2.add("testString", testStringArray[1]);
    metadata2.add("testFloat", String.valueOf(testFloatArray[1]));
    metadata2.add("testDate", testDateArray[1]);
    metadata3.add("testString", testStringArray[2]);
   
    tempDir = new File("C:\\csvs");
   
    // Precreated CSV to match against
    InputStream preCreatedCsvInputStream = CsvDataFlowTest.class.getResourceAsStream("example.csv");
    byte[] preCreatedCsvData = new byte[preCreatedCsvInputStream.available()];
    preCreatedCsvInputStream.read(preCreatedCsvData);
    preCreatedCsv = new String(preCreatedCsvData);
  }

  @Test
  @Ignore("It works but it can be tested only manually :(")
  public void testProcessData() throws IOException, IndexingException {

    // Prepare data
    NutchDocument nutchDocumentIn = new NutchDocument();
    ParseData parseData = new ParseData();
    CrawlDatum crawDatum = new CrawlDatum();
    Parse parse = mock(Parse.class);
    Configuration configuration = mock(Configuration.class);
   
    // Mock objects
    when(configuration.get(anyString())).thenReturn("");
    when(configuration.getConfResourceAsReader(anyString())).thenReturn(new InputStreamReader(AlternativeDataFlowIndexingFilterTest.class.getResourceAsStream("example.alternativedataflow-indexfilter-conf.xml")));
     
    parseData.setParseMeta(metadata1);
    when(parse.getData()).thenReturn(parseData);
   
    alternativeDataFlowIndexingFilter.setConf(configuration);
    alternativeDataFlowIndexingFilter.filter(nutchDocumentIn, parse, null, crawDatum, null);
   
    parseData.setParseMeta(metadata2);
    alternativeDataFlowIndexingFilter.filter(nutchDocumentIn, parse, null, crawDatum, null);
 
    parseData.setParseMeta(metadata3);
    alternativeDataFlowIndexingFilter.filter(nutchDocumentIn, parse, null, crawDatum, null);
   
    File[] files = tempDir.listFiles();
    InputStream createdCsvInputStream = new FileInputStream(files[0]);
    byte[] createdCsvData = new byte[createdCsvInputStream.available()];
    createdCsvInputStream.read(createdCsvData);
    String createdCsv = new String(createdCsvData);
   
    assertEquals("Something went wrong with the creation of the csv file!", 1, files.length);
    assertEquals("CSV is not as predicted!", preCreatedCsv, createdCsv);
  }
 
}
TOP

Related Classes of com.atlantbh.nutch.index.alternativedataflow.AlternativeDataFlowIndexingFilterTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.