Package org.kitesdk.morphline.solr

Source Code of org.kitesdk.morphline.solr.SolrMorphlineZkAvroTest

/*
* Copyright 2013 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.morphline.solr;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.avro.Schema.Field;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.io.Files;

@ThreadLeakAction({Action.WARN})
@ThreadLeakLingering(linger = 0)
@ThreadLeakZombies(Consequence.CONTINUE)
@ThreadLeakScope(Scope.NONE)
@SuppressCodecs({"Lucene3x", "Lucene40"})
public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTest {
 
  @Override
  public void doTest() throws Exception {
    Joiner joiner = Joiner.on(File.separator);
    File file = new File(joiner.join(RESOURCES_DIR, "test-documents", "sample-statuses-20120906-141433-medium.avro"));
   
    waitForRecoveriesToFinish(false);
   
    // load avro records via morphline and zk into solr
    morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer");   
    Record record = new Record();
    byte[] body = Files.toByteArray(file);   
    record.put(Fields.ATTACHMENT_BODY, body);
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
   
    commit();
   
    // fetch sorted result set from solr
    QueryResponse rsp = cloudClient.query(new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc));  
    assertEquals(2104, collector.getRecords().size());
    assertEquals(collector.getRecords().size(), rsp.getResults().size());
   
    Collections.sort(collector.getRecords(), new Comparator<Record>() {
      @Override
      public int compare(Record r1, Record r2) {
        return r1.get("id").toString().compareTo(r2.get("id").toString());
      }     
    });  

    // fetch test input data and sort like solr result set
    List<GenericData.Record> records = new ArrayList();
    FileReader<GenericData.Record> reader = new DataFileReader(file, new GenericDatumReader());
    while (reader.hasNext()) {
      GenericData.Record expected = reader.next();
      records.add(expected);
    }
    assertEquals(collector.getRecords().size(), records.size());   
    Collections.sort(records, new Comparator<GenericData.Record>() {
      @Override
      public int compare(GenericData.Record r1, GenericData.Record r2) {
        return r1.get("id").toString().compareTo(r2.get("id").toString());
      }     
    });  
   
    Object lastId = null;
    for (int i = 0; i < records.size(); i++) { 
      //System.out.println("myrec" + i + ":" + records.get(i));     
      Object id = records.get(i);
      if (id != null && id.equals(lastId)) {
        throw new IllegalStateException("Detected duplicate id. Test input data must not contain duplicate ids!");       
      }
      lastId = id;
    }
   
    for (int i = 0; i < records.size(); i++) { 
      //System.out.println("myrsp" + i + ":" + rsp.getResults().get(i));     
    }   

    Iterator<SolrDocument> rspIter = rsp.getResults().iterator();
    for (int i = 0; i < records.size(); i++) { 
      // verify morphline spat out expected data
      Record actual = collector.getRecords().get(i);
      GenericData.Record expected = records.get(i);
      Preconditions.checkNotNull(expected);
      assertTweetEquals(expected, actual, i);
     
      // verify Solr result set contains expected data
      actual = new Record();
      actual.getFields().putAll(next(rspIter));
      assertTweetEquals(expected, actual, i);
    }
   
    Notifications.notifyRollbackTransaction(morphline);
    Notifications.notifyShutdown(morphline);
    cloudClient.shutdown();
  }
 
  private void assertTweetEquals(GenericData.Record expected, Record actual, int i) {
    Preconditions.checkNotNull(expected);
    Preconditions.checkNotNull(actual);
//    System.out.println("\n\nexpected: " + toString(expected));
//    System.out.println("actual:   " + actual);
    String[] fieldNames = new String[] {
        "id",
        "in_reply_to_status_id",
        "in_reply_to_user_id",
        "retweet_count",
        "text",
        };
    for (String fieldName : fieldNames) {
      assertEquals(
          i + " fieldName: " + fieldName,
          expected.get(fieldName).toString(),
          actual.getFirstValue(fieldName).toString());
    }
  }

  private String toString(GenericData.Record avroRecord) {
    Record record = new Record();
    for (Field field : avroRecord.getSchema().getFields()) {
      record.put(field.name(), avroRecord.get(field.pos()));
    }
    return record.toString(); // prints sorted by key for human readability
  }

}
TOP

Related Classes of org.kitesdk.morphline.solr.SolrMorphlineZkAvroTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.