Source Code of org.apache.accumulo.core.iterators.FamilyIntersectingIteratorTest

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.core.iterators;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.Map.Entry;


import junit.framework.TestCase;


import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.rfile.RFileTest;
import org.apache.accumulo.core.file.rfile.RFileTest.TestRFile;
import org.apache.accumulo.core.iterators.DefaultIteratorEnvironment;
import org.apache.accumulo.core.iterators.FamilyIntersectingIterator;
import org.apache.accumulo.core.iterators.IntersectingIterator;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.MultiIterator;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.hadoop.io.Text;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;


public class FamilyIntersectingIteratorTest extends TestCase {
  
  private static final Logger log = Logger.getLogger(IntersectingIterator.class);
  
  private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();
  private static final byte[] nullByte = {0};
  
  private static IteratorEnvironment env = new DefaultIteratorEnvironment();
  
  TreeMap<Key,Value> map;
  Text[] columnFamilies;
  Text[] otherColumnFamilies;
  
  static int docid = 0;
  static Text docColf = new Text(FamilyIntersectingIterator.DEFAULT_DOC_COLF);
  
  static {
    log.setLevel(Level.OFF);
    docColf.append(nullByte, 0, 1);
    docColf.append("type".getBytes(), 0, "type".getBytes().length);
  }
  
  static float hitRatio = 0.1f;
  
  private synchronized static TreeMap<Key,Value> createSortedMap(int numRows, int numDocsPerRow, Text[] columnFamilies, Text[] otherColumnFamilies,
      HashSet<Text> docs) {
    StringBuilder sb = new StringBuilder();
    Random r = new Random();
    Value v = new Value(new byte[0]);
    TreeMap<Key,Value> map = new TreeMap<Key,Value>();
    for (int i = 0; i < numRows; i++) {
      Text row = new Text(String.format("%06d", i));
      for (int startDocID = docid; docid - startDocID < numDocsPerRow; docid++) {
        sb.setLength(0);
        sb.append("fake doc contents");
        boolean docHits = true;
        Text doc = new Text("type");
        doc.append(nullByte, 0, 1);
        doc.append(String.format("%010d", docid).getBytes(), 0, 10);
        for (Text cf : columnFamilies) {
          if (r.nextFloat() < hitRatio) {
            Text colq = new Text(cf);
            colq.append(nullByte, 0, 1);
            colq.append(doc.getBytes(), 0, doc.getLength());
            colq.append(nullByte, 0, 1);
            colq.append("stuff".getBytes(), 0, "stuff".length());
            Key k = new Key(row, FamilyIntersectingIterator.DEFAULT_INDEX_COLF, colq);
            map.put(k, v);
            sb.append(" ");
            sb.append(cf);
          } else {
            docHits = false;
          }
        }
        if (docHits) {
          docs.add(doc);
        }
        for (Text cf : otherColumnFamilies) {
          if (r.nextFloat() < hitRatio) {
            Text colq = new Text(cf);
            colq.append(nullByte, 0, 1);
            colq.append(doc.getBytes(), 0, doc.getLength());
            colq.append(nullByte, 0, 1);
            colq.append("stuff".getBytes(), 0, "stuff".length());
            Key k = new Key(row, FamilyIntersectingIterator.DEFAULT_INDEX_COLF, colq);
            map.put(k, v);
            sb.append(" ");
            sb.append(cf);
          }
        }
        sb.append(" docID=").append(doc);
        Key k = new Key(row, docColf, new Text(String.format("%010d", docid).getBytes()));
        map.put(k, new Value(sb.toString().getBytes()));
      }
    }
    return map;
  }
  
  static TestRFile trf = new TestRFile();
  
  private synchronized static SortedKeyValueIterator<Key,Value> createIteratorStack(int numRows, int numDocsPerRow, Text[] columnFamilies,
      Text[] otherColumnFamilies, HashSet<Text> docs) throws IOException {
    // write a map file
    trf.openWriter(false);
    
    TreeMap<Key,Value> inMemoryMap = createSortedMap(numRows, numDocsPerRow, columnFamilies, otherColumnFamilies, docs);
    trf.writer.startNewLocalityGroup("docs", RFileTest.ncfs(docColf.toString()));
    for (Entry<Key,Value> entry : inMemoryMap.entrySet()) {
      if (entry.getKey().getColumnFamily().equals(docColf))
        trf.writer.append(entry.getKey(), entry.getValue());
    }
    trf.writer.startNewLocalityGroup("terms", RFileTest.ncfs(FamilyIntersectingIterator.DEFAULT_INDEX_COLF.toString()));
    for (Entry<Key,Value> entry : inMemoryMap.entrySet()) {
      if (entry.getKey().getColumnFamily().equals(FamilyIntersectingIterator.DEFAULT_INDEX_COLF))
        trf.writer.append(entry.getKey(), entry.getValue());
    }
    
    trf.closeWriter();
    
    trf.openReader();
    return trf.reader;
  }
  
  private synchronized static void cleanup() throws IOException {
    trf.closeReader();
    docid = 0;
  }
  
  public void testNull() {}
  
  @Override
  public void setUp() {
    Logger.getRootLogger().setLevel(Level.ERROR);
  }
  
  private static final int NUM_ROWS = 10;
  private static final int NUM_DOCIDS = 1000;
  
  public void test1() throws IOException {
    columnFamilies = new Text[2];
    columnFamilies[0] = new Text("C");
    columnFamilies[1] = new Text("E");
    otherColumnFamilies = new Text[4];
    otherColumnFamilies[0] = new Text("A");
    otherColumnFamilies[1] = new Text("B");
    otherColumnFamilies[2] = new Text("D");
    otherColumnFamilies[3] = new Text("F");
    
    hitRatio = 0.5f;
    HashSet<Text> docs = new HashSet<Text>();
    SortedKeyValueIterator<Key,Value> source = createIteratorStack(NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
    Map<String,String> options = new HashMap<String,String>();
    options.put(FamilyIntersectingIterator.columnFamiliesOptionName, FamilyIntersectingIterator.encodeColumns(columnFamilies));
    FamilyIntersectingIterator iter = new FamilyIntersectingIterator();
    iter.init(source, options, env);
    iter.seek(new Range(), EMPTY_COL_FAMS, false);
    int hitCount = 0;
    while (iter.hasTop()) {
      hitCount++;
      Key k = iter.getTopKey();
      Value v = iter.getTopValue();
      // System.out.println(k.toString());
      // System.out.println(iter.getDocID(k));
      
      assertTrue(docs.contains(iter.getDocID(k)));
      assertTrue(new String(v.get()).endsWith(" docID="+iter.getDocID(k)));
      
      iter.next();
    }
    assertEquals(hitCount, docs.size());
    cleanup();
  }
  
  public void test2() throws IOException {
    columnFamilies = new Text[3];
    columnFamilies[0] = new Text("A");
    columnFamilies[1] = new Text("E");
    columnFamilies[2] = new Text("G");
    otherColumnFamilies = new Text[4];
    otherColumnFamilies[0] = new Text("B");
    otherColumnFamilies[1] = new Text("C");
    otherColumnFamilies[2] = new Text("D");
    otherColumnFamilies[3] = new Text("F");
    
    hitRatio = 0.5f;
    HashSet<Text> docs = new HashSet<Text>();
    SortedKeyValueIterator<Key,Value> source = createIteratorStack(NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
    Map<String,String> options = new HashMap<String,String>();
    options.put(FamilyIntersectingIterator.columnFamiliesOptionName, FamilyIntersectingIterator.encodeColumns(columnFamilies));
    FamilyIntersectingIterator iter = new FamilyIntersectingIterator();
    iter.init(source, options, env);
    iter.seek(new Range(), EMPTY_COL_FAMS, false);
    int hitCount = 0;
    while (iter.hasTop()) {
      hitCount++;
      Key k = iter.getTopKey();
      Value v = iter.getTopValue();
      assertTrue(docs.contains(iter.getDocID(k)));
      assertTrue(new String(v.get()).endsWith(" docID="+iter.getDocID(k)));
      iter.next();
    }
    assertEquals(hitCount, docs.size());
    cleanup();
  }
  
  public void test3() throws IOException {
    columnFamilies = new Text[6];
    columnFamilies[0] = new Text("C");
    columnFamilies[1] = new Text("E");
    columnFamilies[2] = new Text("G");
    columnFamilies[3] = new Text("H");
    columnFamilies[4] = new Text("I");
    columnFamilies[5] = new Text("J");
    otherColumnFamilies = new Text[4];
    otherColumnFamilies[0] = new Text("A");
    otherColumnFamilies[1] = new Text("B");
    otherColumnFamilies[2] = new Text("D");
    otherColumnFamilies[3] = new Text("F");
    
    hitRatio = 0.5f;
    HashSet<Text> docs = new HashSet<Text>();
    SortedKeyValueIterator<Key,Value> source = createIteratorStack(NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
    SortedKeyValueIterator<Key,Value> source2 = createIteratorStack(NUM_ROWS, NUM_DOCIDS, columnFamilies, otherColumnFamilies, docs);
    ArrayList<SortedKeyValueIterator<Key,Value>> sourceIters = new ArrayList<SortedKeyValueIterator<Key,Value>>();
    sourceIters.add(source);
    sourceIters.add(source2);
    MultiIterator mi = new MultiIterator(sourceIters, false);
    Map<String,String> options = new HashMap<String,String>();
    options.put(FamilyIntersectingIterator.columnFamiliesOptionName, FamilyIntersectingIterator.encodeColumns(columnFamilies));
    FamilyIntersectingIterator iter = new FamilyIntersectingIterator();
    iter.init(mi, options, env);
    iter.seek(new Range(), EMPTY_COL_FAMS, false);
    int hitCount = 0;
    while (iter.hasTop()) {
      hitCount++;
      Key k = iter.getTopKey();
      Value v = iter.getTopValue();
      assertTrue(docs.contains(iter.getDocID(k)));
      assertTrue(new String(v.get()).endsWith(" docID="+iter.getDocID(k)));
      iter.next();
    }
    assertEquals(hitCount, docs.size());
    cleanup();
  }
}
Source Code of org.apache.accumulo.core.iterators.FamilyIntersectingIteratorTest

Related Classes of org.apache.accumulo.core.iterators.FamilyIntersectingIteratorTest