Package com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat

Examples of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit


    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-2"));
  }

  @Test
  public void testLuceneIndexInputSplit() throws Exception {
    LuceneIndexInputSplit orig = new LuceneIndexInputSplit(
        Lists.newArrayList(new Path("/index/test"),
                           new Path("/index/test2"),
                           new Path("/index/test3")), 500L);

    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(bytesOut);

    orig.write(dataOut);

    LuceneIndexInputSplit deSerialized = new LuceneIndexInputSplit();
    deSerialized.readFields(new DataInputStream((new ByteArrayInputStream(bytesOut.toByteArray()))));

    assertEquals(orig.getIndexDirs(), deSerialized.getIndexDirs());
    assertEquals(orig.getLength(), deSerialized.getLength());

    assertEquals(0, orig.compareTo(deSerialized));

    LuceneIndexInputSplit smaller = new LuceneIndexInputSplit(
        Lists.newArrayList(new Path("/index/small")), 100L);

    assertTrue(orig.compareTo(smaller) > 0);
    assertTrue(smaller.compareTo(orig) < 0);
  }
View Full Code Here


    String[] paths = new String[] {
        "/index/1", "/index/2", "/index/3", "/index/4", "/index/5", "/index/6"};
    Long[] sizes = new Long[]{500L, 300L, 100L, 150L, 1200L, 500L};

    for (int i = 0; i < paths.length; i++) {
      splits.add(new LuceneIndexInputSplit(Lists.newArrayList(new Path(paths[i])), sizes[i]));
    }

    List<InputSplit> combined = lif.combineSplits(splits, 1000L, 10000L);
    assertEquals(3, combined.size());
View Full Code Here

  @Test
  public void testCombineSplitsOneSplit() throws Exception {
    DummyLuceneInputFormat lif = new DummyLuceneInputFormat();

    PriorityQueue<LuceneIndexInputSplit> splits = new PriorityQueue<LuceneIndexInputSplit>();
    splits.add(new LuceneIndexInputSplit(Lists.newArrayList(new Path("/index/1")), 1500L));

    List<InputSplit> combined = lif.combineSplits(splits, 1000L, 10000L);
    assertEquals(1, combined.size());

    List<Path> dirs = ((LuceneIndexInputSplit) combined.get(0)).getIndexDirs();
View Full Code Here

    PriorityQueue<LuceneIndexInputSplit> splits = new PriorityQueue<LuceneIndexInputSplit>();
    String[] paths = new String[]{"/index/1", "/index/2", "/index/3"};
    Long[] sizes = new Long[]{1500L, 1501L, 1502L};
    for (int i = 0; i < paths.length; i++) {
      splits.add(new LuceneIndexInputSplit(Lists.newArrayList(new Path(paths[i])), sizes[i]));
    }

    List<InputSplit> combined = lif.combineSplits(splits, 1000L, 10000L);
    assertEquals(3, combined.size());
View Full Code Here

          break;
        default:
          sizes[i] = 1L;
          paths[i] = "/index/small-" + i;
      }
      splits.add(new LuceneIndexInputSplit(Lists.newArrayList(new Path(paths[i])), sizes[i]));
    }

    List<InputSplit> combined = lif.combineSplits(splits, 150L, 10L);
    assertEquals(12, combined.size());

    for (int i = 0; i < 9; i++) {
      LuceneIndexInputSplit split = (LuceneIndexInputSplit) combined.get(i);
      assertEquals(10L, split.getIndexDirs().size());
      assertEquals(10L, split.getLength());
      for (Path p : split.getIndexDirs()) {
        assertTrue(p.toString().startsWith("/index/small-"));
      }
    }

    LuceneIndexInputSplit split = (LuceneIndexInputSplit) combined.get(9);
    assertEquals(8, split.getIndexDirs().size());
    assertEquals(107, split.getLength());
    for (int i = 0; i < 7; i++) {
      assertTrue(split.getIndexDirs().get(i).toString().startsWith("/index/small-"));
    }
    assertEquals("/index/100", split.getIndexDirs().get(7).toString());

    split = (LuceneIndexInputSplit) combined.get(10);
    assertEquals(1, split.getIndexDirs().size());
    assertEquals(300, split.getLength());
    assertEquals("/index/300", split.getIndexDirs().get(0).toString());

    split = (LuceneIndexInputSplit) combined.get(11);
    assertEquals(1, split.getIndexDirs().size());
    assertEquals(500, split.getLength());
    assertEquals("/index/500", split.getIndexDirs().get(0).toString());
  }
View Full Code Here

  private void testLuceneIndexRecordReader(ArrayList<String> queryStrings,
      ArrayList<Path> indexPaths,
      ArrayList<ArrayList<ArrayList<Integer>>> indexesQueriesDocIds)
      throws Exception {

    LuceneIndexInputSplit split = createStrictMock(LuceneIndexInputSplit.class);
    expect(split.getIndexDirs()).andReturn(indexPaths);
    replay(split);

    Configuration conf = new Configuration();
    TaskAttemptContext context = createStrictMock(TaskAttemptContext.class);
    expect(HadoopCompat.getConfiguration(context)).andStubReturn(conf);
View Full Code Here

    LuceneIndexInputFormat.setInputPaths(inputPaths, conf);

    lif.loadConfig(conf);
    PriorityQueue<LuceneIndexInputSplit> splits = lif.findSplits(conf);
    LuceneIndexInputSplit split;
    split = splits.poll();
    assertEquals(4, split.getLength());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-1"));

    split = splits.poll();
    assertEquals(6, split.getLength());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/more-indexes/index-3"));

    split = splits.poll();
    assertEquals(20, split.getLength());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-2"));

    assertTrue(splits.isEmpty());
  }
View Full Code Here

    LuceneIndexInputFormat.setMaxCombinedIndexSizePerSplitBytes(15L, conf);
    JobContext jobContext = createStrictMock(JobContext.class);
    expect(HadoopCompat.getConfiguration(jobContext)).andStubReturn(conf);
    replay(jobContext);
    List<InputSplit> splits = lif.getSplits(jobContext);
    LuceneIndexInputSplit split = (LuceneIndexInputSplit) splits.get(0);
    assertEquals(2, split.getIndexDirs().size());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-1"));
    assertTrue(split.getIndexDirs().get(1).toString()
        .endsWith("sample_indexes/more-indexes/index-3"));
    split = (LuceneIndexInputSplit) splits.get(1);
    assertEquals(1, split.getIndexDirs().size());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-2"));
  }
View Full Code Here

TOP

Related Classes of com.twitter.elephantbird.mapreduce.input.LuceneIndexInputFormat.LuceneIndexInputSplit

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.