Package org.sindice.siren.index.codecs.block

Source Code of org.sindice.siren.index.codecs.block.TestAForCodec

/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
*  https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*  http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.sindice.siren.index.codecs.block;

import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.junit.Test;
import org.sindice.siren.index.codecs.CodecTestCase;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexInput;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexInput.DocsFreqBlockReader;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexOutput;
import org.sindice.siren.index.codecs.siren10.DocsFreqBlockIndexOutput.DocsFreqBlockWriter;
import org.sindice.siren.index.codecs.siren10.Siren10BlockStreamFactory;

public class TestAForCodec extends CodecTestCase {

  private DocsFreqBlockIndexOutput getIndexOutput(final int blockSize) throws IOException {
    final Siren10BlockStreamFactory factory = new Siren10BlockStreamFactory(blockSize);
    factory.setDocsBlockCompressor(new AForBlockCompressor());
    factory.setFreqBlockCompressor(new AForBlockCompressor());
    return factory.createDocsFreqOutput(directory, "test", newIOContext(random()));
  }

  private DocsFreqBlockIndexInput getIndexInput() throws IOException {
    final Siren10BlockStreamFactory factory = new Siren10BlockStreamFactory(0);
    factory.setDocsBlockDecompressor(new AForBlockDecompressor());
    factory.setFreqBlockDecompressor(new AForBlockDecompressor());
    return factory.openDocsFreqInput(directory, "test", newIOContext(random()));
  }

  public void testReadDoc() throws IOException {

    final DocsFreqBlockIndexOutput out = this.getIndexOutput(512);
    final DocsFreqBlockWriter writer = out.getBlockWriter();

    writer.setNodeBlockIndex(out.index());
    writer.setPosBlockIndex(out.index());
    for (int i = 0; i < 11777; i++) {
      if (writer.isFull()) {
        writer.flush();
      }
      writer.write(i);
    }

    writer.flush(); // flush remaining data
    out.close();

    final DocsFreqBlockIndexInput in = this.getIndexInput();
    final DocsFreqBlockReader reader = in.getBlockReader();

    reader.setNodeBlockIndex(in.index());
    reader.setPosBlockIndex(in.index());
    for (int i = 0; i < 11777; i++) {
      if (reader.isExhausted()) {
        reader.nextBlock();
      }
      assertEquals(i, reader.nextDocument());
    }

    in.close();
  }

  public void testReadDocAndFreq() throws IOException {

    final DocsFreqBlockIndexOutput out = this.getIndexOutput(512);
    final DocsFreqBlockWriter writer = out.getBlockWriter();

    writer.setNodeBlockIndex(out.index());
    writer.setPosBlockIndex(out.index());
    for (int i = 0; i < 11777; i++) {
      if (writer.isFull()) {
        writer.flush();
      }
      writer.write(i);
      writer.writeNodeFreq(random().nextInt(10) + 1);
    }

    writer.flush(); // flush remaining data
    out.close();

    final DocsFreqBlockIndexInput in = this.getIndexInput();
    final DocsFreqBlockReader reader = in.getBlockReader();

    reader.setNodeBlockIndex(in.index());
    reader.setPosBlockIndex(in.index());
    for (int i = 0; i < 11777; i++) {
      if (reader.isExhausted()) {
        reader.nextBlock();
      }
      assertEquals(i, reader.nextDocument());
      final int frq = reader.nextNodeFreq();
      assertTrue(frq > 0);
      assertTrue(frq <= 10);
    }

    in.close();
  }

  public void testRandomDoc() throws IOException {

    final int blockSize = BLOCK_SIZES[random().nextInt(BLOCK_SIZES.length)];
    final DocsFreqBlockIndexOutput out = this.getIndexOutput(blockSize);
    final DocsFreqBlockWriter writer = out.getBlockWriter();

    writer.setNodeBlockIndex(out.index());
    writer.setPosBlockIndex(out.index());

    // generate doc ids
    final Set<Integer> docIds = new TreeSet<Integer>();
    final int lenght = (int) this.nextLong(128000, 512000);
    for (int i = 0; i < lenght; i++) {
      docIds.add((int) this.nextLong(0, ((1L << 31) - 1)));
    }

    for (final int docId : docIds) {
      if (writer.isFull()) {
        writer.flush();
      }
      writer.write(docId);
    }

    writer.flush(); // flush remaining data
    out.close();

    final DocsFreqBlockIndexInput in = this.getIndexInput();
    final DocsFreqBlockReader reader = in.getBlockReader();

    reader.setNodeBlockIndex(in.index());
    reader.setPosBlockIndex(in.index());
    for (final int docId : docIds) {
      if (reader.isExhausted()) {
        reader.nextBlock();
      }
      assertEquals(docId, reader.nextDocument());
    }

    in.close();
  }

  @Test
  public void testIntegerRange() throws Exception {
    this.doTestIntegerRange(1, 32, new AForBlockCompressor(), new AForBlockDecompressor());
  }

  @Test
  public void testShortPostingList() throws IOException {

    final DocsFreqBlockIndexOutput out = this.getIndexOutput(512);
    final DocsFreqBlockWriter writer = out.getBlockWriter();

    writer.setNodeBlockIndex(out.index());
    writer.setPosBlockIndex(out.index());
    for (int i = 0; i < 5; i++) {
      if (writer.isFull()) {
        writer.flush();
      }
      writer.write(i);
    }

    writer.flush(); // flush remaining data
    out.close();

    final DocsFreqBlockIndexInput in = this.getIndexInput();
    final DocsFreqBlockReader reader = in.getBlockReader();

    reader.setNodeBlockIndex(in.index());
    reader.setPosBlockIndex(in.index());
    for (int i = 0; i < 5; i++) {
      if (reader.isExhausted()) {
        reader.nextBlock();
      }
      assertEquals(i, reader.nextDocument());
    }

    in.close();
  }

  @Test
  public void testIncompleteFrame() throws IOException {
    final BlockCompressor compressor = new AForBlockCompressor();

    final IntsRef input = new IntsRef(64);
    final BytesRef output = new BytesRef(compressor.maxCompressedSize(64));

    // fill first part with 1
    for (int i = 0; i < 33; i++) {
      input.ints[i] = 1;
    }

    // fill the rest with random numbers
    for (int i = 33; i < 64; i++) {
      input.ints[i] = (int) this.nextLong(64, Short.MAX_VALUE);
    }

    input.offset = 0;
    input.length = 33;

    // the random numbers after the end of the input array should not impact
    // compression
    compressor.compress(input, output);

    // should be frame code 1 : 32 ints encoded with 1 bits
    assertEquals(1, output.bytes[0]);
    // followed by 4 bytes at 255
    assertEquals(0xFF, output.bytes[1] & 0xFF);
    assertEquals(0xFF, output.bytes[2] & 0xFF);
    assertEquals(0xFF, output.bytes[3] & 0xFF);
    assertEquals(0xFF, output.bytes[4] & 0xFF);
    // then frame code 34 : 16 ints encoded with 1 bits
    assertEquals(34, output.bytes[5]);
    // followed by 1 byte with at least 128 and a second byte with 0
    assertEquals(128, output.bytes[6] & 0x80);
    assertEquals(0, output.bytes[7] & 0xFF);
    // followed by frame code 33: 16 ints encoded with 0 bits
    assertEquals(33, output.bytes[8]);
  }

}
TOP

Related Classes of org.sindice.siren.index.codecs.block.TestAForCodec

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.