Package ivory.lsh

Source Code of ivory.lsh.SignatureTest

package ivory.lsh;

import static org.junit.Assert.assertTrue;
import ivory.lsh.data.Bits;
import ivory.lsh.data.MinhashSignature;
import ivory.lsh.data.NBitSignature;
import ivory.lsh.data.PermutationByBit;
import ivory.lsh.data.PermutationByBlock;
import ivory.lsh.data.Signature;

import java.io.IOException;
import java.util.List;
import java.util.PriorityQueue;

import junit.framework.JUnit4TestAdapter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.junit.Test;

import edu.umd.cloud9.io.SequenceFileUtils;
import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.io.pair.PairOfWritables;

public class SignatureTest {
  private static final String TMP_FILENAME1 = "tmp1.out";
  private static int D = 20;

  private NBitSignature getRandomSignature() {
    NBitSignature s = new NBitSignature(D);
    for (int i = 0; i < s.size(); i++) {
      s.set(i, (Math.random() > 0.5 ? true : false));
    }
    return s;
  }

  private MinhashSignature getRandomMinhashSignature() {
    MinhashSignature s = new MinhashSignature(D);
    for (int i = 0; i < s.size(); i++) {
      s.add((int) (Math.random() * 39000));
    }
    return s;
  }

  @Test
  public void testPermuteBit() throws IOException {
    PermutationByBit p = new PermutationByBit(D);

    NBitSignature s = getRandomSignature();

    int cntBits = s.countSetBits();

    int loopcnt = 0;
    NBitSignature permutedS = new NBitSignature(D);
    while (loopcnt++ < 100) {
      ArrayListOfIntsWritable a = p.nextPermutation();
      s.perm(a, permutedS);
      System.out.println(permutedS.countSetBits());
      assertTrue(cntBits == permutedS.countSetBits());
      // System.out.println(permutedS);
    }
  }

  @Test
  public void testPermuteBlk() throws IOException {
    PermutationByBlock p = new PermutationByBlock(64);
    int tempD = D;
    D = 64;
    NBitSignature s = getRandomSignature();
    System.out.println(s + "\n========================");

    int cntBits = s.countSetBits();
    System.out.println(s.countSetBits());

    int loopcnt = 0;
    NBitSignature permutedS = new NBitSignature(D);
    while (true) {
      try {
        ArrayListOfIntsWritable a = p.nextPermutation();
        s.perm(a, permutedS);
        System.out.println((loopcnt++) + "\n" + permutedS);
        assertTrue(cntBits == permutedS.countSetBits());
        // System.out.println(permutedS);
      } catch (RuntimeException e) {
        D = tempD;
        return;
      }
    }
  }

  @Test
  public void testReadWrite() throws IOException {
    NBitSignature s = getRandomSignature();
    NBitSignature s2 = getRandomSignature();

    FileSystem fs;
    SequenceFile.Writer w;
    Configuration conf = new Configuration();

    fs = FileSystem.get(conf);
    w = SequenceFile.createWriter(fs, conf, new Path(TMP_FILENAME1),
          IntWritable.class, NBitSignature.class);
    w.append(new IntWritable(1), s);
    w.append(new IntWritable(2), s2);
    w.close();

    List<PairOfWritables<WritableComparable, Writable>> listOfKeysPairs =
        SequenceFileUtils.readFile(new Path(TMP_FILENAME1));
    fs.delete(new Path(TMP_FILENAME1), true);

    NBitSignature read1 = (NBitSignature) listOfKeysPairs.get(0).getRightElement();
    NBitSignature read2 = (NBitSignature) listOfKeysPairs.get(1).getRightElement();

    assertTrue(read1.toString().equals(s.toString()));
    assertTrue(read2.toString().equals(s2.toString()));

    // System.out.println(read1.toString());
    // System.out.println(read2.toString());
  }

  public void testSignatureSizeOnDisk() throws IOException {
    FileSystem fs;
    SequenceFile.Writer w;
    Configuration conf = new Configuration();

    fs = FileSystem.get(conf);
    w = SequenceFile.createWriter(fs, conf, new Path(TMP_FILENAME1),
          IntWritable.class, NBitSignature.class);
    for (int i = 0; i < 100; i++) {
      NBitSignature s = new NBitSignature(64);
      w.append(new IntWritable(1), s);
    }
    w.close();

    fs.delete(new Path(TMP_FILENAME1), true);
  }

  public void testWrite() throws IOException {
    NBitSignature s = new NBitSignature(1000);

    FileSystem fs;
    SequenceFile.Writer w;
    Configuration conf = new Configuration();

    try {
      fs = FileSystem.get(conf);
      w = SequenceFile.createWriter(fs, conf, new Path("test"),
          IntWritable.class, NBitSignature.class);
      int i = 0;
      while (i++ < 100) {
        if (i % 100000 == 0)
          System.out.println(i + "=" + s);
        w.append(new IntWritable(i), s);
      }
      w.close();
    } catch (IOException e) {
      e.printStackTrace();
    }

  }

  @Test
  public void testBasic() {
    NBitSignature s = new NBitSignature(D);
    s.set(0, true);
    s.set(1, false);
    assertTrue(s.get(0));
    assertTrue(!s.get(1));

    s.set(0, false);
    assertTrue(!s.get(0));
  }

  @Test
  public void testHammingDistance() {
    Bits b1 = new Bits("11100011010111101010");// 11000001011100011110001111100000101001011110111111010111101100101111101100110001110000101000011010001000001001111010111001101001100100011100101010111100000110001101001001010000100001111000000110011110001010101110001001001111010111010111100011001111111000110110000010001100111000001010010000110000000010100101110011001110111011110011010001000110110100111000001001111000111110111111100101011000010110000110100101001101000001110001110110010101000101001001000000000110110101001011111010111010101010001010010000111000001100100101110001111111011100001101011101000011110011111111010001001100001100111110000010111110101010000000101100011110011000010100001011101011001111111011011110010011110101000010001110011100111100101000110111010100011111101000100111010111111100001110110001011010110000000011100000001011101100000001100110010100100000011111001110010101111100100100000110010100010011110111001101110010111010001101110110001010000000000101010010011111010110000110110111100101110010100111");
    NBitSignature s1 = new NBitSignature(b1);
    Bits b2 = new Bits("11101111111111110000");// 01110010110000000101011111000000101001011111111000000110011010111100100111110000010010010101001101101000100100010111111101101001000010010101111110111100010110000100100000010101100010001000100110110110001001100100000111000100010111000101101001001011110001110011000000000010100100001011001110100010001100000100100000111110111110101011000101000111111010001000001101011110100001110110110111110100000010011100000100101101000010110001100110110111000001001000011110000100011001010100100011111110101010000100110000100010000000100101110100011111101110101001011001100100010011111011000110101000010101110000101111100100101001011111110110100110011010000111111001001010100111100111000100000000010011000010101110001101011110110000110011010111000110100100000111001111110101011010110110001000111011111010100010011001100110001000110110111100011101011111001111100101000000010000010111011011011101101100100110111010101111010101110010111011101100011111110010110110010010101010000101101000111100010110");
    NBitSignature s2 = new NBitSignature(b2);

    assertTrue(7 == s1.hammingDistance(s2));
    assertTrue(7 == s2.hammingDistance(s1));
  }

  public void benchmark() {
    int origD = this.D;
    this.D = 1000;
    NBitSignature s1 = this.getRandomSignature();
    NBitSignature s2 = this.getRandomSignature();
    long time = System.currentTimeMillis();
    int i = 0;
    while (i++ < 15000000) {
      s1.hammingDistance(s2, 400);
    }
    System.out.println("Bit signatures finished in " + (System.currentTimeMillis() - time) / 1000.0);

    this.D = 31;
    MinhashSignature m1 = this.getRandomMinhashSignature();
    MinhashSignature m2 = this.getRandomMinhashSignature();
    time = System.currentTimeMillis();
    i = 0;
    while (i++ < 3000000) {
      m1.hammingDistance(m2);
    }
    System.out.println("Minhash signatures finished in " + (System.currentTimeMillis() - time)
        / 1000.0);

    this.D = origD;
  }

  @Test
  public void testCompare() {
    Bits b1 = new Bits("01111111111111111111111111111110");
    Signature s1 = new NBitSignature(b1);
    Bits b2 = new Bits("10000000000000000000000000000001");
    Signature s2 = new NBitSignature(b2);

    assertTrue(s1.compareTo(s2) + "", s1.compareTo(s2) < 0);
    assertTrue(s2.compareTo(s1) + "", s2.compareTo(s1) > 0);
    assertTrue(s1.compareTo(s1) + "", s1.compareTo(s1) == 0);
    assertTrue(s2.compareTo(s2) + "", s2.compareTo(s2) == 0);

  }

  @Test
  public void testSort() {
    // 00000000
    NBitSignature s1 = new NBitSignature(64);
    for (int i = 0; i < 64; i++) {
      s1.set(i, false);
    }

    // 10000000
    NBitSignature s2 = new NBitSignature(64);
    s2.set(0, true);

    // 01000000
    NBitSignature s3 = new NBitSignature(64);
    s3.set(1, true);

    // 000100000
    NBitSignature s4 = new NBitSignature(64);
    s4.set(3, true);

    PriorityQueue<NBitSignature> sorted = new PriorityQueue<NBitSignature>();
    sorted.add(s1);
    sorted.add(s2);
    sorted.add(s3);
    sorted.add(s4);

    assertTrue(sorted.poll() == s1);
    assertTrue(sorted.poll() == s4);
    assertTrue(sorted.poll() == s3);
    assertTrue(sorted.poll() == s2);

  }

  // @Test
  // public void testGetSlide(){
  // BitsSignature s = new BitsSignature(10);
  // BitsSignature s2 = new BitsSignature(10);
  //
  // Slide slide = s.getSlide(0, 5);
  // Slide slide2 = s.getSlide(0, 5);
  //
  // System.out.println(slide);
  // System.out.println(slide.hashCode());
  // System.out.println(slide2);
  // System.out.println(slide2.hashCode());
  //
  //   
  // System.out.println(-299566668 % 100);
  // System.out.println(Integer.MIN_VALUE);
  //   
  //   
  // int i=0, j=0;
  // float sum=0;
  // while(i<100000000){
  // while(j<100000000){
  // float f1 = (float) Math.random();
  // float f2 = (float) Math.random();
  // float f = (f1*f2);
  // sum = sum + f;
  // j++;
  // }
  // i++;
  // }
  //
  //   
  // }

  @Test
  public void testSubSignature() {
    PermutationByBit p = new PermutationByBit(D);
    ArrayListOfIntsWritable a = p.nextPermutation();

    for (int i = 0; i < 100000; i++) {

      NBitSignature s = getRandomSignature();

      // System.out.println(s);

      NBitSignature slide = s.getSubSignature(0, D / 2);
      NBitSignature slide2 = s.getSubSignature(D / 2 + 1, D - 1);
      // System.out.println(slide+""+slide2);

      assertTrue(s.toString().equals(slide.toString() + slide2.toString()));
    }
    System.out.println("done");

    // System.out.println(slide);
    // System.out.println(slide2);

  }

  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(SignatureTest.class);
  }
}
TOP

Related Classes of ivory.lsh.SignatureTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.