Package ivory.lsh

Source Code of ivory.lsh.MinhashSignatureTest

package ivory.lsh;

import static org.junit.Assert.assertTrue;
import ivory.lsh.data.MinhashSignature;
import ivory.lsh.data.PermutationByBit;
import java.io.IOException;
import java.util.List;
import junit.framework.JUnit4TestAdapter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.junit.Test;
import edu.umd.cloud9.io.SequenceFileUtils;
import edu.umd.cloud9.io.array.ArrayListOfIntsWritable;
import edu.umd.cloud9.io.pair.PairOfWritables;

public class MinhashSignatureTest {
  private static final String TMP_FILENAME1 = "tmp1.out";

  private static int D = 20, vocabSize = 1000;

  private MinhashSignature getRandomSignature() {
    MinhashSignature s = new MinhashSignature(D);
    for (int i = 0; i < D; i++) {
      int elt = (int) (Math.random() * vocabSize);
      s.add(elt);
    }
    return s;
  }

  @Test
  public void testPermute() throws IOException {
    PermutationByBit p = new PermutationByBit(D);

    MinhashSignature s = getRandomSignature();
    System.out.println(s);

    int loopcnt = 0;
    MinhashSignature permutedS = new MinhashSignature(D);
    while (loopcnt++ < 10) {
      ArrayListOfIntsWritable a = p.nextPermutation();
      s.perm(a, permutedS);
      for (int i = 0; i < s.size(); i++) {
        assertTrue(permutedS.containsTerm(s.get(i)));
      }
      assertTrue(permutedS.size() == s.size());
      System.out.println(permutedS);
    }
  }

  @Test
  public void testReadWrite() throws IOException {
    MinhashSignature s = getRandomSignature();
    MinhashSignature s2 = getRandomSignature();

    FileSystem fs;
    SequenceFile.Writer w;
    Configuration conf = new Configuration();

    try {
      fs = FileSystem.get(conf);
      w = SequenceFile.createWriter(fs, conf, new Path(TMP_FILENAME1),
          IntWritable.class, MinhashSignature.class);
      w.append(new IntWritable(1), s);
      w.append(new IntWritable(2), s2);
      w.close();
    } catch (IOException e) {
      e.printStackTrace();
    }

    List<PairOfWritables<WritableComparable, Writable>> listOfKeysPairs = SequenceFileUtils
        .readFile(new Path(TMP_FILENAME1));
    FileSystem.get(conf).delete(new Path(TMP_FILENAME1), true);

    MinhashSignature read1 = (MinhashSignature) listOfKeysPairs.get(0).getRightElement();
    MinhashSignature read2 = (MinhashSignature) listOfKeysPairs.get(1).getRightElement();

    assertTrue(read1.toString().equals(s.toString()));
    assertTrue(read2.toString().equals(s2.toString()));

    System.out.println(read1.toString());
    System.out.println(read2.toString());
  }

  public void testSignatureSizeOnDisk() throws IOException {
    FileSystem fs;
    SequenceFile.Writer w;
    Configuration conf = new Configuration();

    try {
      MinhashSignature s = getRandomSignature();
      fs = FileSystem.get(conf);
      w = SequenceFile.createWriter(fs, conf, new Path("test2"),
          IntWritable.class, MinhashSignature.class);
      for (int i = 0; i < 1000000; i++) {
        w.append(new IntWritable(1), s);
      }
      w.close();
    } catch (IOException e) {
      e.printStackTrace();
    }

  }

  @Test
  public void testBasic() {
    MinhashSignature s = new MinhashSignature(D);

    s.add(1);
    s.add(2);
    assertTrue(s.get(0) == 1);
    assertTrue(s.get(1) == 2);

    s.set(0, 3);
    assertTrue(s.get(0) == 3);
  }

  @Test
  public void testHammingDistance() {
    MinhashSignature s1 = new MinhashSignature(D);
    MinhashSignature s2 = new MinhashSignature(D);
    s1.add(1);
    s1.add(2);
    s1.add(3);
    s1.add(4);
    s1.add(5);

    s2.add(3);
    s2.add(2);
    s2.add(5);
    s2.add(4);
    s2.add(1);

    assertTrue(s1.hammingDistance(s2) == 3);
    assertTrue(s1.hammingDistance(s2, 2) == 3);
    assertTrue(s1.hammingDistance(s2, 5) == 3);

    for (int i = 0; i < 1000; i++) {
      s1.hammingDistance(s2);
    }
    System.out.println(s1.hammingDistance(s2));

  }

  @Test
  public void testCompare() {
    MinhashSignature s1 = new MinhashSignature(D);
    MinhashSignature s2 = new MinhashSignature(D);

    s1.add(1);
    s1.add(2);
    s1.add(3);
    s1.add(4);
    s1.add(5);

    s2.add(3);
    s2.add(2);
    s2.add(5);
    s2.add(4);
    s2.add(1);

    assertTrue(s1.compareTo(s2) + "", s1.compareTo(s2) < 0);
    assertTrue(s2.compareTo(s1) + "", s2.compareTo(s1) > 0);
    assertTrue(s1.compareTo(s1) + "", s1.compareTo(s1) == 0);
    assertTrue(s2.compareTo(s2) + "", s2.compareTo(s2) == 0);

  }

  @Test
  public void testSubSignature() {
    for (int i = 0; i < 100; i++) {

      MinhashSignature s = getRandomSignature();

      System.out.println(s);

      MinhashSignature slide = s.getSubSignature(0, D / 2);
      MinhashSignature slide2 = s.getSubSignature(D / 2 + 1, D - 1);
      System.out.println(slide + "," + slide2);

      assertTrue(s.toString().equals(slide.toString() + "," + slide2.toString()));
    }
    System.out.println("done");
  }
 
  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(MinhashSignatureTest.class);
  }
}
TOP

Related Classes of ivory.lsh.MinhashSignatureTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.