Package ivory.ffg.data

Source Code of ivory.ffg.data.DocumentVectorTest

package ivory.ffg.data;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import junit.framework.JUnit4TestAdapter;

import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import ivory.core.data.document.IntDocVector;
import ivory.core.data.document.LazyIntDocVector;

public class DocumentVectorTest {
  private static final String[] documentVectorClass = new String[] {
    "ivory.ffg.data.DocumentVectorHashedArray",
    "ivory.ffg.data.DocumentVectorMiniInvertedIndex",
    "ivory.ffg.data.DocumentVectorPForDeltaArray",
    "ivory.ffg.data.DocumentVectorVIntArray"
  };

  private static IntDocVector intDocVector;
  private static final int[] document = new int[500];
  private static final SortedMap<Integer, int[]> indexedDocument =
    new TreeMap<Integer, int[]>();
  private static int[] terms;

  @BeforeClass public static void setUp() throws Exception {
    Map<Integer, List<Integer>> map = Maps.newHashMap();
    for(int i = 0; i < document.length; i++) {
      document[i] = (int) (Math.random() * 70000) + 1;
      if(!map.containsKey(document[i])) {
        List<Integer> list = Lists.newArrayList();
        map.put(document[i], list);
      }
      map.get(document[i]).add(i + 1);
    }

    for(int key: map.keySet()) {
      int[] positions = new int[map.get(key).size()];
      int i = 0;
      for(int pos: map.get(key)) {
        positions[i++] = pos;
      }
      indexedDocument.put(key, positions);
    }

    terms = new int[map.size()];
    int i = 0;
    for(int key: map.keySet()) {
      terms[i++] = key;
    }

    intDocVector = new LazyIntDocVector(indexedDocument);

    ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(byteOut);
    intDocVector.write(dataOut);
    dataOut.close();

    ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
    DataInputStream dataIn = new DataInputStream(byteIn);
    intDocVector = new LazyIntDocVector();
    intDocVector.readFields(dataIn);
  }

  @Test public void testDecompressDocuments() throws Exception {
    for(String dvclass: documentVectorClass) {
      DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector);
      assertEquals(document.length, dv.getDocumentLength());

      try {
        int[] transformedDoc = dv.decompressDocument();
        int[] transformedTerms = dv.transformTerms(terms);

        int[][] positions = DocumentVectorUtility.
          getPositions(transformedDoc, transformedTerms);

        for(int i = 0; i < positions.length; i++) {
          assertEquals(indexedDocument.get(terms[i]).length, positions[i].length);
          for(int j = 0; j < positions[i].length; j++) {
            assertEquals(terms[i], document[positions[i][j] - 1]);
          }
        }
      } catch(UnsupportedOperationException e) {
        continue;
      }
    }
  }

  @Test public void testDecompressPositions() throws Exception {
    for(String dvclass: documentVectorClass) {
      DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector);

      try {
        int[][] positions = dv.decompressPositions(terms);
        if(dvclass.contains("mini")) {
          for(int i = 0; i < positions.length; i++) {
            for(int j = 0; j < positions[i].length; j++) {
              System.out.print(positions[i][j] + " ");
            }
            System.out.println();
          }
        }

        for(int i = 0; i < positions.length; i++) {
          assertEquals(indexedDocument.get(terms[i]).length, positions[i].length);
          for(int j = 0; j < positions[i].length; j++) {
            assertEquals(terms[i], document[positions[i][j] - 1]);
          }
        }
      } catch(UnsupportedOperationException e) {
        continue;
      }
    }
  }

  @Test public void testIO() throws Exception {
    for(String dvclass: documentVectorClass) {
      DocumentVector dv = DocumentVectorUtility.newInstance(dvclass, intDocVector);

      ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
      DataOutputStream dataOut = new DataOutputStream(byteOut);
      dv.write(dataOut);
      dataOut.close();

      ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
      DataInputStream dataIn = new DataInputStream(byteIn);
      DocumentVector dvCopy = DocumentVectorUtility.readInstance(dvclass, dataIn);

      assertEquals(dv, dvCopy);
    }
  }

  public static junit.framework.Test suite() {
    return new JUnit4TestAdapter(DocumentVectorTest.class);
  }
}
TOP

Related Classes of ivory.ffg.data.DocumentVectorTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.