Package org.apache.mahout.utils.vectors

Source Code of org.apache.mahout.utils.vectors.VectorHelperTest

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.utils.vectors;

import com.google.common.collect.Iterables;

import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.junit.Before;
import org.junit.Test;

import java.util.Random;

public final class VectorHelperTest extends MahoutTestCase {

  private static final int NUM_DOCS = 100;

  private Path inputPathOne;
  private Path inputPathTwo;

  private Configuration conf;

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    conf = getConfiguration();

    inputPathOne = getTestTempFilePath("documents/docs-one.file");
    FileSystem fs = FileSystem.get(inputPathOne.toUri(), conf);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, inputPathOne, Text.class, IntWritable.class);
    try {
      Random rd = RandomUtils.getRandom();
      for (int i = 0; i < NUM_DOCS; i++) {
        // Make all indices higher than dictionary size
        writer.append(new Text("Document::ID::" + i), new IntWritable(NUM_DOCS + rd.nextInt(NUM_DOCS)));
      }
    } finally {
      Closeables.close(writer, false);
    }

    inputPathTwo = getTestTempFilePath("documents/docs-two.file");
    fs = FileSystem.get(inputPathTwo.toUri(), conf);
    writer = new SequenceFile.Writer(fs, conf, inputPathTwo, Text.class, IntWritable.class);
    try {
      Random rd = RandomUtils.getRandom();
      for (int i = 0; i < NUM_DOCS; i++) {
        // Keep indices within number of documents
        writer.append(new Text("Document::ID::" + i), new IntWritable(rd.nextInt(NUM_DOCS)));
      }
    } finally {
      Closeables.close(writer, false);
    }
  }

  @Test
  public void testJsonFormatting() throws Exception {
    Vector v = new SequentialAccessSparseVector(10);
    v.set(2, 3.1);
    v.set(4, 1.0);
    v.set(6, 8.1);
    v.set(7, -100);
    v.set(9, 12.2);
    String UNUSED = "UNUSED";
    String[] dictionary = {
        UNUSED, UNUSED, "two", UNUSED, "four", UNUSED, "six", "seven", UNUSED, "nine"
    };

    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1,two:3.1}",
        VectorHelper.vectorToJson(v, dictionary, 3, true));
    assertEquals("unsorted form incorrect: ", "{two:3.1,four:1.0}",
        VectorHelper.vectorToJson(v, dictionary, 2, false));
    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1,two:3.1,four:1.0}",
        VectorHelper.vectorToJson(v, dictionary, 4, true));
    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1,two:3.1,four:1.0,seven:-100.0}",
        VectorHelper.vectorToJson(v, dictionary, 5, true));
    assertEquals("sorted json form incorrect: ", "{nine:12.2,six:8.1}",
        VectorHelper.vectorToJson(v, dictionary, 2, true));
    assertEquals("unsorted form incorrect: ", "{two:3.1,four:1.0}",
        VectorHelper.vectorToJson(v, dictionary, 2, false));
  }

  @Test
  public void testTopEntries() throws Exception {
    Vector v = new SequentialAccessSparseVector(10);
    v.set(2, 3.1);
    v.set(4, 1.0);
    v.set(6, 8.1);
    v.set(7, -100);
    v.set(9, 12.2);
    v.set(1, 0.0);
    v.set(3, 0.0);
    v.set(8, 2.7);
    // check if sizeOFNonZeroElementsInVector = maxEntries
    assertEquals(6, VectorHelper.topEntries(v, 6).size());
    // check if sizeOfNonZeroElementsInVector < maxEntries
    assertTrue(VectorHelper.topEntries(v, 9).size() < 9);
    // check if sizeOfNonZeroElementsInVector > maxEntries
    assertTrue(VectorHelper.topEntries(v, 5).size() < Iterables.size(v.nonZeroes()));
  }

  @Test
  public void testTopEntriesWhenAllZeros() throws Exception {
    Vector v = new SequentialAccessSparseVector(10);
    v.set(2, 0.0);
    v.set(4, 0.0);
    v.set(6, 0.0);
    v.set(7, 0);
    v.set(9, 0.0);
    v.set(1, 0.0);
    v.set(3, 0.0);
    v.set(8, 0.0);
    assertEquals(0, VectorHelper.topEntries(v, 6).size());
  }

  @Test
  public void testLoadTermDictionary() throws Exception {
    // With indices higher than dictionary size
    VectorHelper.loadTermDictionary(conf, inputPathOne.toString());
    // With dictionary size higher than indices
    VectorHelper.loadTermDictionary(conf, inputPathTwo.toString());
  }
}
TOP

Related Classes of org.apache.mahout.utils.vectors.VectorHelperTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.