/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.vectorizer.encoders;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.junit.Test;
/**
 * Tests for {@link InteractionValueEncoder}: each test encodes one interaction between two
 * feature values into a {@link DenseVector} and checks the vector's L1 norm (and, where
 * relevant, its largest entry) against the encoder's reported probe count.
 */
public class InteractionValueEncoderTest extends MahoutTestCase {

  /**
   * Encodes a word/continuous interaction with weight 1.0, once and then twice, into a small
   * vector, and then alongside the two underlying encoders into a much larger vector.
   */
  @Test
  public void testAddToVector() {
    WordValueEncoder wordEncoder = new StaticWordValueEncoder("word");
    ContinuousValueEncoder continuousEncoder = new ContinuousValueEncoder("cont");
    InteractionValueEncoder interaction =
        new InteractionValueEncoder("interactions", wordEncoder, continuousEncoder);

    Vector small = new DenseVector(200);
    interaction.addInteractionToVector("a", "1.0", 1.0, small);
    int probes = interaction.getProbes();

    // After one insertion the total mass equals the probe count and no entry exceeds 1,
    // i.e. the probes are expected to have landed on distinct locations.
    double singleInsertL1 = probes;
    assertEquals(singleInsertL1, small.norm(1), 0.0);
    assertEquals(1.0, small.maxValue(), 0.0);

    // Inserting the identical interaction a second time is expected to double both figures.
    interaction.addInteractionToVector("a", "1.0", 1.0, small);
    double doubleInsertL1 = probes * 2;
    assertEquals(doubleInsertL1, small.norm(1), 0.0);
    assertEquals(2.0, small.maxValue(), 0.0);

    // In a far larger vector, add the interaction plus each component encoder's own value.
    Vector large = new DenseVector(20000);
    interaction.addInteractionToVector("a", "1.0", 1.0, large);
    wordEncoder.addToVector("a", large);
    continuousEncoder.addToVector("1.0", large);
    probes = interaction.getProbes();

    // The expected total below only holds if none of the hashed locations collide.
    double combinedL1 = probes + wordEncoder.getProbes() + continuousEncoder.getProbes();
    assertEquals(combinedL1, large.norm(1), 1.0e-3);
  }

  /**
   * Encodes an interaction whose continuous value is "0.9" with an explicit weight of 0.5 and
   * expects every probed location to hold the product of the two.
   */
  @Test
  public void testAddToVectorUsesProductOfWeights() {
    WordValueEncoder wordEncoder = new StaticWordValueEncoder("word");
    ContinuousValueEncoder continuousEncoder = new ContinuousValueEncoder("cont");
    InteractionValueEncoder interaction =
        new InteractionValueEncoder("interactions", wordEncoder, continuousEncoder);

    Vector target = new DenseVector(200);
    interaction.addInteractionToVector("a", "0.9", 0.5, target);
    int probes = interaction.getProbes();

    // Each probed location should carry 0.5 * 0.9, so the L1 norm is that value times probes.
    double expectedL1 = probes * 0.5 * 0.9;
    double expectedMax = 0.5 * 0.9;
    assertEquals(expectedL1, target.norm(1), 0.0);
    assertEquals(expectedMax, target.maxValue(), 0.0);
  }

  /**
   * Encodes an interaction between a single word and a three-word text value and expects
   * three times the probe count in total mass.
   */
  @Test
  public void testAddToVectorWithTextValueEncoder() {
    WordValueEncoder wordEncoder = new StaticWordValueEncoder("word");
    TextValueEncoder textEncoder = new TextValueEncoder("text");
    InteractionValueEncoder interaction =
        new InteractionValueEncoder("interactions", wordEncoder, textEncoder);

    Vector target = new DenseVector(200);
    interaction.addInteractionToVector("a", "some text here", 1.0, target);
    int probes = interaction.getProbes();

    // "a" is expected to interact separately with each of "some", "text" and "here".
    double expectedL1 = probes * 3;
    assertEquals(expectedL1, target.norm(1), 0.0);
  }
}