Package com.netflix.zeno.genericobject

Source Code of com.netflix.zeno.genericobject.JaccardMatrixPairwiseMatcher

/*
*
*  Copyright 2013 Netflix, Inc.
*
*     Licensed under the Apache License, Version 2.0 (the "License");
*     you may not use this file except in compliance with the License.
*     You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0
*
*     Unless required by applicable law or agreed to in writing, software
*     distributed under the License is distributed on an "AS IS" BASIS,
*     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*     See the License for the specific language governing permissions and
*     limitations under the License.
*
*/
package com.netflix.zeno.genericobject;

import com.netflix.zeno.diff.DiffPropertyPath;
import com.netflix.zeno.diff.DiffRecord;
import com.netflix.zeno.diff.DiffRecordValueListMap;
import com.netflix.zeno.genericobject.GenericObject.Field;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

/**
* This class pulls out matches pairs of objects based on maximum jaccard similarity.<p/>
*
* For a good discussion of how jaccard similarity matrix works, see http://matpalm.com/resemblance/jaccard_distance/
*
* @author dkoszewnik
*
*/
public class JaccardMatrixPairwiseMatcher {

    private final List<Field> objects1;
    private final List<Field> objects2;

    private final float matrix[][];

    private int rowSize = 0;
    private int columnSize = 0;
    private final BitSet illegalColumns;
    private final BitSet illegalRows;

    private Field x;
    private Field y;


    public JaccardMatrixPairwiseMatcher(List<Field> objects1, List<DiffRecord> recs1, List<Field> objects2, List<DiffRecord> recs2) {
        this.objects1 = objects1;
        this.objects2 = objects2;

        columnSize = objects1.size();
        rowSize = objects2.size();

        matrix = new float[recs1.size()][recs2.size()];

        for(int i=0;i<columnSize;i++) {
            for(int j=0;j<rowSize;j++) {
                matrix[i][j] = calculateJaccardDistance(recs1.get(i), recs2.get(j));
            }
        }

        illegalColumns = new BitSet(columnSize);
        illegalRows = new BitSet();
    }

    private float calculateJaccardDistance(DiffRecord rec1, DiffRecord rec2) {
        DiffRecordValueListMap map1 = rec1.getValueListMap();
        DiffRecordValueListMap map2 = rec2.getValueListMap();

        int xorCardinality = 0;
        int unionCardinality = 0;


        for(DiffPropertyPath key : map1.keySet()) {
            List<Object> list1 = map1.getList(key);
            List<Object> list2 = map2.getList(key);

            if(list2 == null) {
                xorCardinality += list1.size();
            } else {
                list2 = new ArrayList<Object>(list2);
                for(Object o1 : list1) {
                    if(list2.contains(o1)) {
                        list2.remove(o1);
                        unionCardinality++;
                    } else {
                        xorCardinality++;
                    }
                }
            }
        }

        for(DiffPropertyPath key : map2.keySet()) {
            List<Object> list1 = map1.getList(key);

            if(list1 == null) {
                List<Object> list2 = map2.getList(key);
                xorCardinality += list2.size();
            }
        }

        if(xorCardinality == 0 && unionCardinality == 0) {
            return 0;
        }

        return ((float)xorCardinality) / ((float)(xorCardinality + unionCardinality));
    }

    public boolean nextPair() {
        int minColumn = -1;
        int minRow = -1;
        float minDistance = 1.0F;

        for(int i=0;i<columnSize;i++) {
            if(!illegalColumns.get(i)) {
                for(int j=0;j<rowSize;j++) {
                    if(!illegalRows.get(j)) {
                        if(matrix[i][j] < minDistance) {
                            minColumn = i;
                            minRow = j;
                            minDistance = matrix[i][j];
                        }
                    }
                }
            }
        }

        if(minDistance != 1.0F) {
            illegalColumns.set(minColumn);
            illegalRows.set(minRow);
            x = objects1.get(minColumn);
            y = objects2.get(minRow);
            return true;
        }

        for(int i=0;i<columnSize;i++) {
            if(!illegalColumns.get(i)) {
                illegalColumns.set(i);
                x = objects1.get(i);
                y = null;
                return true;
            }
        }


        for(int j=0;j<rowSize;j++) {
            if(!illegalRows.get(j)) {
                illegalRows.set(j);
                x = null;
                y = objects2.get(j);
                return true;
            }
        }

        x = null;
        y = null;
        return false;
    }

    public Field getX() {
        return x;
    }

    public Field getY() {
        return y;
    }

}
TOP

Related Classes of com.netflix.zeno.genericobject.JaccardMatrixPairwiseMatcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.