Package org.data2semantics.exp.old.kernels

Source Code of org.data2semantics.exp.old.kernels.RDFWLSubTreeKernelString$EdgeIndexPair

package org.data2semantics.exp.old.kernels;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.data2semantics.proppred.kernels.Bucket;
import org.data2semantics.proppred.kernels.KernelUtils;
import org.data2semantics.proppred.kernels.rdfgraphkernels.RDFFeatureVectorKernel;
import org.data2semantics.proppred.kernels.rdfgraphkernels.RDFGraphKernel;
import org.data2semantics.proppred.learners.SparseVector;
import org.data2semantics.tools.graphs.Edge;
import org.data2semantics.tools.graphs.Vertex;
import org.data2semantics.tools.rdf.RDFDataSet;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;

import edu.uci.ics.jung.graph.DirectedGraph;
import edu.uci.ics.jung.graph.DirectedSparseMultigraph;
import edu.uci.ics.jung.graph.util.EdgeType;

/**
 * This class implements a Weisfeiler-Lehman (WL) subtree kernel directly on an RDF graph. The difference from a normal WL kernel
 * is that subgraphs are not explicitly extracted. Instead, the idea of a subgraph is used implicitly, by tracking for each
 * vertex/edge its distance from an instance vertex.
 * One consequence is that a single blacklist is applied to the entire RDF graph, instead of one (small) blacklist per graph.
 *
 * @author Gerben
 */
public class RDFWLSubTreeKernelString implements RDFGraphKernel, RDFFeatureVectorKernel {
  // Label given to every instance (root) vertex while the graph is built.
  private static final String ROOT_LABEL = "1";
  // Labels written onto every vertex / edge by setBlankLabels when blankLabels is enabled.
  private static final String BLANK_VERTEX_LABEL = "1";
  private static final String BLANK_EDGE_LABEL   = "2";

  // Maps a vertex identifier, a predicate string, or an expanded multiset label to its compressed numeric label (stored as a String).
  private Map<String, String> labelMap;
  // Maps the string form of an instance to the vertex that represents it in the graph.
  private Map<String, Vertex<Map<Integer,String>>> instanceVertices;
  // Per instance: the depth index at which each reachable vertex was first encountered.
  private Map<String, Map<Vertex<Map<Integer,String>>, Integer>> instanceVertexIndexMap;
  // Per instance: the depth index at which each reachable edge was first encountered.
  private Map<String, Map<Edge<Map<Integer,String>>, Integer>> instanceEdgeIndexMap;

  // Next numeric label to hand out; set to 2 in the constructor and incremented for every newly issued label.
  private int labelCounter;
  // Number of hops that are followed from an instance vertex.
  private int depth;
  // Number of relabeling iterations performed after the initial feature computation.
  private int iterations;
  // Passed to the dataset when statements are retrieved.
  private boolean inference;
  // When true, all vertex and edge labels are overwritten with the blank labels before features are computed.
  private boolean blankLabels;
  // Descriptive name of this kernel configuration, returned by getLabel().
  private String label;
  // When true, the computed feature vectors are normalized before they are returned.
  private boolean normalize;


  public RDFWLSubTreeKernelString(int iterations, int depth, boolean inference, boolean normalize, boolean blankLabels) {
    this(iterations, depth, inference, normalize);
    this.blankLabels = blankLabels;

  }

  public RDFWLSubTreeKernelString(int iterations, int depth, boolean inference, boolean normalize) {
    this.normalize = normalize;
    this.label = "RDF_WL_Kernel_" + depth + "_" + iterations;
    this.blankLabels = false;

    labelMap = new HashMap<String, String>();
    instanceVertices = new HashMap<String, Vertex<Map<Integer,String>>>();
    this.instanceVertexIndexMap = new HashMap<String, Map<Vertex<Map<Integer,String>>, Integer>>();
    this.instanceEdgeIndexMap = new HashMap<String, Map<Edge<Map<Integer,String>>, Integer>>();

    labelCounter = 2;
    this.depth = depth;
    this.inference = inference;
    this.iterations = iterations;
  }


  public RDFWLSubTreeKernelString() {
    this(2, 2, false, true);
  }



  public String getLabel() {
    return label;
  }

  public void setNormalize(boolean normalize) {
    this.normalize = normalize;
  }

  public SparseVector[] computeFeatureVectors(RDFDataSet dataset, List<Resource> instances, List<Statement> blackList) {
    SparseVector[] featureVectors = new SparseVector[instances.size()];
    for (int i = 0; i < featureVectors.length; i++) {
      featureVectors[i] = new SparseVector();
   

    DirectedGraph<Vertex<Map<Integer,String>>,Edge<Map<Integer,String>>> graph = createGraphFromRDF(dataset, instances, blackList);
    createInstanceIndexMaps(graph, instances);

    if (blankLabels) {
      setBlankLabels(graph);
    }

    computeFVs(graph, instances, Math.sqrt(1.0 / ((double) (iterations + 1))), featureVectors);

    int startLabel = 1; // start at 1, since featureVectors need to start at index 1
    for (int i = 0; i < iterations; i++) { 
      relabelGraph2MultisetLabels(graph, startLabel);
      startLabel = labelCounter;
      compressGraphLabels(graph);
      computeFVs(graph, instances, Math.sqrt((2.0 + i) / ((double) (iterations + 1))), featureVectors);
    }
    if (this.normalize) {
      featureVectors = KernelUtils.normalize(featureVectors);
    }

    return featureVectors;
  }

  public double[][] compute(RDFDataSet dataset, List<Resource> instances, List<Statement> blackList) {
    SparseVector[] featureVectors = computeFeatureVectors(dataset, instances, blackList);
    double[][] kernel = KernelUtils.initMatrix(instances.size(), instances.size());
    computeKernelMatrix(instances, featureVectors, kernel);
    return kernel;
  }


  /*
  public double[][] compute(RDFDataSet dataset, List<Resource> instances, List<Statement> blackList) {
    DirectedGraph<Vertex<Map<Integer,String>>,Edge<Map<Integer,String>>> graph = createGraphFromRDF(dataset, instances, blackList);
    createInstanceIndexMaps(graph, instances);

    if (blankLabels) {
      setBlankLabels(graph);
    }

    SparseVector[] featureVectors = new SparseVector[instances.size()];
    for (int i = 0; i < featureVectors.length; i++) {
      featureVectors[i] = new SparseVector();
    }
    computeFVs(graph, instances, 1, featureVectors);

    double[][] kernel = initMatrix(instances.size(), instances.size());
    computeKernelMatrix(instances, featureVectors, kernel, 1.0 / ((double) (iterations + 1)));


    int startLabel = 1; // start at 1, since featureVectors need to start at index 1
    for (int i = 0; i < iterations; i++) { 
      relabelGraph2MultisetLabels(graph, startLabel);
      startLabel = labelCounter;
      compressGraphLabels(graph);

      featureVectors = new SparseVector[instances.size()];
      for (int j = 0; j < featureVectors.length; j++) {
        featureVectors[j] = new SparseVector();
      } 
      computeFVs(graph, instances, 1, featureVectors);
      computeKernelMatrix(instances, featureVectors, kernel, (2.0 + i) / ((double) (iterations + 1)));
    }
    if (this.normalize) {
      kernel = Kernel.normalize(kernel);
    }
    return kernel;
  }
   */

  private DirectedGraph<Vertex<Map<Integer,String>>,Edge<Map<Integer,String>>> createGraphFromRDF(RDFDataSet dataset, List<Resource> instances, List<Statement> blackList) {
    Map<String, Vertex<Map<Integer,String>>> vertexMap = new HashMap<String, Vertex<Map<Integer, String>>>();
    Map<String, Edge<Map<Integer,String>>> edgeMap = new HashMap<String, Edge<Map<Integer, String>>>();

    DirectedGraph<Vertex<Map<Integer,String>>,Edge<Map<Integer,String>>> graph = new DirectedSparseMultigraph<Vertex<Map<Integer,String>>,Edge<Map<Integer,String>>>();

    List<Resource> queryNodes = new ArrayList<Resource>();
    List<Resource> newQueryNodes;
    List<Statement> result;

    Vertex<Map<Integer,String>> startV;
    Vertex<Map<Integer,String>> newV;
    Edge<Map<Integer,String>> newE;

    String idStr, idStr2;

    for (Resource instance : instances) {
      idStr = instance.toString();     
      // If the instance is already part of the graph (because it was retrieved for an earlier instance),
      // then we use that one, but we need to change the labels to the start label of instance nodes, for which we use ROOT_LABEL
      if (vertexMap.containsKey(idStr)) {
        startV = vertexMap.get(idStr);
        for (int di : startV.getLabel().keySet()) {
          startV.getLabel().put(di, ROOT_LABEL);
        }

        // Else we construct a new node for the instance, and label it with ROOT_LABEL for the provided depth
      } else {
        startV = new Vertex<Map<Integer,String>>(new HashMap<Integer, String>());
        vertexMap.put(idStr, startV);
        graph.addVertex(startV);
      }
      startV.getLabel().put(depth, ROOT_LABEL);
      labelMap.put(idStr, ROOT_LABEL); // This label is (re)set to ROOT_LABEL
      instanceVertices.put(idStr, startV); // So that we can reconstruct subgraphs later, we save the instance vertices

      queryNodes.add(instance);

      for (int i = depth - 1; i >= 0; i--) {
        newQueryNodes = new ArrayList<Resource>();

        for (Resource queryNode : queryNodes) {
          result = dataset.getStatements(queryNode, null, null, inference);

          for (Statement stmt : result) {

            // Process new vertex
            idStr = stmt.getObject().toString();
            if (vertexMap.containsKey(idStr)) { // existing vertex
              newV = vertexMap.get(idStr);        
              newV.getLabel().put(i, labelMap.get(idStr)); // Set the label for depth i to the already existing label for this vertex

            } else { // New vertex
              newV = new Vertex<Map<Integer,String>>(new HashMap<Integer, String>());
              labelMap.put(idStr, Integer.toString(labelCounter));
              newV.getLabel().put(i, Integer.toString(labelCounter));
              labelCounter++;
              vertexMap.put(idStr, newV);
              graph.addVertex(newV);
            }

            // Process new Edge
            idStr = stmt.toString();
            idStr2 = stmt.getPredicate().toString();
            if (edgeMap.containsKey(idStr)) { // existing edge
              newE = edgeMap.get(idStr);
              newE.getLabel().put(i, labelMap.get(idStr2)); // Set the label for depth i to the already existing label for this edge

            } else { // new edge
              newE = new Edge<Map<Integer,String>>(new HashMap<Integer,String>());
              if (!labelMap.containsKey(idStr2)) { // Edge labels are not unique, in contrast to vertex labels, thus we need to check whether it exists already
                labelMap.put(idStr2, Integer.toString(labelCounter));
                labelCounter++;
              }
              newE.getLabel().put(i, labelMap.get(idStr2));
              edgeMap.put(idStr, newE)
              graph.addEdge(newE, vertexMap.get(stmt.getSubject().toString()), newV, EdgeType.DIRECTED);
            }


            // Store the object nodes if the loop continues (i>0) and if its a Resource
            if (i > 0 && stmt.getObject() instanceof Resource) {
              newQueryNodes.add((Resource) stmt.getObject());
            }
          }
        }

        queryNodes = newQueryNodes;
      }   
    }

    // Remove edges for statements on the blackList
    for (Statement stmt : blackList) {
      graph.removeEdge(edgeMap.get(stmt.toString()));
    }
    return graph;
  }


  private void createInstanceIndexMaps(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph, List<Resource> instances) {
    Vertex<Map<Integer, String>> startV;
    List<Vertex<Map<Integer, String>>> frontV, newFrontV;
    Map<Vertex<Map<Integer, String>>, Integer> vertexIndexMap;
    Map<Edge<Map<Integer, String>>, Integer> edgeIndexMap;

    for (int i = 0; i < instances.size(); i++) {       
      vertexIndexMap = new HashMap<Vertex<Map<Integer, String>>, Integer>();
      edgeIndexMap   = new HashMap<Edge<Map<Integer, String>>, Integer>();

      instanceVertexIndexMap.put(instances.get(i).toString(), vertexIndexMap);
      instanceEdgeIndexMap.put(instances.get(i).toString(), edgeIndexMap);


      // Get the start node
      startV = instanceVertices.get(instances.get(i).toString());
      frontV = new ArrayList<Vertex<Map<Integer,String>>>();
      frontV.add(startV);

      // Process the start node
      vertexIndexMap.put(startV, depth);

      for (int j = depth - 1; j >= 0; j--) {
        newFrontV = new ArrayList<Vertex<Map<Integer,String>>>();
        for (Vertex<Map<Integer, String>> qV : frontV) {
          for (Edge<Map<Integer, String>> edge : graph.getOutEdges(qV)) {
            // Process the edge, if we haven't seen it before
            if (!edgeIndexMap.containsKey(edge)) {
              edgeIndexMap.put(edge, j);
            }

            // Process the vertex if we haven't seen it before
            if (!vertexIndexMap.containsKey(graph.getDest(edge))) {
              vertexIndexMap.put(graph.getDest(edge), j);
            }

            // Add the vertex to the new front, if we go into a new round
            if (j > 0) {
              newFrontV.add(graph.getDest(edge));
            }
          }
        }
        frontV = newFrontV;
      }
    }   
  }



  private void relabelGraph2MultisetLabels(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph, int startLabel) {
    Map<String, Bucket<VertexIndexPair>> bucketsV = new HashMap<String, Bucket<VertexIndexPair>>();
    Map<String, Bucket<EdgeIndexPair>> bucketsE   = new HashMap<String, Bucket<EdgeIndexPair>>();

    // Initialize buckets
    for (int i = startLabel; i < labelCounter; i++) {
      bucketsV.put(Integer.toString(i), new Bucket<VertexIndexPair>(Integer.toString(i)));
      bucketsE.put(Integer.toString(i), new Bucket<EdgeIndexPair>(Integer.toString(i)));
    }

    // 1. Fill buckets
    // Add each edge source (i.e.) start vertex to the bucket of the edge label
    for (Edge<Map<Integer,String>> edge : graph.getEdges()) {
      // for each label we add a vertex-index-pair to the bucket
      for (int index : edge.getLabel().keySet()) {
        bucketsV.get(edge.getLabel().get(index)).getContents().add(new VertexIndexPair(graph.getDest(edge), index));
      }
    }

    // Add each incident edge to the bucket of the node label
    for (Vertex<Map<Integer,String>> vertex : graph.getVertices()) {     
      Collection<Edge<Map<Integer,String>>> v2 = graph.getOutEdges(vertex)

      for (int index : vertex.getLabel().keySet()) {
        if (index > 0) { // If index is 0 then we treat it as a fringe node, thus the label will not be propagated to the edges
          for (Edge<Map<Integer,String>> e2 : v2) {
            bucketsE.get(vertex.getLabel().get(index)).getContents().add(new EdgeIndexPair(e2, index - 1));
          }
        }
      }
    } 

    // 2. add bucket labels to existing labels
    // Change the original label to a prefix label

    for (Edge<Map<Integer,String>> edge : graph.getEdges()) {
      for (int i : edge.getLabel().keySet()) {
        edge.getLabel().put(i,edge.getLabel().get(i) + "_");  
      }

    }
    for (Vertex<Map<Integer,String>> vertex : graph.getVertices()) {
      for (int i : vertex.getLabel().keySet()) {
        vertex.getLabel().put(i, vertex.getLabel().get(i) + "_");
      }
    }

    // 3. Relabel to the labels in the buckets
    for (int i = startLabel; i < labelCounter; i++) {
      // Process vertices
      Bucket<VertexIndexPair> bucketV = bucketsV.get(Integer.toString(i));     
      for (VertexIndexPair vp : bucketV.getContents()) {
        vp.getVertex().getLabel().put(vp.getIndex(), vp.getVertex().getLabel().get(vp.getIndex()) + bucketV.getLabel() + "_");
      }
      // Process edges
      Bucket<EdgeIndexPair> bucketE = bucketsE.get(Integer.toString(i));     
      for (EdgeIndexPair ep : bucketE.getContents()) {
        ep.getEdge().getLabel().put(ep.getIndex(), ep.getEdge().getLabel().get(ep.getIndex()) + bucketE.getLabel() + "_");
      }
    }
  }


  private void compressGraphLabels(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph) {
    String label;

    for (Edge<Map<Integer,String>> edge : graph.getEdges()) {
      for (int i : edge.getLabel().keySet()) {
        label = labelMap.get(edge.getLabel().get(i));           
        if (label == null) {         
          label = Integer.toString(labelCounter);
          labelCounter++;
          labelMap.put(edge.getLabel().get(i), label);       
        }
        edge.getLabel().put(i, label);
      }
    }

    for (Vertex<Map<Integer,String>> vertex : graph.getVertices()) {
      for (int i : vertex.getLabel().keySet()) {
        label = labelMap.get(vertex.getLabel().get(i));
        if (label == null) {
          label = Integer.toString(labelCounter);
          labelCounter++;
          labelMap.put(vertex.getLabel().get(i), label);
        }
        vertex.getLabel().put(i, label);
      }
    }
  }



  /**
   * The computation of the feature vectors assumes that each edge and vertex is only processed once. We can encounter the same
   * vertex/edge on different depths during computation, this could lead to multiple counts of the same vertex, possibly of different
   * depth labels.
   *
   * @param graph
   * @param instances
   * @param weight
   * @param featureVectors
   */
  private void computeFVs(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph, List<Resource> instances, double weight, SparseVector[] featureVectors) {
    int index;
    Map<Vertex<Map<Integer,String>>, Integer> vertexIndexMap;
    Map<Edge<Map<Integer,String>>, Integer> edgeIndexMap;

    for (int i = 0; i < instances.size(); i++) {
      featureVectors[i].setLastIndex(labelCounter - 1);
     
      vertexIndexMap = instanceVertexIndexMap.get(instances.get(i).toString());
      for (Vertex<Map<Integer,String>> vertex : vertexIndexMap.keySet()) {
        index = Integer.parseInt(vertex.getLabel().get(vertexIndexMap.get(vertex)).toString());
        featureVectors[i].setValue(index, featureVectors[i].getValue(index) + weight);
      }
      edgeIndexMap = instanceEdgeIndexMap.get(instances.get(i).toString());
      for (Edge<Map<Integer,String>> edge : edgeIndexMap.keySet()) {
        index = Integer.parseInt(edge.getLabel().get(edgeIndexMap.get(edge)).toString());
        featureVectors[i].setValue(index, featureVectors[i].getValue(index) + weight);
      }
    }
  }



  /**
   * The computation of the feature vectors assumes that each edge and vertex is only processed once. We can encounter the same
   * vertex/edge on different depths during computation, this could lead to multiple counts of the same vertex, possibly of different
   * depth labels.
   *
   *
   * @param graph
   * @param instances
   * @param startLabel
   * @param featureVectors
   */

  /*
  private void computeFeatureVectors(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph, List<Resource> instances, int startLabel, SparseVector[] featureVectors) {
    int index;
    Map<Vertex<Map<Integer,String>>, Integer> vertexIndexMap;
    Map<Edge<Map<Integer,String>>, Integer> edgeIndexMap;

    for (int i = 0; i < instances.size(); i++) {
      featureVectors[i] = new SparseVector();
      //featureVectors[i] = new double[labelCounter - startLabel];
      //Arrays.fill(featureVectors[i], 0.0);

      vertexIndexMap = instanceVertexIndexMap.get(instances.get(i).toString());
      for (Vertex<Map<Integer,String>> vertex : vertexIndexMap.keySet()) {
        index = Integer.parseInt(vertex.getLabel().get(vertexIndexMap.get(vertex))) - startLabel;
        featureVectors[i].setValue(index, featureVectors[i].getValue(index) + 1);
        //featureVectors[i][index] += 1.0;
      }
      edgeIndexMap = instanceEdgeIndexMap.get(instances.get(i).toString());
      for (Edge<Map<Integer,String>> edge : edgeIndexMap.keySet()) {
        index = Integer.parseInt(edge.getLabel().get(edgeIndexMap.get(edge))) - startLabel;
        featureVectors[i].setValue(index, featureVectors[i].getValue(index) + 1);
        //featureVectors[i][index] += 1.0;
      }
    }
  }*/


  /*
  private void computeFeatureVectors(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph, List<Resource> instances, int startLabel, double[][] featureVectors) {
    int index;
    Vertex<Map<Integer, String>> startV;
    List<Vertex<Map<Integer, String>>> frontV, newFrontV;
    Set<Vertex<Map<Integer, String>>> proccedV;
    Set<Edge<Map<Integer, String>>> proccedE;


    for (int i = 0; i < instances.size(); i++) {   
      // new empty processed lists
      proccedV = new HashSet<Vertex<Map<Integer, String>>>();
      proccedE = new HashSet<Edge<Map<Integer, String>>>();

      // Initialize current feature vector to all 0's
      featureVectors[i] = new double[labelCounter - startLabel];   
      Arrays.fill(featureVectors[i], 0.0);

      // Get the start node
      startV = instanceVertices.get(instances.get(i).toString());
      frontV = new ArrayList<Vertex<Map<Integer,String>>>();
      frontV.add(startV);

      // Process the start node
      index = Integer.parseInt(startV.getLabel().get(depth)) - startLabel;   
      featureVectors[i][index] += 1.0;
      proccedV.add(startV);

      for (int j = depth - 1; j >= 0; j--) {
        newFrontV = new ArrayList<Vertex<Map<Integer,String>>>();
        for (Vertex<Map<Integer, String>> qV : frontV) {
          for (Edge<Map<Integer, String>> edge : graph.getOutEdges(qV)) {
            // Process the edge, if we haven't seen it before
            if (!proccedE.contains(edge)) {
              index = Integer.parseInt(edge.getLabel().get(j)) - startLabel;
              featureVectors[i][index] += 1.0;
              proccedE.add(edge);
            }

            // Process the vertex if we haven't seen it before
            if (!proccedV.contains(graph.getDest(edge))) {
              index = Integer.parseInt(graph.getDest(edge).getLabel().get(j)) - startLabel;
              featureVectors[i][index] += 1.0;
              proccedV.add(graph.getDest(edge));
            }

            // Add the vertex to the new front, if we go into a new round
            if (j > 0) {
              newFrontV.add(graph.getDest(edge));
            }
          }
        }
        frontV = newFrontV;
      }
    }
  }
   */

  /*
  private void computeKernelMatrix(List<Resource> instances, SparseVector[] featureVectors, double[][] kernel, double weight) {
    for (int i = 0; i < instances.size(); i++) {
      for (int j = i; j < instances.size(); j++) {
        kernel[i][j] += featureVectors[i].dot(featureVectors[j]) * weight;
        kernel[j][i] = kernel[i][j];
      }
    }
  }
   */


  private void computeKernelMatrix(List<Resource> instances, SparseVector[] featureVectors, double[][] kernel) {
    for (int i = 0; i < instances.size(); i++) {
      for (int j = i; j < instances.size(); j++) {
        kernel[i][j] += featureVectors[i].dot(featureVectors[j]);
        kernel[j][i] = kernel[i][j];
      }
    }
  }

  private void setBlankLabels(DirectedGraph<Vertex<Map<Integer,String>>, Edge<Map<Integer,String>>> graph) {
    for (Vertex<Map<Integer,String>> v : graph.getVertices()) {
      for (int k : v.getLabel().keySet()) {
        v.getLabel().put(k, BLANK_VERTEX_LABEL);
      }
    }

    for (Edge<Map<Integer,String>> e : graph.getEdges()) {
      for (int k : e.getLabel().keySet()) {
        e.getLabel().put(k, BLANK_EDGE_LABEL);
      }
    } 
  }


  private class VertexIndexPair {
    private Vertex<Map<Integer,String>> vertex;
    private int index;

    public VertexIndexPair(Vertex<Map<Integer, String>> vertex, int index) {
      this.vertex = vertex;
      this.index = index;
    }

    public Vertex<Map<Integer, String>> getVertex() {
      return vertex;
    }
    public int getIndex() {
      return index;
    }   
  }

  private class EdgeIndexPair {
    private Edge<Map<Integer,String>> edge;
    private int index;

    public EdgeIndexPair(Edge<Map<Integer, String>> edge, int index) {
      this.edge = edge;
      this.index = index;
    }

    public Edge<Map<Integer, String>> getEdge() {
      return edge;
    }
    public int getIndex() {
      return index;
    }   
  }

}
TOP

Related Classes of org.data2semantics.exp.old.kernels.RDFWLSubTreeKernelString$EdgeIndexPair

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.
</div>