/*
* Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2006.
*
* Licensed under the Aduna BSD-style license.
*/
package org.openrdf.repository.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.util.ModelUtil;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.store.StoreException;
/**
* Utility methods for comparing sets of statements (graphs) with each other.
* The supplied comparison operations map bnodes in the two supplied models on
* to each other and thus define a graph isomorphism.
*
* @author jeen
* @author Arjohn Kampman
*/
public class RepositoryUtil {
/**
* Compares the models in the default contexts of the two supplied
* repositories and returns true if they are equal. Models are equal if they
* contain the same set of statements. bNodes IDs are not relevant for model
* equality, they are mapped from one model to the other by using the
* attached properties. Note that the method pulls the entire default context
* of both repositories into main memory. Use with caution.
*/
public static boolean equals(Repository rep1, Repository rep2)
throws StoreException
{
// Fetch statements from rep1 and rep2
Set<Statement> model1, model2;
RepositoryConnection con1 = rep1.getConnection();
try {
model1 = con1.match(null, null, null, true).asSet();
}
finally {
con1.close();
}
RepositoryConnection con2 = rep2.getConnection();
try {
model2 = con2.match(null, null, null, true).asSet();
}
finally {
con2.close();
}
return ModelUtil.equals(model1, model2);
}
/**
* Compares the models of the default context of two repositories and returns
* true if rep1 is a subset of rep2. Note that the method pulls the entire
* default context of both repositories into main memory. Use with caution.
*/
public static boolean isSubset(Repository rep1, Repository rep2)
throws StoreException
{
Set<Statement> model1, model2;
RepositoryConnection con1 = rep1.getConnection();
try {
model1 = con1.match(null, null, null, true).asSet();
}
finally {
con1.close();
}
RepositoryConnection con2 = rep2.getConnection();
try {
model2 = con2.match(null, null, null, true).asSet();
}
finally {
con2.close();
}
return ModelUtil.isSubset(model1, model2);
}
/**
* Compares two models defined by the default context of two repositories and
* returns the difference between the first and the second model (that is,
* all statements that are present in rep1 but not in rep2). Blank node IDs
* are not relevant for model equality, they are mapped from one model to the
* other by using the attached properties. Note that the method pulls the
* entire default context of both repositories into main memory. Use with
* caution.
* <p>
* <b>NOTE: this algorithm is currently broken; it doesn't actually map blank
* nodes between the two models.</b>
*
* @return The collection of statements that is the difference between rep1
* and rep2.
*/
public static Collection<? extends Statement> difference(Repository rep1, Repository rep2)
throws StoreException
{
Collection<Statement> model1 = new HashSet<Statement>();
Collection<Statement> model2 = new HashSet<Statement>();
RepositoryConnection con1 = rep1.getConnection();
try {
con1.match(null, null, null, false).addTo(model1);
}
finally {
con1.close();
}
RepositoryConnection con2 = rep2.getConnection();
try {
con2.match(null, null, null, false).addTo(model2);
}
finally {
con2.close();
}
return difference(model1, model2);
}
/**
* Compares two models, defined by two statement collections, and returns the
* difference between the first and the second model (that is, all statements
* that are present in model1 but not in model2). Blank node IDs are not
* relevant for model equality, they are mapped from one model to the other
* by using the attached properties. *
* <p>
* <b>NOTE: this algorithm is currently broken; it doesn't actually map blank
* nodes between the two models.</b>
*
* @return The collection of statements that is the difference between model1
* and model2.
*/
public static Collection<? extends Statement> difference(Collection<? extends Statement> model1,
Collection<? extends Statement> model2)
{
// Create working copies
LinkedList<Statement> copy1 = new LinkedList<Statement>(model1);
LinkedList<Statement> copy2 = new LinkedList<Statement>(model2);
Collection<Statement> result = new ArrayList<Statement>();
// Compare statements that don't contain bNodes
Iterator<Statement> iter1 = copy1.iterator();
while (iter1.hasNext()) {
Statement st = iter1.next();
if (st.getSubject() instanceof BNode || st.getObject() instanceof BNode) {
// One or more of the statement's components is a bNode,
// these statements are handled later
continue;
}
// Try to remove the statement from model2
boolean removed = copy2.remove(st);
if (!removed) {
// statement was not present in model2 and is part of the difference
result.add(st);
}
iter1.remove();
}
// FIXME: this algorithm is broken: bNodeMapping is assumed to contain a
// bnode mapping while in reallity it is an empty map
HashMap<BNode, BNode> bNodeMapping = new HashMap<BNode, BNode>();
// mapBlankNodes(copy1, copy2, bNodeMapping, 0);
for (Statement st1 : copy1) {
boolean foundMatch = false;
for (Statement st2 : copy2) {
if (statementsMatch(st1, st2, bNodeMapping)) {
// Found a matching statement
foundMatch = true;
break;
}
}
if (!foundMatch) {
// No statement matching st1 was found in model2, st1 is part of
// the difference.
result.add(st1);
}
}
return result;
}
private static boolean statementsMatch(Statement st1, Statement st2, Map<BNode, BNode> bNodeMapping) {
URI pred1 = st1.getPredicate();
URI pred2 = st2.getPredicate();
if (!pred1.equals(pred2)) {
// predicates don't match
return false;
}
Resource subj1 = st1.getSubject();
Resource subj2 = st2.getSubject();
if (!(subj1 instanceof BNode)) {
if (!subj1.equals(subj2)) {
// subjects are not bNodes and don't match
return false;
}
}
else { // subj1 instanceof BNode
BNode mappedBNode = bNodeMapping.get(subj1);
if (mappedBNode != null) {
// bNode 'subj1' was already mapped to some other bNode
if (!subj2.equals(mappedBNode)) {
// 'subj1' and 'subj2' do not match
return false;
}
}
else {
// 'subj1' was not yet mapped. we need to check if 'subj2' is a
// possible mapping candidate
if (bNodeMapping.containsValue(subj2)) {
// 'subj2' is already mapped to some other value.
return false;
}
}
}
Value obj1 = st1.getObject();
Value obj2 = st2.getObject();
if (!(obj1 instanceof BNode)) {
if (!obj1.equals(obj2)) {
// objects are not bNodes and don't match
return false;
}
}
else { // obj1 instanceof BNode
BNode mappedBNode = bNodeMapping.get(obj1);
if (mappedBNode != null) {
// bNode 'obj1' was already mapped to some other bNode
if (!obj2.equals(mappedBNode)) {
// 'obj1' and 'obj2' do not match
return false;
}
}
else {
// 'obj1' was not yet mapped. we need to check if 'obj2' is a
// possible mapping candidate
if (bNodeMapping.containsValue(obj2)) {
// 'obj2' is already mapped to some other value.
return false;
}
}
}
return true;
}
}