/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.ontology.jena;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// Commons Logging imports
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.Individual;
import com.hp.hpl.jena.rdf.model.Literal;
import org.apache.nutch.ontology.*;
import org.apache.nutch.util.LogUtil;
/**
* implementation of parser for w3c's OWL files
*
* @author michael j pan
*/
public class OwlParser implements Parser {
private final static Log LOG = LogFactory.getLog(OwlParser.class);
public OwlParser () {
}
/**
* parse owl ontology files using jena
*/
public void parse(OntModel m) {
for (Iterator i = rootClasses( m ); i.hasNext(); ) {
OntClass c = (OntClass) i.next();
//dont deal with anonymous classes
if (c.isAnon()) {
continue;
}
parseClass( c, new ArrayList(), 0 );
}
}
protected void parseClass( OntClass cls, List occurs, int depth ) {
//dont deal with anonymous classes
if (cls.isAnon()) {
return;
}
//add cls to Ontology searchterms
//list labels
Iterator labelIter = cls.listLabels(null);
//if has no labels
if (!labelIter.hasNext()) {
//add rdf:ID as a label
cls.addLabel(rdfidToLabel(cls.getLocalName()), null);
}
//reset the label iterator
labelIter = cls.listLabels(null);
while(labelIter.hasNext()) {
Literal l = (Literal) labelIter.next();
OntologyImpl.addSearchTerm(l.toString(), cls);
}
// recurse to the next level down
if (cls.canAs( OntClass.class ) && !occurs.contains( cls )) {
//list subclasses
for (Iterator i = cls.listSubClasses( true ); i.hasNext(); ) {
OntClass sub = (OntClass) i.next();
// we push this expression on the occurs list before we recurse
occurs.add( cls );
parseClass(sub, occurs, depth+1);
occurs.remove( cls );
}
//list instances
for (Iterator i=cls.listInstances(); i.hasNext(); ) {
//add search terms for each instance
//list labels
Individual individual = (Individual) i.next();
for (Iterator j=individual.listLabels(null); j.hasNext();) {
Literal l = (Literal) j.next();
OntologyImpl.addSearchTerm(l.toString(), individual);
}
}
}
}
public Iterator rootClasses( OntModel m ) {
List roots = new ArrayList();
for (Iterator i = m.listClasses(); i.hasNext(); ) {
OntClass c = (OntClass) i.next();
try {
// too confusing to list all the restrictions as root classes
if (c.isAnon()) {
continue;
}
if (c.hasSuperClass( m.getProfile().THING(), true ) ) {
// this class is directly descended from Thing
roots.add( c );
} else if (c.getCardinality( m.getProfile().SUB_CLASS_OF() ) == 0 ) {
// this class has no super-classes
// (can occur if we're not using the reasoner)
roots.add( c );
}
} catch (Exception e) {
e.printStackTrace(LogUtil.getWarnStream(LOG));
}
}
return roots.iterator();
}
public String rdfidToLabel (String idString) {
Pattern p = Pattern.compile("([a-z0-9])([A-Z])");
Matcher m = p.matcher(idString);
String labelString = new String(idString);
while(m.find()) {
labelString = labelString.replaceAll(m.group(1)+m.group(2),
m.group(1)+" "+m.group(2));
}
return labelString;
}
}