Package edu.isi.karma.modeling.alignment.learner

Source Code of edu.isi.karma.modeling.alignment.learner.ModelReader$Statement

/*******************************************************************************
* Copyright 2012 University of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code was developed by the Information Integration Group as part
* of the Karma project at the Information Sciences Institute of the
* University of Southern California.  For more information, publications,
* and related projects, please see: http://www.isi.edu/integration
******************************************************************************/

package edu.isi.karma.modeling.alignment.learner;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jgrapht.graph.DirectedWeightedMultigraph;

import edu.isi.karma.modeling.alignment.LinkIdFactory;
import edu.isi.karma.modeling.alignment.SemanticModel;
import edu.isi.karma.modeling.research.Params;
import edu.isi.karma.rep.alignment.ColumnNode;
import edu.isi.karma.rep.alignment.DataPropertyLink;
import edu.isi.karma.rep.alignment.InternalNode;
import edu.isi.karma.rep.alignment.Label;
import edu.isi.karma.rep.alignment.LabeledLink;
import edu.isi.karma.rep.alignment.LiteralNode;
import edu.isi.karma.rep.alignment.Node;
import edu.isi.karma.rep.alignment.ObjectPropertyLink;
import edu.isi.karma.rep.alignment.ObjectPropertyType;
import edu.isi.karma.rep.alignment.SemanticType;
import edu.isi.karma.rep.alignment.SemanticType.Origin;
import edu.isi.karma.util.RandomGUID;

public class ModelReader {
 
//  public static String varPrefix = "var:";
  // Objects whose (expanded) text starts with this prefix are turned into
  // ColumnNodes by buildGraphsFromStatements2.
  public static String attPrefix = "att:";
 
  // Full URI of rdf:type; statements whose expanded predicate equals it
  // (ignoring case) are treated as class assertions for their subject.
  private static String typePredicate = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
  // Prefix -> namespace table. It has no initializer here; it is assigned by
  // initPrefixNsMapping() and read by getUri().
  private static HashMap<String, String> prefixNsMapping;

  static class Statement {
   
    public Statement(String subject, String predicate, String object) {
      this.subject = subject;
      this.predicate = predicate;
      this.object = object;
    }
   
    private String subject;
    private String predicate;
    private String object;
   
    public String getSubject() {
      return subject;
    }
    public String getPredicate() {
      return predicate;
    }
    public String getObject() {
      return object;
   
    public void print() {
      System.out.print("subject=" + this.subject);
      System.out.print(", predicate=" + this.predicate);
      System.out.println(", object=" + this.object);
    }
  }
 
 
  public static void main(String[] args) throws Exception {
   
    List<SemanticModel> semanticModels = null;

    try {

      semanticModels = importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT);
//      semanticModels = importSemanticModels(Params.INPUT_DIR);
      if (semanticModels != null) {
        for (SemanticModel sm : semanticModels) {
          sm.print();
          sm.writeGraphviz(Params.GRAPHVIS_DIR + sm.getName() + Params.GRAPHVIS_MAIN_FILE_EXT, true, true);
//          sm.writeJson(Params.MODEL_DIR + sm.getName() + Params.MODEL_MAIN_FILE_EXT);
         
          // To test JsonReader and JsonWriter
//          SemanticModel m = SemanticModel.readJson(Params.MODEL_DIR + sm.getName() + ".main.model.json");
//          m.writeJson(Params.MODEL_DIR + sm.getName() + ".main.model2.json");
        }
      }

    } catch (IOException e) {
      e.printStackTrace();
    }
  }
 
//  public static void main(String[] args) throws Exception {
//   
//    List<ServiceModel> serviceModels = null;
//
//    try {
//
//      serviceModels = importServiceModels(Params.INPUT_DIR);
//      if (serviceModels != null) {
//        for (ServiceModel sm : serviceModels) {
//          sm.print();
//          sm.exportModelToGraphviz(Params.GRAPHVIS_DIR);
//          GraphUtil.serialize(sm.getModel(), Params.JGRAPHT_DIR + sm.getServiceNameWithPrefix() + ".main.jgraph");
//        }
//      }
//
//    } catch (IOException e) {
//      e.printStackTrace();
//    }
//  }
 
  private static void initPrefixNsMapping() {
   
    prefixNsMapping = new HashMap<String, String>();
   
//    // experiment 1
    prefixNsMapping.put("geo", "http://www.w3.org/2003/01/geo/wgs84_pos#");
    prefixNsMapping.put("gn", "http://www.geonames.org/ontology#");
    prefixNsMapping.put("schema", "http://schema.org/");
    prefixNsMapping.put("dbpprop", "http://dbpedia.org/property/");
    prefixNsMapping.put("dbpedia-owl", "http://dbpedia.org/ontology/");
    prefixNsMapping.put("skos", "http://www.w3.org/2004/02/skos/core#");
    prefixNsMapping.put("tzont", "http://www.w3.org/2006/timezone#");
    prefixNsMapping.put("qudt", "http://qudt.org/1.1/schema/qudt#");
    prefixNsMapping.put("yago", "http://dbpedia.org/class/yago/");
    prefixNsMapping.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    prefixNsMapping.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
    prefixNsMapping.put("foaf", "http://xmlns.com/foaf/0.1/");
    prefixNsMapping.put("km", "http://isi.edu/integration/karma/dev#");
   
       
    // experiment 2 - museum data
    prefixNsMapping.put("status", "http://metadataregistry.org/uri/RegStatus/");
    prefixNsMapping.put("owl2xml", "http://www.w3.org/2006/12/owl2-xml#");
    prefixNsMapping.put("schema", "http://schema.org/");
    prefixNsMapping.put("aac-ont", "http://www.americanartcollaborative.org/ontology/");
    prefixNsMapping.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    prefixNsMapping.put("reg", "http://metadataregistry.org/uri/profile/RegAp/");
    prefixNsMapping.put("foaf", "http://xmlns.com/foaf/0.1/");
    prefixNsMapping.put("dcterms", "http://purl.org/dc/terms/");
    prefixNsMapping.put("xsd", "http://www.w3.org/2001/XMLSchema#");
    prefixNsMapping.put("DOLCE-Lite", "http://www.loa-cnr.it/ontologies/DOLCE-Lite.owl#");
    prefixNsMapping.put("dcmitype", "http://purl.org/dc/dcmitype/");
    prefixNsMapping.put("wgs84_pos", "http://www.w3.org/2003/01/geo/wgs84_pos#");
    prefixNsMapping.put("FRBRentitiesRDA", "http://rdvocab.info/uri/schema/FRBRentitiesRDA/");
    prefixNsMapping.put("saam-ont", "http://americanart.si.edu/ontology/");
    prefixNsMapping.put("wot", "http://xmlns.com/wot/0.1/");
    prefixNsMapping.put("edm", "http://www.europeana.eu/schemas/edm/");
    prefixNsMapping.put("dc", "http://purl.org/dc/elements/1.1/");
    prefixNsMapping.put("ElementsGr2", "http://rdvocab.info/ElementsGr2/");
    prefixNsMapping.put("skos", "http://www.w3.org/2008/05/skos#");
    prefixNsMapping.put("crm", "http://www.cidoc-crm.org/rdfs/cidoc-crm#");
    prefixNsMapping.put("vs", "http://www.w3.org/2003/06/sw-vocab-status/ns#");
    prefixNsMapping.put("frbr_core", "http://purl.org/vocab/frbr/core#");
    prefixNsMapping.put("owl", "http://www.w3.org/2002/07/owl#");
    prefixNsMapping.put("ore", "http://www.openarchives.org/ore/terms/");
    prefixNsMapping.put("abc", "http://metadata.net/harmony/abc#");
    prefixNsMapping.put("dcam", "http://purl.org/dc/dcam/");
    prefixNsMapping.put("rdfg", "http://www.w3.org/2004/03/trix/rdfg-1/");
    prefixNsMapping.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
    prefixNsMapping.put("rr", "http://www.w3.org/ns/r2rml#");
    prefixNsMapping.put("km-dev", "http://isi.edu/integration/karma/dev#");

  }
 
  public static List<SemanticModel> importSemanticModelsFromJsonFiles(String path, String fileExtension) throws Exception {

    File ff = new File(path);
    File[] files = ff.listFiles();
   
    List<SemanticModel> semanticModels = new ArrayList<SemanticModel>();
   
    for (File f : files) {
      if (f.getName().endsWith(fileExtension)) {
        SemanticModel model = SemanticModel.readJson(f.getAbsolutePath());
        semanticModels.add(model);
      }
    }
   
    return semanticModels;

  }
 
  public static List<SemanticModel> importSemanticModels(String importDir) throws IOException {
   
    initPrefixNsMapping();
   
    List<SemanticModel> semanticModels = new ArrayList<SemanticModel>();
   
    File dir = new File(importDir);
    File[] modelExamples = dir.listFiles();

    Pattern fileNamePattern = Pattern.compile("s[0-9](|[0-9])-.*\\.txt", Pattern.CASE_INSENSITIVE);
//    Pattern fileNamePattern = Pattern.compile("s1-.*\\.txt", Pattern.CASE_INSENSITIVE);
    Pattern serviceNamePattern = Pattern.compile("S[0-9](|[0-9]):(.*)\\(", Pattern.CASE_INSENSITIVE);
    Matcher matcher;

    String subject = "", predicate = "", object = "";
   
    int count = 1;
   
    if (modelExamples != null)
    for (File f : modelExamples) {

      String id = "s" + String.valueOf(count);
      String name = "", description = "";

      matcher = fileNamePattern.matcher(f.getName());
      if (!matcher.find()) {
        continue;
      }
     
      List<Statement> statements = null;
      LineNumberReader lr = new LineNumberReader(new FileReader(f));
      String curLine = "";
      while ((curLine = lr.readLine()) != null) {
       
        matcher = serviceNamePattern.matcher(curLine);
        if (matcher.find()) {
          name = f.getName().replaceAll(".txt", "");
          description = curLine.trim();
        }
       
        if (!curLine.trim().startsWith("<N3>"))
          continue;
       
        statements = new ArrayList<Statement>();
        while ((curLine = lr.readLine()) != null) {
          if (curLine.trim().startsWith("</N3>"))
            break;
//          System.out.println(curLine);
          if (curLine.trim().startsWith("#"))
            continue;
         
          String[] parts = curLine.trim().split("\\s+");
          if (parts == null || parts.length < 3) {
            System.out.println("Cannot extract statement from \"" + curLine + " \"");
            continue;
          }
         
          subject = parts[0].trim();
          predicate = parts[1].trim();
          object = parts[2].trim();
          Statement st = new Statement(subject, predicate, object);
          statements.add(st);
        }
       
      }
     
      lr.close();

      DirectedWeightedMultigraph<Node, LabeledLink> graph = buildGraphsFromStatements2(statements);

      SemanticModel semanticModel = new SemanticModel(id, graph);
      semanticModel.setName(name);
      semanticModel.setDescription(description);

      semanticModels.add(semanticModel);
      count++;
    }
   
    return semanticModels;
  }

 
//  private static DirectedWeightedMultigraph<Node, Link> buildGraphsFromStatements(List<Statement> statements) {
//   
//    DirectedWeightedMultigraph<Node, Link> graph =
//        new DirectedWeightedMultigraph<Node, Link>(Link.class);
//   
//    if (statements == null || statements.size() == 0)
//      return null;
//   
//    HashMap<String, Node> uri2Nodes = new HashMap<String, Node>();
//
//    for (Statement st : statements) {
//     
//      Node subj = uri2Nodes.get(st.getSubject());
//      if (subj == null) {
//        subj = new Node(st.getSubject(), null, null);
//        uri2Nodes.put(st.getSubject(), subj);
//        graph.addNode(subj);
//      }
//
//      Node obj = uri2Nodes.get(st.getObject());
//      if (obj == null) {
//        obj = new Node(st.getObject(), null, null);
//        uri2Nodes.put(st.getObject(), obj);
//        graph.addNode(obj);
//      }
//     
//      Link e = new Link(st.getPredicate(), null, null);
//      graph.addEdge(subj, obj, e);
//     
//    }
//   
//    return graph;
//  }
 
  private static String getUri(String prefixedUri) {
   
    String uri = prefixedUri;
    String prefix = "";
    String name = "";
    if (prefixedUri.indexOf(":") != -1) {
      prefix = prefixedUri.substring(0 , prefixedUri.indexOf(":")).trim();
      name = prefixedUri.substring(prefixedUri.indexOf(":") + 1 , prefixedUri.length()).trim();
      if (prefixNsMapping.containsKey(prefix)) {
        uri = prefixNsMapping.get(prefix) + name;
      }
    }
    return uri;
  }
 
  private static DirectedWeightedMultigraph<Node, LabeledLink> buildGraphsFromStatements2(List<Statement> statements) {
   
    if (statements == null || statements.size() == 0)
      return null;

    DirectedWeightedMultigraph<Node, LabeledLink> graph =
        new DirectedWeightedMultigraph<Node, LabeledLink>(LabeledLink.class);
   
    // Assumption: there is only one rdf:type for each URI
    HashMap<String, Node> uri2Classes = new HashMap<String, Node>();
    for (Statement st : statements) {
     
      String subjStr = st.getSubject();
      String predicateStr = st.getPredicate();
      String objStr = st.getObject();
     
      subjStr = getUri(subjStr);
      predicateStr = getUri(predicateStr);
      objStr = getUri(objStr);

      if (predicateStr.equalsIgnoreCase(typePredicate)) {
       
        Node classNode = new InternalNode(objStr, new Label(objStr));
        uri2Classes.put(subjStr, classNode);
        graph.addVertex(classNode);
       
      }
    }
   
//    int countOfLiterals = 0;
    String id;
    for (Statement st : statements) {
     
      String subjStr = st.getSubject();
      String predicateStr = st.getPredicate();
      String objStr = st.getObject();
     
      subjStr = getUri(subjStr);
      predicateStr = getUri(predicateStr);
      objStr = getUri(objStr);

      if (predicateStr.equalsIgnoreCase(typePredicate))
        continue;
     
      Node subj = uri2Classes.get(subjStr);
      if (subj == null) {
        subj = new InternalNode(subjStr, new Label(subjStr));
        graph.addVertex(subj);
      }

      Node obj = uri2Classes.get(objStr);
      if (obj == null) {
        if (objStr.startsWith(attPrefix)) {
          id = new RandomGUID().toString();
          obj = new ColumnNode(id, objStr, objStr, null);
          SemanticType semanticType = new SemanticType(((ColumnNode)obj).getHNodeId(),
              new Label(predicateStr),
              subj.getLabel(),
              Origin.User,
              1.0);
          ((ColumnNode)obj).setUserSelectedSemanticType(semanticType);

        } else if (objStr.indexOf(":") == -1 && objStr.indexOf("\"") != -1) {
//          String literalId = "lit:" + serviceId + "_l" + String.valueOf(countOfLiterals);
          obj = new LiteralNode(objStr, objStr, null, false);
//          countOfLiterals ++;
        } else
          obj = new InternalNode(objStr, new Label(objStr));
       
        graph.addVertex(obj);
      }
     
      LabeledLink e;
      if (obj instanceof InternalNode)
        e = new ObjectPropertyLink(LinkIdFactory.getLinkId(predicateStr, subj.getId(), obj.getId()), new Label(predicateStr), ObjectPropertyType.None);
      else
        e = new DataPropertyLink(LinkIdFactory.getLinkId(predicateStr, subj.getId(), obj.getId()), new Label(predicateStr));
      graph.addEdge(subj, obj, e);
     
    }
   
    return graph;
  }


}
TOP

Related Classes of edu.isi.karma.modeling.alignment.learner.ModelReader$Statement

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.