Package edu.isi.karma.rdf

Source Code of edu.isi.karma.rdf.AddSameAsToBloomFilters

package edu.isi.karma.rdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;

import edu.isi.karma.er.helper.TripleStoreUtil;
import edu.isi.karma.kr2rml.writer.KR2RMLBloomFilter;
import edu.isi.karma.webserver.KarmaException;

public class AddSameAsToBloomFilters {
  static String filepath;
    static String triplestoreURL;
    static String predicate;
  public static void main(String[] args) throws KarmaException, IOException {
    Group options = createCommandLineOptions();
        Parser parser = new Parser();
        parser.setGroup(options);
        HelpFormatter hf = new HelpFormatter();
        parser.setHelpFormatter(hf);
        parser.setHelpTrigger("--help");
        CommandLine cl = parser.parseAndHelp(args);
        if (cl == null || cl.getOptions().size() == 0 || cl.hasOption("--help")) {
            hf.setGroup(options);
            hf.print();
            return;
        }
        filepath = (String) cl.getValue("--filepath");
        triplestoreURL = (String) cl.getValue("--triplestoreurl");
        predicate = (String) cl.getValue("--predicate");
        TripleStoreUtil utilObj = new TripleStoreUtil();
        Set<String> predicates = new HashSet<String>();
        predicates.add(predicate);
        List<String> predicateObjectMaps = new ArrayList<String>();
        for (String t : utilObj.getPredicatesForParentTriplesMapsWithSameClass(triplestoreURL, null, predicates).get("refObjectMaps")) {
          predicateObjectMaps.addAll(Arrays.asList(t.split(",")));
        }
        for (String t : utilObj.getPredicatesForTriplesMapsWithSameClass(triplestoreURL, null, predicates).get("predicateObjectMaps")) {
          predicateObjectMaps.addAll(Arrays.asList(t.split(",")));
        }
        Map<String, String> serializedmapping = utilObj.getBloomFiltersForMaps(triplestoreURL, null, predicateObjectMaps);
        Map<String, KR2RMLBloomFilter> mapping = new HashMap<String, KR2RMLBloomFilter>();
       
        for (Entry<String, String> entry : serializedmapping.entrySet()) {
          String key = entry.getKey();
          String value = entry.getValue();
          KR2RMLBloomFilter bf = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize, KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
          bf.populateFromCompressedAndBase64EncodedString(value);
          mapping.put(key, bf);
        }
       
        Model model = ModelFactory.createDefaultModel();
        InputStream s = new FileInputStream(new File(filepath));
    model.read(s, null, "TURTLE");
    StmtIterator iterator = model.listStatements();
    while(iterator.hasNext()) {
      Statement st = iterator.next();
      String subject = "<" + st.getSubject().toString() + ">";
      String object = "<" + st.getObject().toString() + ">";
      for (Entry<String, KR2RMLBloomFilter> entry : mapping.entrySet()) {
        KR2RMLBloomFilter bf = entry.getValue();
        if (bf.membershipTest(new Key(subject.getBytes("UTF-8"))))
          bf.add(new Key(object.getBytes("UTF-8")));
        if (bf.membershipTest(new Key(object.getBytes("UTF-8"))))
          bf.add(new Key(subject.getBytes("UTF-8")));
      }
    }
   
    utilObj.updateTripleStoreWithBloomFilters(mapping, serializedmapping, triplestoreURL, null);
   
  }
 
  private static Group createCommandLineOptions() {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Group options =
        gbuilder
        .withName("options")
        .withOption(buildOption("filepath", "location of the input file directory", "filepath", obuilder, abuilder))
        .withOption(buildOption("triplestoreurl", "location of the triplestore", "triplestoreurl", obuilder, abuilder))
        .withOption(buildOption("predicate", "the uri or the predicate", "predicate", obuilder, abuilder))
        .withOption(obuilder
            .withLongName("help")
            .withDescription("print this message")
            .create())
            .create();

    return options;
  }

  public static Option buildOption(String shortName, String description, String argumentName,
      DefaultOptionBuilder obuilder, ArgumentBuilder abuilder) {
    return obuilder
        .withLongName(shortName)
        .withDescription(description)
        .withArgument(
            abuilder
            .withName(argumentName)
            .withMinimum(1)
            .withMaximum(1)
            .create())
            .create();
  }


}
TOP

Related Classes of edu.isi.karma.rdf.AddSameAsToBloomFilters

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.