Package edu.isi.karma.rdf

Source Code of edu.isi.karma.rdf.CombineBloomFiltersFromRDF

package edu.isi.karma.rdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.hash.Hash;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.uwyn.jhighlight.tools.FileUtils;

import edu.isi.karma.er.helper.TripleStoreUtil;
import edu.isi.karma.kr2rml.writer.KR2RMLBloomFilter;
import edu.isi.karma.webserver.KarmaException;



public class CombineBloomFiltersFromRDF {

  static String filepath;
    static String triplestoreURL;
    static String context;
    static String predicateURI = "http://isi.edu/integration/karma/dev#hasBloomFilter";
  public static void main(String[] args) throws IOException, KarmaException {
    Group options = createCommandLineOptions();
        Parser parser = new Parser();
        parser.setGroup(options);
        parser.setGroup(options);
        HelpFormatter hf = new HelpFormatter();
        parser.setHelpFormatter(hf);
        parser.setHelpTrigger("--help");
        CommandLine cl = parser.parseAndHelp(args);
        if (cl == null || cl.getOptions().size() == 0 || cl.hasOption("--help")) {
            hf.setGroup(options);
            hf.print();
            return;
        }
        filepath = (String) cl.getValue("--filepath");
        triplestoreURL = (String) cl.getValue("--triplestoreurl");
        context = (String) cl.getValue("--context");
        if (filepath == null || triplestoreURL == null || context == null)
          return;
    File file = new File(filepath);
    Map<String, BloomFilterWorker> workers = new HashMap<String, BloomFilterWorker>();
    Map<String, KR2RMLBloomFilter> bfs = new HashMap<String, KR2RMLBloomFilter>();
    long start = System.currentTimeMillis();
    if (file.isDirectory()) {
      File[] files = file.listFiles();
      for (File f : files) {
        if (FileUtils.getExtension(f.getName()) != null) {
          Model model = ModelFactory.createDefaultModel();
          InputStream s = new FileInputStream(f);
          model.read(s, null, "TURTLE");
          StmtIterator iterator = model.listStatements();
          while(iterator.hasNext()) {
            Statement st = iterator.next();
            String subject = st.getSubject().toString();
            String object = st.getObject().toString();
            String predicate = st.getPredicate().toString();
            if (predicate.contains("hasBloomFilter")) {
              //predicateURI = predicate;
              BloomFilterWorker worker = workers.get(subject);
              if (worker == null) {
                worker = new BloomFilterWorker();
                Thread t = new Thread(worker);
                t.start();
              }
              worker.addBloomfilters(object);
              workers.put(subject, worker);
            }
          }
        }

      }
      for (Entry<String, BloomFilterWorker> entry : workers.entrySet()) {
        entry.getValue().setDone();
      }
      for (Entry<String, BloomFilterWorker> entry : workers.entrySet()) {
        while(!entry.getValue().isFinished());
        bfs.put(entry.getKey(), entry.getValue().getKR2RMLBloomFilter());
      }
      TripleStoreUtil utilObj = new TripleStoreUtil();
      Set<String> triplemaps = bfs.keySet();
      Map<String, String> bloomfilterMapping = new HashMap<String, String>();
      bloomfilterMapping.putAll(utilObj.getBloomFiltersForMaps(triplestoreURL, context, triplemaps));
      utilObj.updateTripleStoreWithBloomFilters(bfs, bloomfilterMapping, triplestoreURL, context);
      System.out.println("process time: " + (System.currentTimeMillis() - start));
      Map<String, String> verification = new HashMap<String, String>();
      verification.putAll(utilObj.getBloomFiltersForMaps(triplestoreURL, context, triplemaps));
      boolean verify = true;
      for (Entry<String, String> entry : verification.entrySet()) {
        String key = entry.getKey();
        String value = entry.getValue();
        KR2RMLBloomFilter bf2 = new KR2RMLBloomFilter(KR2RMLBloomFilter.defaultVectorSize, KR2RMLBloomFilter.defaultnbHash, Hash.JENKINS_HASH);
        KR2RMLBloomFilter bf = bfs.get(key);
        bf2.populateFromCompressedAndBase64EncodedString(value);
        bf2.and(bf);
        bf2.xor(bf);
        try {
          Field f = BloomFilter.class.getDeclaredField("bits");
          f.setAccessible(true);
          BitSet bits = (BitSet) f.get(bf2);
          if (bits.cardinality() != 0) {
            verify = false;
            break;
          }
        } catch (Exception e) {

        }
      }
      if (!verify) {
        utilObj.updateTripleStoreWithBloomFilters(bfs, verification, triplestoreURL, context);
      }
    }

  }

  private static Group createCommandLineOptions() {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Group options =
        gbuilder
        .withName("options")
        .withOption(buildOption("filepath", "location of the input file directory", "filepath", obuilder, abuilder))
        .withOption(buildOption("triplestoreurl", "location of the triplestore", "triplestoreurl", obuilder, abuilder))
        .withOption(buildOption("context", "the context uri", "context", obuilder, abuilder))
        .withOption(obuilder
            .withLongName("help")
            .withDescription("print this message")
            .create())
            .create();

    return options;
  }

  public static Option buildOption(String shortName, String description, String argumentName,
      DefaultOptionBuilder obuilder, ArgumentBuilder abuilder) {
    return obuilder
        .withLongName(shortName)
        .withDescription(description)
        .withArgument(
            abuilder
            .withName(argumentName)
            .withMinimum(1)
            .withMaximum(1)
            .create())
            .create();
  }

}
TOP

Related Classes of edu.isi.karma.rdf.CombineBloomFiltersFromRDF

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.