Package org.data2semantics.RDFmodel.modules

Source Code of org.data2semantics.RDFmodel.modules.RDFCompression

package org.data2semantics.RDFmodel.modules;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.data2semantics.RDFmodel.Boundary;
import org.data2semantics.RDFmodel.CLAccountant;
import org.data2semantics.RDFmodel.RDFGraph;
import org.data2semantics.RDFmodel.RDFhelper;
import org.data2semantics.RDFmodel.StringTree;
import org.data2semantics.platform.annotation.In;
import org.data2semantics.platform.annotation.Main;
import org.data2semantics.platform.annotation.Module;
import org.data2semantics.platform.annotation.Out;
import org.openrdf.model.Literal;

@Module(name="RDFCompression") public class RDFCompression extends RDFhelper {
 
  private String _fn;
  private RDFGraph _G;
  private List<String> _uris;
  private List<Literal> _lits;
  private StringTree _ST;
  private int _minlinks;
  private Set<Integer> _tbox;
  private Boundary _boundary;
 
  // output fields
  @Out(name        = "URI codelength",
     description = "The uncompressed codelength of the set of all URI's in the data")
  public int _cl_uris;
 
  @Out(name        = "URI gzipped codelength",
     description = "The gzipped codelength of the set of all URI's in the data")
  public int _cl_uris_gzipped;
 
  @Out(name        = "URI packed codelength",
     description = "The packed codelength of the set of all URI's in the data")
  public int _cl_uris_packed;
 
  @Out(name        = "URI gzipped,packed codelengths",
     description = "The gzipped,packed codelength of the set of all URI's in the data")
  public int _cl_uris_packed_gzipped;
 
  @Out(name        = "Literal codelengths",
     description = "The codelength of the list of all literals in the data")  
  public int _cl_lits;
 
  @Out(name        = "Literal gzipped codelengths",
     description = "The gzipped codelength of the list of all literals in the data")  
  public int _cl_lits_gzipped;
 
  public RDFCompression(@In(name="file") String filename, @In(name="minlinks") int minlinks) {
    _fn = filename;
    _minlinks = minlinks;
  }
 

  private void codelengths() {
    String uristr = set2string(_uris);
    String packed = _ST.getPacked();
    _cl_uris                = uristr.length() * 8;
    _cl_uris_gzipped        = gzip(uristr);
    _cl_uris_packed         = packed.length() * 8;
    _cl_uris_packed_gzipped = gzip(packed);
   
    String lits = set2string(_lits);
    _cl_lits                = lits.length() * 8;
    _cl_lits_gzipped        = gzip(lits);
  }
 
  @Out(name="Structure codelength no-URIs",
     description="The codelength of all structural information in the data, without using URIs")
  public double cl_structure_no_uris() {
    Boundary root_b = new Boundary(); root_b.add(_ST);
    return cl_structure(root_b);
  }
 
  @Out(name="Structure codelength URIs",
       description="The codelength of all structural information in the data, using conditioning on the URIs")
  public double cl_structure_uris() {
    return cl_structure(_boundary);
  }

  private double cl_structure(Boundary B) {
    CLAccountant acc = encode("test", _G, B.get_uri_map(_uris, _ST), _tbox).getResults();
    return acc.L();
  }
 
  @Main
  public void main() {
    _uris = new ArrayList<String>();
    _lits = new ArrayList<Literal>();
    _G = new RDFGraph(new RDFGraph.TripleFile(_fn)); // FIXME: URIs uninitialized!
    _ST = new StringTree(_uris);
    _tbox = tbox_heuristic_most_incoming(_G, _minlinks);
    _boundary = findBestBoundary(_G, _uris, _tbox, _ST);
    codelengths();
  }
 
}
TOP

Related Classes of org.data2semantics.RDFmodel.modules.RDFCompression

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.