Package no.priv.garshol.duke.test

Source Code of no.priv.garshol.duke.test.NTriplesDataSourceTest$NullCleaner

package no.priv.garshol.duke.test;

import java.io.StringReader;

import org.junit.Test;
import org.junit.Before;
import static junit.framework.Assert.assertTrue;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertEquals;

import no.priv.garshol.duke.Record;
import no.priv.garshol.duke.Cleaner;
import no.priv.garshol.duke.RecordIterator;
import no.priv.garshol.duke.cleaners.RegexpCleaner;
import no.priv.garshol.duke.cleaners.FamilyCommaGivenCleaner;
import no.priv.garshol.duke.datasources.Column;
import no.priv.garshol.duke.datasources.NTriplesDataSource;

public class NTriplesDataSourceTest {
  private NTriplesDataSource source;
  private static final String RDF_TYPE =
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

  @Before
  public void setup() {
    source = new NTriplesDataSource();
  }
 
  @Test
  public void testEmpty() {
    RecordIterator it = read("");
    assertFalse("empty data source contains records",
                it.hasNext());
  }

  @Test
  public void testEmptyIncremental() {
    source.setIncrementalMode(true);
    testEmpty();
  }

  @Test
  public void testEmptyBlank() {
    RecordIterator it = read("\n\n");
    assertFalse("empty data source contains records",
                it.hasNext());
  }

  @Test
  public void testEmptyBlankInc() {
    source.setIncrementalMode(true);
    testEmptyBlank();
  }

  @Test
  public void testSingleRecord() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n");

    Record r = it.next();
    assertEquals("http://a", r.getValue("ID"));
    assertEquals("foo", r.getValue("PROP"));

    assertFalse(it.hasNext());
  }

  @Test
  public void testSingleRecordInc() {
    source.setIncrementalMode(true);
    testSingleRecord();
  }

  @Test
  public void testSingleRecord2() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n" +
                             "<http://a> <http://c> \"foo\" .\n");

    Record r = it.next();
    assertEquals("http://a", r.getValue("ID"));
    assertEquals("foo", r.getValue("PROP"));

    assertFalse(it.hasNext());
  }
 
  @Test
  public void testSingleRecord2Inc() {
    source.setIncrementalMode(true);
    testSingleRecord2();
  }

  @Test
  public void testSingleRecord2Spaces() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
   
    RecordIterator it = read("\n<http://a> <http://b> \"foo\" .\n\n" +
                             "<http://a> <http://c> \"foo\" .\n\n");

    Record r = it.next();
    assertEquals("http://a", r.getValue("ID"));
    assertEquals("foo", r.getValue("PROP"));

    assertFalse(it.hasNext());
  }
 
  @Test
  public void testSingleRecord2SpacesInc() {
    source.setIncrementalMode(true);
    testSingleRecord2Spaces();
  }

  @Test
  public void testTwoRecords() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n" +
                             "<http://a> <http://c> \"foo\" .\n" +
                             "<http://a> <" + RDF_TYPE + "> \"http://d\" .\n" +
                             "<http://e> <http://b> \"bar\" .\n" +
                             "<http://e> <http://c> \"foo\" .\n" +
                             "<http://e> <" + RDF_TYPE + "> \"http://f\" .\n");

    Record r = it.next();
    checkAorE(r); // we don't know the order
    assertTrue("second record not found", it.hasNext());
    r = it.next();
    checkAorE(r);
  }

  private void checkAorE(Record r) {
    if (r.getValue("ID").equals("http://a"))
      assertEquals("foo", r.getValue("PROP"));
    else {
      assertEquals("http://e", r.getValue("ID"));
      assertEquals("bar", r.getValue("PROP"));
    }
  }

  @Test
  public void testTwoRecordsInc() {
    source.setIncrementalMode(true);
    testTwoRecords();
  }
 
  @Test
  public void testTypeFiltering() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
    source.setAcceptTypes("http://d");
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n" +
                             "<http://a> <http://c> \"foo\" .\n" +
                             "<http://a> <" + RDF_TYPE + "> \"http://d\" .\n" +
                             "<http://e> <http://b> \"bar\" .\n" +
                             "<http://e> <http://c> \"foo\" .\n" +
                             "<http://e> <" + RDF_TYPE + "> \"http://f\" .\n");

    Record r = it.next();
    assertEquals("http://a", r.getValue("ID"));
    assertEquals("foo", r.getValue("PROP"));

    assertFalse("e record not filtered out", it.hasNext());
  }

  @Test
  public void testTypeFilteringInc() {
    source.setIncrementalMode(true);
    testTypeFiltering();
  }

  @Test
  public void testSingleRecordDoubleProp() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
    // yes, we map b two times. might be necessary to split a compound
    // value into two different properties.
    source.addColumn(new Column("http://b", "PROP2", null, null));
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n");

    Record r = it.next();
    assertEquals("http://a", r.getValue("ID"));
    assertEquals("foo", r.getValue("PROP"));
    assertEquals("foo", r.getValue("PROP2"));

    assertFalse(it.hasNext());
  }

  @Test
  public void testSingleRecordDoublePropInc() {
    source.setIncrementalMode(true);
    testSingleRecordDoubleProp();
  }

  @Test
  public void testRealData() {
    String data = "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://data.deichman.no/catalogueName> \"Ahlgren, Ernst\" .\n" +
      "<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://data.deichman.no/lifespan> \"1850-1888\" .\n" +
"<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://www.foafrealm.org/xfoaf/0.1/nationality> <http://data.deichman.no/nationality/sv> .\n" +
"<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .\n" +
"<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://xmlns.com/foaf/0.1/name> \"Ahlgren, Ernst\" .\n" +
"<http://data.deichman.no/person/ahlgren_ernst_1850-1888> <http://xmlns.com/foaf/0.1/title> \"psevd. for Victoria Benedictsson\" .\n";

    RegexpCleaner birthcleaner = new RegexpCleaner();
    birthcleaner.setRegexp("^(\\d\\d\\d\\d)-");
    RegexpCleaner deathcleaner = new RegexpCleaner();
    deathcleaner.setRegexp("-(\\d\\d\\d\\d)$");
   
    source.addColumn(new Column("?uri", "ID", null, null));   
    source.addColumn(new Column("http://xmlns.com/foaf/0.1/name", "NAME",
                                null, new FamilyCommaGivenCleaner()));
    source.addColumn(new Column("http://data.deichman.no/lifespan",
                                "YEAROFBIRTH", null, birthcleaner));
    source.addColumn(new Column("http://data.deichman.no/lifespan",
                                "YEAROFDEATH", null, deathcleaner));

    RecordIterator it = read(data);
    Record r = it.next();
    assertEquals("http://data.deichman.no/person/ahlgren_ernst_1850-1888",
                 r.getValue("ID"));
    assertEquals("ernst ahlgren", r.getValue("NAME"));
    assertEquals("1850", r.getValue("YEAROFBIRTH"));
    assertEquals("1888", r.getValue("YEAROFDEATH"));

    assertFalse(it.hasNext());
  }

  @Test
  public void testCleanedNullIsDiscarded() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null,
                                new NullCleaner()));
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n");

    assertFalse(it.hasNext());
  }

  @Test
  public void testCleanedEmptyIsDiscarded() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null,
                                new EmptyCleaner()));
   
    RecordIterator it = read("<http://a> <http://b> \"foo\" .\n");

    assertFalse(it.hasNext());
  }
 
  @Test
  public void testEmptyRecord() {
    source.addColumn(new Column("?uri", "ID", null, null));
    source.addColumn(new Column("http://b", "PROP", null, null));
   
    RecordIterator it = read("<http://a> <http://c> \"foo\" .\n");

    assertFalse("failed to filter out empty records",
                it.hasNext());
  }
 
  // --- helpers
 
  private RecordIterator read(String csvdata) {
    source.setReader(new StringReader(csvdata));
    return source.getRecords();
  }

  static class NullCleaner implements Cleaner {
    public String clean(String value) {
      return null;
    }
  }

  static class EmptyCleaner implements Cleaner {
    public String clean(String value) {
      return "";
    }
  }
}
TOP

Related Classes of no.priv.garshol.duke.test.NTriplesDataSourceTest$NullCleaner

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.