Package no.priv.garshol.duke

Examples of no.priv.garshol.duke.Configuration


    p.setHighProbability(new_value);
  }

  public void setFromOther(GeneticConfiguration cfg1,
                           GeneticConfiguration cfg2) {
    Configuration config = cfg1.getConfiguration();
    Configuration other = cfg2.getConfiguration();

    Property p1 = config.getPropertyByName(prop.getName());
    Property p2 = other.getPropertyByName(prop.getName());
    p1.setHighProbability(p2.getHighProbability());
  }
View Full Code Here


  /** JUnit rule providing the scratch folder in which tests create their output files. */
  @Rule
  public TemporaryFolder tmpdir = new TemporaryFolder();

  @Test
  public void testEmpty() throws IOException, SAXException {
    Configuration config = ConfigLoader.load("classpath:config-empty.xml");

    assertTrue(config.getDataSources().isEmpty());
    assertTrue(config.getDataSources(1).isEmpty());
    assertTrue(config.getDataSources(2).isEmpty());
    assertEquals(config.getThreshold(), 0.4);
    assertEquals(config.getMaybeThreshold(), 0.0);
    assertTrue(config.getProperties().isEmpty());

    File outfile = tmpdir.newFile("config.xml");
    ConfigWriter.write(config, outfile.getAbsolutePath());
    config = ConfigLoader.load(outfile.getAbsolutePath());
   
    assertTrue(config.getDataSources().isEmpty());
    assertTrue(config.getDataSources(1).isEmpty());
    assertTrue(config.getDataSources(2).isEmpty());
    assertEquals(config.getThreshold(), 0.4);
    assertEquals(config.getMaybeThreshold(), 0.0);
    assertTrue(config.getProperties().isEmpty());
  }
View Full Code Here

    List<Property> props = new ArrayList();
    props.add(new PropertyImpl("ID"));
    props.add(new PropertyImpl("NAME", lev, 0.3, 0.8));
    props.add(new PropertyImpl("EMAIL", lev, 0.3, 0.8));
   
    Configuration config = new ConfigurationImpl();
    ((ConfigurationImpl) config).setProperties(props);
    ((ConfigurationImpl) config).setThreshold(0.85);
    ((ConfigurationImpl) config).setMaybeThreshold(0.7);

    // --- write and reload
    File outfile = tmpdir.newFile("config.xml");
    ConfigWriter.write(config, outfile.getAbsolutePath());
    config = ConfigLoader.load(outfile.getAbsolutePath());

    // --- verify loaded correctly   
    assertTrue(config.getDataSources().isEmpty());
    assertTrue(config.getDataSources(1).isEmpty());
    assertTrue(config.getDataSources(2).isEmpty());
    assertEquals(config.getThreshold(), 0.85);
    assertEquals(config.getMaybeThreshold(), 0.7);
    assertEquals(3, config.getProperties().size());

    Property prop = config.getPropertyByName("ID");
    assertTrue("ID property lost", prop.isIdProperty());

    prop = config.getPropertyByName("NAME");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());

    prop = config.getPropertyByName("EMAIL");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());
  }
View Full Code Here

    List<Property> props = new ArrayList();
    props.add(new PropertyImpl("ID"));
    props.add(new PropertyImpl("NAME", lev, 0.3, 0.8));
    props.add(new PropertyImpl("EMAIL", lev, 0.3, 0.8));
   
    Configuration config = new ConfigurationImpl();
    ((ConfigurationImpl) config).setProperties(props);
    ((ConfigurationImpl) config).setThreshold(0.85);
    ((ConfigurationImpl) config).setMaybeThreshold(0.7);

    CSVDataSource csv = new CSVDataSource();
    csv.setInputFile("test.csv");
    csv.addColumn(new Column("id", "ID", null, null));
    csv.addColumn(new Column("name", "NAME", null, null));
    Column emailCol = new Column("email", "EMAIL", null, null);
    emailCol.setSplitOn(";");
    csv.addColumn(emailCol);
    ((ConfigurationImpl) config).addDataSource(0, csv);
   
    // --- write and reload
    File outfile = tmpdir.newFile("config.xml");           
    ConfigWriter.write(config, outfile.getAbsolutePath());
    config = ConfigLoader.load(outfile.getAbsolutePath());
   
    // --- verify loaded correctly   
    assertEquals(1, config.getDataSources().size());

    csv = (CSVDataSource) config.getDataSources().iterator().next();
    assertTrue(csv.getInputFile().endsWith("test.csv"));
    assertEquals(3, csv.getColumns().size());
    Collection<Column> csvEmailColList = csv.getColumn("email");
    Column csvEmailCol = (Column) csvEmailColList.toArray()[0];
    assertTrue(csvEmailCol.isSplit());
    // FIXME: check the columns (kind of hard given lack of ordering)
   
    assertTrue(config.getDataSources(1).isEmpty());
    assertTrue(config.getDataSources(2).isEmpty());
    assertEquals(config.getThreshold(), 0.85);
    assertEquals(config.getMaybeThreshold(), 0.7);
    assertEquals(3, config.getProperties().size());

    Property prop = config.getPropertyByName("ID");
    assertTrue("ID property lost", prop.isIdProperty());

    prop = config.getPropertyByName("NAME");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());

    prop = config.getPropertyByName("EMAIL");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());
  }
View Full Code Here

    List<Property> props = new ArrayList();
    props.add(new PropertyImpl("ID"));
    props.add(new PropertyImpl("NAME", lev, 0.3, 0.8));
    props.add(new PropertyImpl("EMAIL", lev, 0.3, 0.8));
   
    Configuration config = new ConfigurationImpl();
    ((ConfigurationImpl) config).setProperties(props);
    ((ConfigurationImpl) config).setThreshold(0.85);
    ((ConfigurationImpl) config).setMaybeThreshold(0.7);

    JDBCDataSource jdbc = new JDBCDataSource();
    jdbc.setDriverClass("klass");
    jdbc.setConnectionString("konnection");
    jdbc.setUserName("user");
    jdbc.setPassword("secret");
    jdbc.setQuery("select");
    jdbc.addColumn(new Column("id", "ID", null, null));
    jdbc.addColumn(new Column("name", "NAME", null, null));
    jdbc.addColumn(new Column("email", "EMAIL", null, null));
    ((ConfigurationImpl) config).addDataSource(0, jdbc);
   
    // --- write and reload
    File outfile = tmpdir.newFile("config.xml");           
    ConfigWriter.write(config, outfile.getAbsolutePath());
    config = ConfigLoader.load(outfile.getAbsolutePath());
   
    // --- verify loaded correctly   
    assertEquals(1, config.getDataSources().size());

    jdbc = (JDBCDataSource) config.getDataSources().iterator().next();
    assertEquals("klass", jdbc.getDriverClass());
    assertEquals("konnection", jdbc.getConnectionString());
    assertEquals("user", jdbc.getUserName());
    assertEquals("secret", jdbc.getPassword());
    assertEquals("select", jdbc.getQuery());
    assertEquals(3, jdbc.getColumns().size());
    // FIXME: check the columns (kind of hard given lack of ordering)
   
    assertTrue(config.getDataSources(1).isEmpty());
    assertTrue(config.getDataSources(2).isEmpty());
    assertEquals(config.getThreshold(), 0.85);
    assertEquals(config.getMaybeThreshold(), 0.7);
    assertEquals(3, config.getProperties().size());

    Property prop = config.getPropertyByName("ID");
    assertTrue("ID property lost", prop.isIdProperty());

    prop = config.getPropertyByName("NAME");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());

    prop = config.getPropertyByName("EMAIL");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());
  }
View Full Code Here

    List<Property> props = new ArrayList();
    props.add(new PropertyImpl("ID"));
    props.add(new PropertyImpl("NAME", lev, 0.3, 0.8));
    props.add(new PropertyImpl("EMAIL", lev, 0.3, 0.8));
   
    Configuration config = new ConfigurationImpl();
    ((ConfigurationImpl) config).setProperties(props);
    ((ConfigurationImpl) config).setThreshold(0.85);
    ((ConfigurationImpl) config).setMaybeThreshold(0.7);

    JNDIDataSource jndi = new JNDIDataSource();
    jndi.setJndiPath("path");
    jndi.setQuery("select");
    jndi.addColumn(new Column("id", "ID", null, null));
    jndi.addColumn(new Column("name", "NAME", null, null));
    jndi.addColumn(new Column("email", "EMAIL", null, null));
    ((ConfigurationImpl) config).addDataSource(0, jndi);
   
    // --- write and reload
    File outfile = tmpdir.newFile("config.xml");           
    ConfigWriter.write(config, outfile.getAbsolutePath());
    config = ConfigLoader.load(outfile.getAbsolutePath());
   
    // --- verify loaded correctly   
    assertEquals(1, config.getDataSources().size());

    jndi = (JNDIDataSource) config.getDataSources().iterator().next();
    assertEquals("select", jndi.getQuery());
    assertEquals(3, jndi.getColumns().size());
    // FIXME: check the columns (kind of hard given lack of ordering)
   
    assertTrue(config.getDataSources(1).isEmpty());
    assertTrue(config.getDataSources(2).isEmpty());
    assertEquals(config.getThreshold(), 0.85);
    assertEquals(config.getMaybeThreshold(), 0.7);
    assertEquals(3, config.getProperties().size());

    Property prop = config.getPropertyByName("ID");
    assertTrue("ID property lost", prop.isIdProperty());

    prop = config.getPropertyByName("NAME");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());

    prop = config.getPropertyByName("EMAIL");
    assertEquals(lev.getClass(), prop.getComparator().getClass());
    assertEquals(0.3, prop.getLowProbability());
    assertEquals(0.8, prop.getHighProbability());
  }
View Full Code Here

    }

    // if asked to, write config
    if (outfile != null) {
      try {
        Configuration b = population.getBestConfiguration().getConfiguration();
        ConfigWriter.write(b, outfile);
      } catch (IOException e) {
        System.err.println("ERROR: Cannot write to '" + outfile + "': " + e);
      }
    }
View Full Code Here

   *                 be null.
   * @return The F-number of the configuration.
   */
  private double evaluate(GeneticConfiguration config,
                          MatchListener listener) {
    Configuration cconfig = config.getConfiguration();
    Processor proc = new Processor(cconfig, database);
    TestFileListener eval = makeEval(cconfig, testdb, proc);

    if (active || incomplete)
      // in active learning the test file is incomplete, so F-number eval
      // should be optimistic. similarly if the test file is known to be
      // incomplete, for whatever reason
      eval.setPessimistic(false);

    proc.addMatchListener(eval);
    TestFileListener seval = null;
    if (scientific) {
      seval = makeEval(cconfig, ((LinkFileOracle) oracle).getLinkDatabase(),
                       proc);
      seval.setPessimistic(true);
      proc.addMatchListener(seval);
    }
    if (listener != null)
      proc.addMatchListener(listener);
    if (cconfig.isDeduplicationMode())
      proc.linkRecords(cconfig.getDataSources());
    else
      proc.linkRecords(cconfig.getDataSources(2), false);

    if (seval != null)
      sciencetracker.put(config, seval.getFNumber());

    config.setFNumber(eval.getFNumber());
View Full Code Here

        r2 = secondary.get(id2);

      List<GeneticConfiguration> configs = population.getConfigs();
      boolean[] believers = new boolean[configs.size()];
      for (int ix = 0; ix < configs.size(); ix++) {
        Configuration config = configs.get(ix).getConfiguration();
        Processor proc = new Processor(config, database);
        believers[ix] = proc.compare(r1, r2) > config.getThreshold();
      }
      return believers;
    }
View Full Code Here

      System.err.println("ERROR: scientific mode requires a test file");
      System.exit(1);
    }

    // get started
    Configuration config = ConfigLoader.load(argv[0]);
    GeneticAlgorithm genetic =
      new GeneticAlgorithm(config, testfile,
                           parser.getOptionState("scientific"));
    genetic.setPopulation(parser.getOptionInteger("population", 100));
    genetic.setGenerations(parser.getOptionInteger("generations", 100));
View Full Code Here

TOP

Related Classes of no.priv.garshol.duke.Configuration

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.