Package ivory.core.data.dictionary

Examples of ivory.core.data.dictionary.PrefixEncodedLexicographicallySortedDictionary


    data.put("aaabcb", 7);
    data.put("aad", 8);
    data.put("abd", 9);
    data.put("abde", 10);

    PrefixEncodedLexicographicallySortedDictionary m =
        new PrefixEncodedLexicographicallySortedDictionary(8);

    // Add entries, in order.
    for (String key : data.keySet()) {
      m.add(key);
    }

    // Verify size.
    assertEquals(data.size(), m.size());
    // Verify bidirectional mapping.
    for ( Map.Entry<String, Integer> entry : data.entrySet()) {
      assertEquals((int) entry.getValue(), m.getId(entry.getKey()));
      assertEquals(entry.getKey(), m.getTerm(entry.getValue()));
    }

    Iterator<String> iter1 = m.iterator();
    Iterator<String> iter2 = data.keySet().iterator();
    for (int i=0; i< m.size(); i++) {
      assertTrue(iter1.hasNext());
      assertTrue(iter2.hasNext());

      assertEquals(iter2.next(), iter1.next());
    }
    assertFalse(iter1.hasNext());
    assertFalse(iter2.hasNext());
   
    assertEquals(0.6923077, m.getCompresssionRatio(), 10e-6);

    FileSystem fs = FileSystem.getLocal(new Configuration());
    m.store("tmp.dat", fs);

    PrefixEncodedLexicographicallySortedDictionary n =
        PrefixEncodedLexicographicallySortedDictionary.load(new Path("tmp.dat"), fs);

    // Verify size.
    assertEquals(data.size(), n.size());
    // Verify bidirectional mapping.
    for ( Map.Entry<String, Integer> entry : data.entrySet()) {
      assertEquals((int) entry.getValue(), n.getId(entry.getKey()));
      assertEquals(entry.getKey(), n.getTerm(entry.getValue()));
    }

    iter1 = m.iterator();
    iter2 = data.keySet().iterator();
    for (int i=0; i< m.size(); i++) {
View Full Code Here


  }

  @Test
  public void test2() throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    PrefixEncodedLexicographicallySortedDictionary m =
        PrefixEncodedLexicographicallySortedDictionary.loadFromPlainTextFile(
            new Path("etc/dictionary-test.txt"), fs, 8);

    assertEquals(0, m.getId("a"));
    assertEquals(1, m.getId("a1"));
    assertEquals(248, m.getId("aardvark"));
    assertEquals(2291, m.getId("affair"));
    assertEquals(3273, m.getId("airwolf"));
    assertEquals(6845, m.getId("anntaylor"));
    assertEquals(11187, m.getId("augustus"));
    assertEquals(12339, m.getId("azzuz"));

    assertEquals(0.5631129, m.getCompresssionRatio(), 10e-6);

    m.store("tmp.dat", fs);

    PrefixEncodedLexicographicallySortedDictionary n =
        PrefixEncodedLexicographicallySortedDictionary.load(
            new Path("tmp.dat"), fs);

    assertEquals(0, n.getId("a"));
    assertEquals(1, n.getId("a1"));
    assertEquals(248, n.getId("aardvark"));
    assertEquals(2291, n.getId("affair"));
    assertEquals(3273, n.getId("airwolf"));
    assertEquals(6845, n.getId("anntaylor"));
    assertEquals(11187, n.getId("augustus"));
    assertEquals(12339, n.getId("azzuz"));

    fs.delete(new Path("tmp.dat"), true);
  }
View Full Code Here

  // Test the actual dictionary for the TREC corpus.
  @Test
  public void test3() throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    PrefixEncodedLexicographicallySortedDictionary dictionary =
        new PrefixEncodedLexicographicallySortedDictionary();

    FSDataInputStream in = fs.open(new Path("etc/trec-index-terms.dat"));
    dictionary.readFields(in);
    in.close();

    assertEquals(312232, dictionary.size());
    // Note: termids start at 0;
    assertEquals("0", dictionary.getTerm(0));
    assertEquals("mainichi", dictionary.getTerm(200000));
    assertEquals("wassberg", dictionary.getTerm(300000));

    // Check bounds.
    assertEquals(null, dictionary.getTerm(-1));
    assertEquals(null, dictionary.getTerm(312232));
  }
View Full Code Here

TOP

Related Classes of ivory.core.data.dictionary.PrefixEncodedLexicographicallySortedDictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.