Package gnu.trove

Examples of gnu.trove.TIntHashSet$HashProcedure


      {
        termids[i] = TermCodes.getCode(a);
        tfs[i] = b;
        for(int fi=0;fi<fieldCount;fi++)
          fields[fi][i] = field_occurrences[fi].get(a);
        final TIntHashSet ids = term_blocks.get(a);
        blockfreqs[i] = ids.size();
        this.blockTotal += ids.size();
        final int[] bids = ids.toArray();
        Arrays.sort(bids);
        term2blockids.put(termids[i], bids);
        i++;
        return true;
      }
View Full Code Here


 
  /** Insert a term into this document, occurs at given block id */
  public void insert(String t, int blockId)
  {
    insert(t);
    TIntHashSet blockids = null;
    if ((blockids = term_blocks.get(t)) == null)
    {
      term_blocks.put(t, blockids = new TIntHashSet(/*TODO */));
    }
    blockids.add(blockId);
    blockCount++; 
  }
 
View Full Code Here

      int blockTotal = 0;
      public boolean execute(final String a, final int b)
      {
        termids[i] = getTermId(a);
        tfs[i] = b;
        final TIntHashSet ids = term_blocks.get(a);
        blockfreqs[i] = ids.size();
        blockTotal += ids.size();
        final int[] bids = ids.toArray();
        Arrays.sort(bids);
        term2blockids.put(termids[i], bids);
        //System.err.println(a+": tid="+termids[i]+" tf="+tfs[i]+" bf="+blockfreqs[i] +" blocks="+Arrays.toString(bids));
        i++;
        return true;
View Full Code Here

      int blockTotal = 0;
      public boolean execute(final String a, final int b)
      {
        termids[i] = getTermId(a);
        tfs[i] = b;
        final TIntHashSet ids = term_blocks.get(a);
        blockfreqs[i] = ids.size();
        blockTotal += ids.size();
        final int[] bids = ids.toArray();
        Arrays.sort(bids);
        term2blockids.put(termids[i], bids);
        //System.err.println(a+": tid="+termids[i]+" tf="+tfs[i]+" bf="+blockfreqs[i] +" blocks="+Arrays.toString(bids));
        i++;
        return true;
View Full Code Here

   * @param arr1
   * @param arr2
   * @return intersection
   */
  public static int[] intersection(int[] arr1, int[] arr2) {
    TIntHashSet set = new TIntHashSet();
    set.addAll(arr1);
   
    Arrays.sort(arr2);
    TIntArrayList list = new TIntArrayList();
    for (int i : arr2) {
      if (set.contains(i)) {
        list.add(i);
      }
    }
    return list.toNativeArray();
  }
View Full Code Here

   * @param arr1
   * @param arr2
   * @return int[]
   */
  public static int[] union(int[] arr1, int[] arr2) {
    TIntHashSet set = new TIntHashSet();
    set.addAll(arr1);
    set.addAll(arr2);

    int[] arr = set.toArray();
    Arrays.sort(arr);
   
    return arr;
  }
View Full Code Here

      JobConf jc = HadoopPlugin.getJobFactory("testSplits").newJob();
      HadoopUtility.toHConfiguration(index, jc);
      BitPostingIndexInputFormat.setStructures(jc, "direct", "document");
      InputSplit[] splits = informat.getSplits(jc, 2);
     
      TIntHashSet termIds = new TIntHashSet();
     
      long tokens = 0;
      long pointers = 0;
      int docid = 0;
     
      for(InputSplit split : splits)
      {
        RecordReader<IntWritable, IntObjectWrapper<IterablePosting>> rr = informat.getRecordReader(split, jc, null);
        IntWritable key = rr.createKey();
        IntObjectWrapper<IterablePosting> value = rr.createValue();
        while(rr.next(key, value))
        {
          docid = key.get();
          int doclen = 0int docpointers = 0;
          IterablePosting ip = value.getObject();
          assertEquals("Number of pointers for docid " + docid + " is incorrect", documentPointers[docid], value.getInt());
          while(ip.next() != IterablePosting.EOL)
          {
            //System.err.println("termid" +ip.getId() + " f=" + ip.getFrequency());
            termIds.add(ip.getId());
            tokens += ip.getFrequency();
            doclen += ip.getFrequency();
            pointers++; docpointers++;
            if (numberOfTerms > 0)
              assertTrue("Got too big a termid ("+ip.getId()+") from direct index input stream, numTerms=" + numberOfTerms, ip.getId() < maxTermId);
          }
          if (documentPointers.length > 0)
            assertEquals("Number of pointers for docid " + docid + " is incorrect", documentPointers[docid], docpointers);
          assertEquals("Document length for docid "+docid+" is incorrect", documentLengths[docid], doclen);
        }
      }
      CollectionStatistics cs = index.getCollectionStatistics();
      assertEquals("Number of documents is incorrect", cs.getNumberOfDocuments(), docid + 1);
      assertEquals("Number of pointers is incorrect", cs.getNumberOfPointers(), pointers);
      assertEquals("Number of tokens is incorrect", cs.getNumberOfTokens(), tokens);
      if (numberOfTerms > 0)
      {
        assertEquals("Not all termIds found in direct index", termIds.size(), numberOfTerms);
      }
    }
View Full Code Here

    mqt.setTermProperty("quick|waggily");
    mqt.setDefaultTermWeightingModel(new DLH13());
    rs = matching.match("query1", mqt);
    assertNotNull(rs);
    assertEquals(2, rs.getResultSize());
    TIntHashSet docids = new TIntHashSet(rs.getDocids());
    System.err.println("" + rs.getDocids()[0] + " "+ rs.getScores()[0]);
    System.err.println("" + rs.getDocids()[1] + " "+ rs.getScores()[1]);
    assertTrue(docids.contains(0));
    assertTrue(docids.contains(2));
    assertEquals(2, rs.getDocids()[0]);
    assertEquals(0, rs.getDocids()[1]);
    assertTrue(rs.getScores()[0] > 0);
    assertTrue(rs.getScores()[1] > 0);
   
View Full Code Here

      //make a note of this term for the stream checking
      checkFreqs.put(foundTerm, CHECK_TERMS_DFS[i]);
    }
   
    //check as stream
    TIntHashSet termIds = new TIntHashSet();
    Iterator<Map.Entry<String, LexiconEntry>> lexIn = (Iterator<Entry<String, LexiconEntry>>) index.getIndexStructureInputStream("lexicon");
    int count = 0;
    while(lexIn.hasNext())
    {
      Map.Entry<String, LexiconEntry> lee = lexIn.next();
      assertNotNull(lee);
      assertNotNull(lee.getKey());
      assertTrue(lee.getKey().length() > 1);
      assertNotNull(lee.getValue());
      if (checkFreqs.containsKey(lee.getKey()))
      {
        assertEquals(checkFreqs.get(lee.getKey()), lee.getValue().getDocumentFrequency());
        checkFreqs.remove(lee.getKey());
      }
      termIds.add(lee.getValue().getTermId());
      count++;
    }
    assertEquals(NUMBER_UNIQUE_TERMS, count);
    assertEquals(NUMBER_UNIQUE_TERMS, termIds.size());
    assertEquals(0, StaTools.min(termIds.toArray()));
    assertEquals(NUMBER_UNIQUE_TERMS-1, StaTools.max(termIds.toArray()));
    assertTrue("Not all terms found in lexicon as stream", checkFreqs.size() == 0);
    IndexUtil.close(lexIn);
  }
View Full Code Here

    IndexUtil.close(lexIn);
  }
 
 
  public void checkDirectIndex(Index index, int maxTermId, int numberOfTerms, int documentLengths[], int[] documentPointers) throws Exception {
    TIntHashSet termIds = new TIntHashSet();
   
    long tokens = 0;
    long pointers = 0;
    int docid = 0;
    final PostingIndexInputStream piis = (PostingIndexInputStream) index.getIndexStructureInputStream("direct");
    assertNotNull("No direct index input stream found", piis);
    while(piis.hasNext())
    {
      IterablePosting ip = piis.next();
      int doclen = 0int docpointers = 0;   
      docid += piis.getEntriesSkipped();
      //System.err.println("getEntriesSkipped=" + piis.getEntriesSkipped());
      //System.err.println("docid=" + docid);
      while(ip.next() != IterablePosting.EOL)
      {
        //System.err.println("termid" +ip.getId() + " f=" + ip.getFrequency());
        termIds.add(ip.getId());
        tokens += ip.getFrequency();
        doclen += ip.getFrequency();
        pointers++; docpointers++;
        if (numberOfTerms > 0)
          assertTrue("Got too big a termid ("+ip.getId()+") from direct index input stream, numTerms=" + numberOfTerms, ip.getId() < maxTermId);
      }
      if (documentPointers.length > 0)
        assertEquals("Numebr of pointers for docid " + docid + " is incorrect", documentPointers[docid], docpointers);
      assertEquals("Document length for docid "+docid+" is incorrect", documentLengths[docid], doclen);
      docid++;
    }
   
    CollectionStatistics cs = index.getCollectionStatistics();
    assertEquals("Number of documents is incorrect", cs.getNumberOfDocuments(), docid);
    assertEquals("Number of pointers is incorrect", cs.getNumberOfPointers(), pointers);
    assertEquals("Number of tokens is incorrect", cs.getNumberOfTokens(), tokens);
    if (numberOfTerms > 0)
    {
      assertEquals("Not all termIds found in direct index", termIds.size(), numberOfTerms);
    }
  }
View Full Code Here

TOP

Related Classes of gnu.trove.TIntHashSet$HashProcedure

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.