Package gnu.trove

Examples of gnu.trove.TIntHashSet


    if (trainingProportion.value > 0) {
      if (clusterings.size() > 1)
        throw new IllegalArgumentException("Expect one clustering to do train/test split, not " + clusterings.size());
      Clustering clustering = clusterings.get(0);
      int targetTrainSize = (int)(trainingProportion.value * clustering.getNumInstances());
      TIntHashSet clustersSampled = new TIntHashSet();
      Randoms random = new Randoms(123);
      LabelAlphabet lalph = new LabelAlphabet();
      InstanceList trainingInstances = new InstanceList(new Noop(null, lalph));
      while (trainingInstances.size() < targetTrainSize) {
        int cluster = random.nextInt(clustering.getNumClusters());
        if (!clustersSampled.contains(cluster)) {
          clustersSampled.add(cluster);
          InstanceList instances = clustering.getCluster(cluster);
          for (int i = 0; i < instances.size(); i++) {
            Instance inst = instances.get(i);
            trainingInstances.add(new Instance(inst.getData(), lalph.lookupLabel(new Integer(cluster)), inst.getName(), inst.getSource()));
          }
        }
      }
      trainingInstances.shuffle(random);
      Clustering trainingClustering = createSmallerClustering(trainingInstances);
     
      InstanceList testingInstances = new InstanceList(null, lalph);
      for (int i = 0; i < clustering.getNumClusters(); i++) {
        if (!clustersSampled.contains(i)) {
          InstanceList instances = clustering.getCluster(i);
          for (int j = 0; j < instances.size(); j++) {
            Instance inst = instances.get(j);
            testingInstances.add(new Instance(inst.getData(), lalph.lookupLabel(new Integer(i)), inst.getName(), inst.getSource()));
          }         
View Full Code Here


    int totalX = 0;
    int totalY = 0;

    int key, x, y;

    TIntHashSet distinctKeys = new TIntHashSet();
    distinctKeys.addAll(countsX.keys());
    distinctKeys.addAll(countsY.keys());

    TIntIterator iterator = distinctKeys.iterator();
    while (iterator.hasNext()) {
      key = iterator.next();

      x = 0;
      if (countsX.containsKey(key)) {
View Full Code Here

    int indices[];
    int maxSparseIndex = -1;
    int maxDenseIndex = -1;

    // First, we find the union of all the indices used in the instances
    TIntHashSet hIndices = new TIntHashSet(instances.getDataAlphabet().size());

    while (instanceItr.hasNext()) {
      instance = (Instance) instanceItr.next();
      v = (SparseVector) (instance.getData());
      indices = v.getIndices();

      if (indices != null) {
        hIndices.addAll(indices);

        if (indices[indices.length - 1] > maxSparseIndex)
          maxSparseIndex = indices[indices.length - 1];
      } else // dense
      if (v.numLocations() > maxDenseIndex)
        maxDenseIndex = v.numLocations() - 1;
    }

    if (maxDenseIndex > -1) // dense vectors were present
    {
      if (maxSparseIndex > maxDenseIndex)
      // sparse vectors were present and they had greater indices than
      // the dense vectors
      {
        // therefore, we create sparse vectors and
        // add all the dense indices
        for (int i = 0; i <= maxDenseIndex; i++)
          hIndices.add(i);
      } else
      // sparse indices may have been present, but we don't care
      // since they never had indices that exceeded those of the
      // dense vectors
      {
        return mean(instances, maxDenseIndex + 1);
      }
    }

    // reaching this statement implies we can create a sparse vector
    return mean(instances, hIndices.toArray());

  }
View Full Code Here

     * term id. This is acceptable, as documents are assumed to have sufficiently small postings that
     * they can fit in memory */
   
    List<Posting> postingList = new ArrayList<Posting>();
    int doclen = 0;
    TIntHashSet foundIds = new TIntHashSet();
    while(documentPostings.hasNext())
    {
      final Posting p = documentPostings.next().asWritablePosting();
      //check for duplicate pointers
      if (! foundIds.contains(p.getId()) )
      {
        postingList.add(p);
        doclen += p.getFrequency();
        reporter.progress();
        foundIds.add(p.getId());
      }
      else
      {
        dupPointers++;
      }
View Full Code Here

             * @param _queryid String the query identifier.
             */
            public QrelsHashSet(String _queryid){
                    this.queryid = _queryid;
                    nonRelDocnos = new THashSet<String>();
                    relGrade = new TIntHashSet();
                    relGradeDocnosMap = new TIntObjectHashMap<THashSet<String>>();
            }
View Full Code Here

   * @param resultFilename String the filename
   *        of the result file to evaluate.
   */
  public void evaluate(String resultFilename) {
  //  //logger.info("Result file: "+resultFilename);
    queryNumbers = new TIntHashSet();
    recipRank = new TIntDoubleHashMap();
    //initialise the arr
    arr = 0.0d;
    inTop50 = 0;
    inTop20 = 0;
View Full Code Here

  /** Insert a term into this document, occurs at given block id, and in the given field */
  public void insert(String t, int fieldId, int blockId)
  {
    super.insert(t, fieldId);
    TIntHashSet blockids = null;
    if ((blockids = term_blocks.get(t)) == null)
    {
      term_blocks.put(t, blockids = new TIntHashSet(/*TODO */));
    }
    blockids.add(blockId);
    blockCount++;
  }
 
View Full Code Here

  /** Insert a term into this document, occurs at given block id, and in the given fields */
  public void insert(String t, int[] fieldIds, int blockId)
  {
    super.insert(t, fieldIds);
    TIntHashSet blockids = null;
    if ((blockids = term_blocks.get(t)) == null)
    {
      term_blocks.put(t, blockids = new TIntHashSet(/*TODO */));
    }
    blockids.add(blockId);
    blockCount++;
  }
 
View Full Code Here

  /** Insert a term into this document tf times, occurs at given block id, and in the given fields */
  public void insert(int tf, String t, int[] fieldIds, int blockId)
  {
    super.insert(tf, t, fieldIds);
    TIntHashSet blockids = null;
    if ((blockids = term_blocks.get(t)) == null)
    {
      term_blocks.put(t, blockids = new TIntHashSet(/*TODO */));
    }
    blockids.add(blockId);
    blockCount++;
  }
 
View Full Code Here

      {
        termids[i] = TermCodes.getCode(a);
        tfs[i] = b;
        for(int fi=0;fi<fieldCount;fi++)
          fields[fi][i] = field_occurrences[fi].get(a);
        final TIntHashSet ids = term_blocks.get(a);
        blockfreqs[i] = ids.size();
        this.blockTotal += ids.size();
        final int[] bids = ids.toArray();
        Arrays.sort(bids);
        term2blockids.put(termids[i], bids);
        i++;
        return true;
      }
View Full Code Here

TOP

Related Classes of gnu.trove.TIntHashSet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.