Package it.unimi.dsi.fastutil.ints

Examples of it.unimi.dsi.fastutil.ints.IntList
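
A minimal, self-contained usage sketch of the interface itself (assuming only that fastutil is on the classpath); IntArrayList is its standard array-backed implementation:

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import java.util.Arrays;

public class IntListDemo {
  public static void main(String[] args) {
    // IntList stores primitive ints, avoiding boxing to Integer.
    IntList list = new IntArrayList();
    list.add(3);
    list.add(1);
    list.add(2);
    int first = list.getInt(0);        // primitive getter, returns 3
    int[] asArray = list.toIntArray(); // copy out as a plain int[]
    System.out.println(first + " " + Arrays.toString(asArray));
  }
}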


   * @throws IOException
   */

  public static IntList readSizesSuccinct( final CharSequence filename, final int N ) throws IOException {
    LOGGER.debug( "Loading sizes..." );
    final IntList sizes = new AbstractIntList() {
      final EliasFanoLongBigList list = new EliasFanoLongBigList( new GammaCodedIterableList( BinIO.loadBytes( filename ), N ) );

      public int getInt( int index ) {
        return (int)list.getLong( index );
      }
View Full Code Here
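
The method above exposes an Elias-Fano-compressed list of sizes as a read-only IntList: with AbstractIntList, only getInt(int) and size() need to be supplied. A minimal sketch of the same pattern over a plain long[] (the backing array and class name here are illustrative, not part of the original source):

import it.unimi.dsi.fastutil.ints.AbstractIntList;
import it.unimi.dsi.fastutil.ints.IntList;

public class LongArrayAsIntList {
  public static void main(String[] args) {
    // Hypothetical backing data; the original wraps an EliasFanoLongBigList instead.
    final long[] backing = { 10, 20, 30 };
    final IntList view = new AbstractIntList() {
      @Override
      public int getInt(int index) { return (int) backing[index]; }

      @Override
      public int size() { return backing.length; }
    };
    System.out.println(view.getInt(1)); // prints 20
    // Mutating calls such as view.add(5) throw UnsupportedOperationException.
  }
}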


    final Coding positionCoding = flags.get( Component.POSITIONS );
   
    if ( countCoding == null && positionCoding != null ) throw new IllegalArgumentException( "Index " + basename + " has positions but no counts (this can't happen)" );
   
    // Load document sizes if forced to do so, or if the pointer/position compression methods make it necessary.
    IntList sizes = null;
    // TODO: quick patch to avoid loading sizes in case of payloads.
    if ( payload == null && ( documentSizes || positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE ) ) {
      sizes = queryProperties != null && queryProperties.containsKey( UriKeys.SUCCINCTSIZES ) ? readSizesSuccinct( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments ) : readSizes( basename + DiskBasedIndex.SIZES_EXTENSION, numberOfDocuments );
      if ( sizes.size() != numberOfDocuments ) throw new IllegalStateException( "The length of the size list (" + sizes.size() + ") is not equal to the number of documents (" + numberOfDocuments + ")" );
    }
   
    // Load offsets if forced to do so. Depending on a property, we use the core-memory or the semi-external version.
    final LongList offsets;
    // TODO: quick patch to avoid loading sizes in case of payloads.
View Full Code Here

    String[] localBasename = properties.getStringArray( PropertyKeys.LOCALINDEX );
    Index[] localIndex = new Index[ localBasename.length ];
    for( int i = 0; i < localIndex.length ; i++ ) localIndex[ i ] = Index.getInstance( localBasename[ i ], randomAccess, documentSizes );

    final int numberOfDocuments = properties.getInt( Index.PropertyKeys.DOCUMENTS );
    final IntList sizes = queryProperties != null && queryProperties.containsKey( Index.UriKeys.SIZES ) ?
        DiskBasedIndex.readSizes( queryProperties.get( Index.UriKeys.SIZES ), numberOfDocuments ) : null;

    if ( sizes != null && documentSizes ) LOGGER.warn( "You are loading both local sizes and a global size file specified by the \"size\" properties, which is usually nonsensical" );

    boolean hasCounts = true;
View Full Code Here

    }
  }
 
  public void run() throws Exception {
    final ProgressLogger pl = new ProgressLogger( LOGGER, logInterval );
    final IntList sizeList = globalIndex.sizes;
    partitionSizes();
   
    final int[] position = new int[ globalIndex.maxCount ];
    final int[] localFrequency = new int[ numIndices ];
    final int[] usedIndex = new int[ numIndices ];
    final InputBitStream[] direct = new InputBitStream[ numIndices ];
    final InputBitStream[] indirect = new InputBitStream[ numIndices ];
    final BloomFilter[] bloomFilter = bloomFilterPrecision != 0 ? new BloomFilter[ numIndices ] : null;
    final File[] tempFile = new File[ numIndices ];
    final CachingOutputBitStream[] temp = new CachingOutputBitStream[ numIndices ];
    IndexIterator indexIterator;
   
    for ( int i = 0; i < numIndices; i++ ) {
      tempFile[ i ] = new File( localBasename[ i ] + ".temp" );
      temp[ i ] = new CachingOutputBitStream( tempFile[ i ], bufferSize );
      direct[ i ] = new InputBitStream( temp[ i ].buffer() );
      indirect[ i ] = new InputBitStream( tempFile[ i ] );
      if ( bloomFilterPrecision != 0 ) bloomFilter[ i ] = new BloomFilter( globalIndex.numberOfTerms, bloomFilterPrecision );
    }
    int usedIndices;
    MutableString currentTerm = new MutableString();
    Payload payload = null;
    int frequency, globalPointer, localIndex, localPointer, count = -1;

    pl.expectedUpdates = globalIndex.numberOfPostings;
    pl.itemsName = "postings";
    pl.logInterval = logInterval;
    pl.start( "Partitioning index..." );

    for ( int t = 0; t < globalIndex.numberOfTerms; t++ ) {
      terms.readLine( currentTerm );
      indexIterator = indexReader.nextIterator();
      usedIndices = 0;
      frequency = indexIterator.frequency();
     
      for ( int j = 0; j < frequency; j++ ) {
        globalPointer = indexIterator.nextDocument();               
        localIndex = strategy.localIndex( globalPointer );

        if ( localFrequency[ localIndex ] == 0 ) {
          // First time we see a document for this index.
          currentTerm.println( localTerms[ localIndex ] );
          numTerms[ localIndex ]++;
          usedIndex[ usedIndices++ ] = localIndex;
          if ( bloomFilterPrecision != 0 ) bloomFilter[ localIndex ].add( currentTerm );
        }
       
        /* Temporarily store the posting data; note that we save the global pointer, as we
         * will need it later to access the size list. */
       
        localFrequency[ localIndex ]++;
        numPostings[ localIndex ]++;
        temp[ localIndex ].writeGamma( globalPointer );

        if ( globalIndex.hasPayloads ) payload = indexIterator.payload();
        if ( havePayloads ) payload.write( temp[ localIndex ] );
       
        if ( haveCounts ) {
          count = indexIterator.count();
          temp[ localIndex ].writeGamma( count );
          globCount[ localIndex ] += count;       
          if ( maxDocPos[ localIndex ] < count ) maxDocPos[ localIndex ] = count;        
          if ( havePositions ) {
            final int[] pos = indexIterator.positionArray();
            // TODO: compress this stuff
            for( int p = 0; p < count; p++ ) temp[ localIndex ].writeGamma( pos[ p ] );
          }
        }
      }
     
      // We now run through the indices used by this term and copy from the temporary buffer.

      OutputBitStream obs;
     
      for( int k = 0; k < usedIndices; k++ ) {
        final int i = usedIndex[ k ];

        localFrequencies[ i ].writeGamma( localFrequency[ i ] );
        if ( haveCounts ) numOccurrences[ i ] += globCount[ i ];
        if ( localGlobCounts[ i ] != null ) localGlobCounts[ i ].writeLongGamma( globCount[ i ] );
        globCount[ i ] = 0;
       
        InputBitStream ibs;
        indexWriter[ i ].newInvertedList();

        temp[ i ].align();
        if ( temp[ i ].buffer() != null ) ibs = direct[ i ];
        else {
          // We cannot read directly from the internal buffer.
          ibs = indirect[ i ];
          ibs.flush();
          temp[ i ].flush();
        }

        ibs.position( 0 );
         
        indexWriter[ i ].writeFrequency( localFrequency[ i ] );
        for( int j = 0; j < localFrequency[ i ]; j++ ) {
          obs = indexWriter[ i ].newDocumentRecord();
          globalPointer = ibs.readGamma();
          localPointer = strategy.localPointer( globalPointer );
          indexWriter[ i ].writeDocumentPointer( obs, localPointer );
          if ( havePayloads ) {
            payload.read( ibs );
            indexWriter[ i ].writePayload( obs, payload );
          }
          if ( haveCounts ) indexWriter[ i ].writePositionCount( obs, count = ibs.readGamma() );
          if ( havePositions ) {
            for( int p = 0; p < count; p++ ) position[ p ] = ibs.readGamma();
            indexWriter[ i ].writeDocumentPositions( obs, position, 0, count, sizeList != null ? sizeList.getInt( globalPointer ) : -1 );
          }
         
        }
        temp[ i ].position( 0 );
        temp[ i ].writtenBits( 0 );
View Full Code Here

   
  }

  public static int[] convertIndexes(FacetDataCache dataCache,String[] vals)
    {
      IntList list = new IntArrayList();
      for (String val : vals)
      {
        int[] range = parse(dataCache,val);
        if ( range!=null)
        {
          for (int i=range[0];i<=range[1];++i)
          {
            list.add(i);
          }
        }
      }
      return list.toIntArray();
    }
View Full Code Here
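
The facet snippets here and below all follow the same idiom: collect matches into an IntArrayList (pre-sized when an upper bound is known, and with no boxing), then return a plain int[] via toIntArray(). A standalone sketch of that idiom, with a hypothetical predicate standing in for the dataCache lookups:

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import java.util.Arrays;
import java.util.function.IntPredicate;

public class CollectIndices {
  // Returns the indices in [0, n) accepted by the (hypothetical) predicate.
  static int[] matchingIndices(int n, IntPredicate accept) {
    IntList list = new IntArrayList(n); // pre-size to the known upper bound
    for (int i = 0; i < n; i++) {
      if (accept.test(i)) list.add(i);
    }
    return list.toIntArray();
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(matchingIndices(10, i -> i % 3 == 0)));
  }
}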

    this.maxIDs = maxIDList.toIntArray();
  }
 
  private static int[] convertString(FacetDataCache dataCache,String[] vals)
  {
      IntList list = new IntArrayList(vals.length);
      for (int i=0;i<vals.length;++i)
      {
        int index = dataCache.valArray.indexOf(vals[i]);
        if (index>=0)
        {
          list.add(index);
        }
      }
      return list.toIntArray();
  }
View Full Code Here

   * @return the array of order indices of the values.
   */
  public static <T> int[] convert(FacetDataCache<T> dataCache,T[] vals)
  {
    if (vals!=null && (vals instanceof String[])) return convertString(dataCache, (String[])vals);
    IntList list = new IntArrayList(vals.length);
    for (int i=0;i<vals.length;++i)
    {
      int index = dataCache.valArray.indexOfWithType(vals[i]);
      if (index>=0)
      {
        list.add(index);
      }
    }
    return list.toIntArray();
  }
View Full Code Here

    }
    this.freqs[0] = reader.numDocs() - totalFreq;
  }

  private static int[] convertString(FacetDataCache<?> dataCache, String[] vals) {
    IntList list = new IntArrayList(vals.length);
    for (int i = 0; i < vals.length; ++i) {
      int index = dataCache.valArray.indexOf(vals[i]);
      if (index >= 0) {
        list.add(index);
      }
    }
    return list.toIntArray();
  }
View Full Code Here

   * @param vals
   * @return the array of order indices of the values.
   */
  public static <T> int[] convert(FacetDataCache<T> dataCache, T[] vals) {
    if (vals != null && (vals instanceof String[])) return convertString(dataCache, (String[]) vals);
    IntList list = new IntArrayList(vals.length);
    for (int i = 0; i < vals.length; ++i) {
      int index = dataCache.valArray.indexOfWithType(vals[i]);
      if (index >= 0) {
        list.add(index);
      }
    }
    return list.toIntArray();
  }
View Full Code Here

      if (delDocs!=null && delDocs.size() > 0)
      {
        ZoieIndexReader<R> reader = openIndexReader();
        if (reader!=null)
        {
          IntList delList = new IntArrayList(delDocs.size());
          DocIDMapper idMapper = reader.getDocIDMaper();
          LongIterator iter = delDocs.iterator();

          while (iter.hasNext()) {
            long uid = iter.nextLong();
            if (ZoieIndexReader.DELETED_UID != uid) {
              int docid = idMapper.getDocID(uid);
              if (docid != DocIDMapper.NOT_FOUND) {
                delList.add(docid);
              }
            }
          }
          delArray = delList.toIntArray();
        }
      }
       
      if (delArray!=null && delArray.length > 0)
      {
View Full Code Here
