Package it.unimi.dsi.util

Examples of it.unimi.dsi.util.Properties


    return bitsForFrequencies + bitsForPointers + bitsForPayloads + bitsForCounts + bitsForPositions + bitsForPositionsOffsets +
    towerData.bitsForTowers() + bitsForQuantumBitLengths + bitsForVariableQuanta + bitsForPositionsQuantumBitLengths + bitsForEntryBitLengths;
  }

  public Properties properties() {
    Properties result = new Properties();
    result.setProperty( Index.PropertyKeys.DOCUMENTS, numberOfDocuments );
    result.setProperty( Index.PropertyKeys.TERMS, currentTerm + 1 );
    result.setProperty( Index.PropertyKeys.POSTINGS, numberOfPostings );
    result.setProperty( Index.PropertyKeys.MAXCOUNT, maxCount );
    result.setProperty( Index.PropertyKeys.INDEXCLASS, FileHPIndex.class.getName() );
    result.setProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM, variableQuanta ? 0 : quantum );
    result.setProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT, height );
    if ( COOKIES ) result.setProperty( "cookies", true );
    // We save all flags, except for the PAYLOADS component, which is only used internally.
    for( Map.Entry<Component,Coding> e: flags.entrySet() )
      if ( e.getKey() != Component.PAYLOADS ) result.addProperty( Index.PropertyKeys.CODING, new MutableString().append( e.getKey() ).append( ':' ).append( e.getValue() ) );
    return result;
  }
View Full Code Here
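
The method above assembles a Properties object key by key: setProperty overwrites a scalar key, while addProperty (used for CODING) accumulates repeated values under one key. Below is a minimal round-trip sketch using only the calls that appear on this page (setProperty, addProperty, save, the filename constructor, and the typed getters); the file name is hypothetical:

import it.unimi.dsi.util.Properties;

public class PropertiesRoundTrip {
  public static void main( String[] args ) throws Exception {
    Properties p = new Properties();
    p.setProperty( "documents", 1000 );             // single-valued key: set replaces
    p.addProperty( "coding", "FREQUENCIES:GAMMA" ); // multi-valued key: add accumulates
    p.addProperty( "coding", "POINTERS:DELTA" );
    p.save( "example.properties" );                 // hypothetical file name

    Properties q = new Properties( "example.properties" ); // reload from disk
    System.out.println( q.getInt( "documents" ) );          // 1000
    System.out.println( java.util.Arrays.toString( q.getStringArray( "coding" ) ) );
  }
}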


          prevOffset = index.writtenBits();
        }

        totPostings += postings;

        final Properties properties = new Properties();
        properties.setProperty( Index.PropertyKeys.DOCUMENTS, documentCount );
        properties.setProperty( Index.PropertyKeys.TERMS, numTerms );
        properties.setProperty( Index.PropertyKeys.POSTINGS, postings );
        properties.setProperty( Index.PropertyKeys.MAXCOUNT, maxCount );
        properties.setProperty( Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName() );
        properties.addProperty( Index.PropertyKeys.CODING, "FREQUENCIES:GAMMA" );
        properties.addProperty( Index.PropertyKeys.CODING, "POINTERS:DELTA" );
        if ( completeness.compareTo( Completeness.COUNTS ) >= 0 ) properties.addProperty( Index.PropertyKeys.CODING, "COUNTS:GAMMA" );
        if ( completeness.compareTo( Completeness.POSITIONS ) >= 0 ) properties.addProperty( Index.PropertyKeys.CODING, "POSITIONS:DELTA" );
        properties.setProperty( Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec( termProcessor ) );
        properties.setProperty( Index.PropertyKeys.OCCURRENCES, numOccurrences );
        properties.setProperty( Index.PropertyKeys.MAXDOCSIZE, maxDocSize );
        properties.setProperty( Index.PropertyKeys.SIZE, index.writtenBits() );
        if ( field != null ) properties.setProperty( Index.PropertyKeys.FIELD, field );
        properties.save( batchBasename + DiskBasedIndex.PROPERTIES_EXTENSION );
        index.close();
        offsets.close();
        posNumBits.close();

      }
      else {
        final IndexWriter indexWriter = new BitStreamIndexWriter( batchBasename, maxDocInBatch + 1, true, flags );

        ByteArrayPostingList bapl;
        OutputBitStream obs;
        int maxCount = -1, maxFrequency = 0, frequency;
        // Compute maximum frequency and maximum count, so we can size the arrays below.
        for ( ByteArrayPostingList b : termMap.values() ) {
          b.close();
          b.align();
          if ( maxFrequency < b.frequency ) maxFrequency = b.frequency;
          if ( maxCount < b.maxCount ) maxCount = b.maxCount;
        }

        final long[] bitPos = new long[ maxFrequency ];
        final int[] pointer = new int[ maxFrequency ];
        int[] pos = new int[ maxCount ];
        final boolean hasCounts = completeness.compareTo( Completeness.COUNTS ) >= 0;
        final boolean hasPositions = completeness.compareTo( Completeness.POSITIONS ) >= 0;
        int count = -1, moreCount = -1;
       
        for ( int i = 0; i < numTerms; i++ ) {
          bapl = termMap.get( termArray[ i ] );
          final InputBitStream ibs = new InputBitStream( bapl.buffer );
          frequency = bapl.frequency; // This could be much more than the actual frequency in virtual indices

          // Calculate posting bit positions and corresponding pointers
          for ( int j = 0; j < frequency; j++ ) {
            bitPos[ j ] = ibs.readBits(); // Cache bit position
            pointer[ j ] = ibs.readDelta(); // Cache pointer
            if ( hasCounts ) count = ibs.readGamma() + 1;
            if ( hasPositions ) ibs.skipDeltas( count ); // Skip document positions
          }

          // Stably sort pointers and bit positions by increasing pointer (bit position breaks ties, making the sort stable)
          it.unimi.dsi.fastutil.Arrays.quickSort( 0, frequency, new AbstractIntComparator() {
            public int compare( final int i0, final int i1 ) {
              final int t = pointer[ i0 ] - pointer[ i1 ];
              if ( t != 0 ) return t;
              final long u = bitPos[ i0 ] - bitPos[ i1 ]; // We need a stable sort
              return u < 0 ? -1 : u > 0 ? 1 : 0;
            }
          },
          new Swapper() {
            public void swap( final int i0, final int i1 ) {
              final long t = bitPos[ i0 ]; bitPos[ i0 ] = bitPos[ i1 ]; bitPos[ i1 ] = t;
              final int p = pointer[ i0 ]; pointer[ i0 ] = pointer[ i1 ]; pointer[ i1 ] = p;
            }
          } );

          int actualFrequency = frequency;
          // Compute actual frequency for virtual indices
          if ( indexingIsVirtual ) {
            actualFrequency = 1;
            for ( int j = 1; j < frequency; j++ ) if ( pointer[ j ] != pointer[ j - 1 ] ) actualFrequency++;
            if ( ASSERTS ) {
              for ( int j = 1; j < frequency; j++ ) {
                assert pointer[ j ] >= pointer[ j - 1 ];
                assert pointer[ j ] != pointer[ j - 1 ] || bitPos[ j ] > bitPos[ j - 1 ];
              }
            }
          }

          indexWriter.newInvertedList();
          indexWriter.writeFrequency( actualFrequency );

          int currPointer;
          for ( int j = 0; j < frequency; j++ ) {
            ibs.position( bitPos[ j ] );
            obs = indexWriter.newDocumentRecord();
            indexWriter.writeDocumentPointer( obs, currPointer = ibs.readDelta() );
            if ( ASSERTS ) assert currPointer == pointer[ j ];
            if ( hasCounts ) count = ibs.readGamma() + 1;
            if ( hasPositions ) {
              ibs.readDeltas( pos, count );
              for ( int p = 1; p < count; p++ ) pos[ p ] += pos[ p - 1 ] + 1;
            }

            if ( indexingIsVirtual ) {
              while( j < frequency - 1 ) {
                ibs.position( bitPos[ j + 1 ] );
                if ( currPointer != ibs.readDelta() ) break;
                j++;
                if ( hasCounts ) moreCount = ibs.readGamma() + 1;
                if ( hasPositions ) {
                  pos = IntArrays.grow( pos, count + moreCount, count );
                  pos[ count ] = ibs.readDelta();
                  if ( ASSERTS ) assert pos[ count ] > pos[ count - 1 ];
                  for ( int p = 1; p < moreCount; p++ ) pos[ count + p ] = pos[ count + p - 1 ] + 1 + ibs.readDelta();
                }
                count += moreCount;
              }
              if ( maxCount < count ) maxCount = count;
            }

            if ( hasCounts ) indexWriter.writePositionCount( obs, count );
            if ( hasPositions ) indexWriter.writeDocumentPositions( obs, pos, 0, count, -1 );
          }

          frequencies.writeGamma( actualFrequency );
          globCounts.writeLongGamma( bapl.globCount );
        }

        indexWriter.close();
        final Properties properties = indexWriter.properties();
        totPostings += properties.getLong( "postings" );
        properties.setProperty( Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec( termProcessor ) );
        properties.setProperty( Index.PropertyKeys.OCCURRENCES, numOccurrences );
        properties.setProperty( Index.PropertyKeys.MAXDOCSIZE, maxDocSize );
        properties.setProperty( Index.PropertyKeys.SIZE, indexWriter.writtenBits() );
        if ( field != null ) properties.setProperty( Index.PropertyKeys.FIELD, field );
        properties.save( batchBasename + DiskBasedIndex.PROPERTIES_EXTENSION );

        if ( indexingIsRemapped ) {
          // We must permute sizes
          final int[] document = new int[ documentCount ], size = new int[ documentCount ];
          final InputBitStream sizes = new InputBitStream( batchBasename + DiskBasedIndex.SIZES_EXTENSION );
View Full Code Here
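
The inner decoding loop above reconstructs positions from a gap encoding: each stored value is the distance from the previous position minus one, so decoding is a prefix sum. A standalone sketch of the same reconstruction, with hypothetical data:

public class GapDecode {
  public static void main( String[] args ) {
    // Gap-encoded positions (hypothetical): the actual positions are 3, 7, 8.
    int[] pos = { 3, 3, 0 }; // first value absolute, then each gap minus one
    final int count = pos.length;
    for ( int p = 1; p < count; p++ ) pos[ p ] += pos[ p - 1 ] + 1;
    System.out.println( java.util.Arrays.toString( pos ) ); // [3, 7, 8]
  }
}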

    towerData.bitsForLowerBitSkips + towerData.bitsForTowerLengths +
    bitsForQuantumBitLengths + bitsForEntryBitLengths;
  }
 
  public Properties properties() {
    Properties result = super.properties();
    result.setProperty( Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName() );
    result.setProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM, variableQuanta ? 0 : quantum );
    result.setProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT, height );
    return result; 
  }
View Full Code Here
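
This variant delegates to super.properties() and overlays its own keys, so each subclass records only what it knows about; note that a skip quantum of 0 acts as the marker for variable quanta. A minimal sketch of the same layering pattern, with hypothetical classes, key names, and values:

import it.unimi.dsi.util.Properties;

class BaseWriter {
  public Properties properties() {
    Properties result = new Properties();
    result.setProperty( "documents", 1000 ); // base-level key (hypothetical value)
    return result;
  }
}

class SkippingWriter extends BaseWriter {
  private final boolean variableQuanta = false;
  private final int quantum = 64, height = 8; // hypothetical skip parameters

  @Override
  public Properties properties() {
    Properties result = super.properties(); // start from the inherited keys
    // As in the snippet above, 0 signals variable quanta.
    result.setProperty( "skipquantum", variableQuanta ? 0 : quantum );
    result.setProperty( "skipheight", height );
    return result;
  }
}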

    numIndices = strategy.numberOfLocalIndices();

    final Coding positionCoding = writerFlags.get( Component.POSITIONS );

    inputProperties = new Properties( inputBasename + DiskBasedIndex.PROPERTIES_EXTENSION );
    globalIndex = DiskBasedIndex.getInstance( inputBasename, inputProperties, false, positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE, false, null );
    indexReader = globalIndex.getReader();

    localBasename = new String[ numIndices ];
    for( int i = 0; i < numIndices; i++ ) localBasename[ i ] = outputBasename + "-" + i;
View Full Code Here

      pl.update();
    }

    pl.done();

    Properties globalProperties = new Properties();
    globalProperties.setProperty( Index.PropertyKeys.FIELD, inputProperties.getProperty( Index.PropertyKeys.FIELD ) );
    globalProperties.setProperty( Index.PropertyKeys.TERMPROCESSOR, inputProperties.getProperty( Index.PropertyKeys.TERMPROCESSOR ) );
   
    for ( int i = 0; i < numIndices; i++ ) {
      localFrequencies[ i ].close();
      if ( localGlobCounts[ i ] != null ) localGlobCounts[ i ].close();
      localTerms[ i ].close();
      indexWriter[ i ].close();
      if ( bloomFilterPrecision != 0 ) BinIO.storeObject( bloomFilter[ i ], localBasename[ i ] + DocumentalCluster.BLOOM_EXTENSION );
      temp[ i ].close();
      tempFile[ i ].delete();
     
      Properties localProperties = indexWriter[ i ].properties();
      localProperties.addAll( globalProperties );
      localProperties.setProperty( Index.PropertyKeys.MAXCOUNT, String.valueOf( maxDocPos[ i ] ) );
      localProperties.setProperty( Index.PropertyKeys.MAXDOCSIZE, maxDocSize[ i ] );
      localProperties.setProperty( Index.PropertyKeys.FIELD, globalProperties.getProperty( Index.PropertyKeys.FIELD ) );
      localProperties.setProperty( Index.PropertyKeys.OCCURRENCES, haveCounts ? numOccurrences[ i ] : -1 );
      localProperties.setProperty( Index.PropertyKeys.POSTINGS, numPostings[ i ] );
      localProperties.setProperty( Index.PropertyKeys.TERMS, numTerms[ i ] );
      if ( havePayloads ) localProperties.setProperty( Index.PropertyKeys.PAYLOADCLASS, payload.getClass().getName() );
      if ( strategyProperties[ i ] != null ) localProperties.addAll( strategyProperties[ i ] );
      localProperties.save( localBasename[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION );
    }

    if ( strategyFilename != null ) globalProperties.setProperty( IndexCluster.PropertyKeys.STRATEGY, strategyFilename );
    for( int i = 0; i < numIndices; i++ ) globalProperties.addProperty( IndexCluster.PropertyKeys.LOCALINDEX, localBasename[ i ] );
    globalProperties.setProperty( DocumentalCluster.PropertyKeys.BLOOM, bloomFilterPrecision != 0 );
View Full Code Here
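
Here each local property file starts from the writer's own metadata, receives the shared keys via addAll, and then overrides per-index values with setProperty. A small sketch of that merge, with hypothetical keys and values:

import it.unimi.dsi.util.Properties;

public class MergeProperties {
  public static void main( String[] args ) {
    Properties global = new Properties();
    global.setProperty( "field", "text" );              // shared, hypothetical values
    global.setProperty( "termprocessor", "NullTermProcessor" );

    Properties local = new Properties();
    local.setProperty( "terms", 12345 ); // per-index value
    local.addAll( global );              // copy every shared key into the local set

    System.out.println( local.getString( "field" ) ); // text
  }
}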

   * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
   * feature might not be available with some kinds of index).
   * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps, final EnumMap<UriKeys,String> queryProperties ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, queryProperties );
  }
View Full Code Here

   * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
   * feature might not be available with some kinds of index).
   * @see #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)
   */
  public static BitStreamIndex getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final boolean maps ) throws ConfigurationException, ClassNotFoundException, IOException, InstantiationException, IllegalAccessException {
    return getInstance( basename, new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION ), randomAccess, documentSizes, maps, null );
  }
View Full Code Here
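
Both overloads above simply load basename.properties themselves and delegate to the Properties-taking variant. A hedged usage sketch, assuming these static methods live in DiskBasedIndex (as the delegation code suggests) and that an index with basename "index" already exists on disk:

import it.unimi.dsi.mg4j.index.BitStreamIndex;
import it.unimi.dsi.mg4j.index.DiskBasedIndex;

public class LoadIndex {
  public static void main( String[] args ) throws Exception {
    // randomAccess, documentSizes and maps all enabled; the basename is hypothetical.
    BitStreamIndex index = DiskBasedIndex.getInstance( "index", true, true, true );
    System.out.println( index.numberOfDocuments + " documents" );
  }
}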

   * might be loaded anyway because the compression method for positions requires it).
   * @param queryProperties a map containing associations between {@link it.unimi.dsi.mg4j.index.Index.UriKeys} and values, or <code>null</code>.
   */
  @SuppressWarnings("unchecked")
  static public Index getInstance( final CharSequence basename, final boolean randomAccess, final boolean documentSizes, final EnumMap<UriKeys,String> queryProperties ) throws ConfigurationException, IOException, ClassNotFoundException, SecurityException, URISyntaxException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    final Properties properties = new Properties( basename + DiskBasedIndex.PROPERTIES_EXTENSION );
    ClusteringStrategy strategy = null;
    Class<? extends ClusteringStrategy> strategyClass = null;
    if ( properties.containsKey( PropertyKeys.STRATEGY ) ) strategy = (ClusteringStrategy)BinIO.loadObject( properties.getString( PropertyKeys.STRATEGY ) );
    else if ( properties.containsKey( PropertyKeys.STRATEGYCLASS ) ) try {
      strategyClass = (Class<? extends ClusteringStrategy>)MG4JClassParser.getParser().parse( properties.getString( PropertyKeys.STRATEGYCLASS ) );
    }
    catch ( ParseException e ) {
      throw new RuntimeException( e );
    }
    else throw new IllegalArgumentException( "Cluster properties must contain either a strategy or a strategy class property" );
    final Class<? extends IndexCluster> indexClass = (Class<? extends IndexCluster>)Class.forName( properties.getString( Index.PropertyKeys.INDEXCLASS, "(missing index class)" ));

    String[] localBasename = properties.getStringArray( PropertyKeys.LOCALINDEX );
    Index[] localIndex = new Index[ localBasename.length ];
    for( int i = 0; i < localIndex.length ; i++ ) localIndex[ i ] = Index.getInstance( localBasename[ i ], randomAccess, documentSizes );

    final int numberOfDocuments = properties.getInt( Index.PropertyKeys.DOCUMENTS );
    final IntList sizes = queryProperties != null && queryProperties.containsKey( Index.UriKeys.SIZES ) ?
        DiskBasedIndex.readSizes( queryProperties.get( Index.UriKeys.SIZES ), numberOfDocuments ) : null;

    if ( sizes != null && documentSizes ) LOGGER.warn( "You are loading both local sizes and a global size file specified by the \"size\" properties, which is usually nonsensical" );

    boolean hasCounts = true;
    boolean hasPositions = true;
    Payload payload = null;
   
    for ( int i = 0; i < localIndex.length; i++ ) {
      hasCounts = hasCounts && localIndex[ i ].hasCounts;
      hasPositions = hasPositions && localIndex[ i ].hasPositions;

      if ( i == 0 ) payload = localIndex[ i ].payload;
      if ( ( payload == null ) != ( localIndex[ i ].payload == null ) || payload != null && ! payload.compatibleWith( localIndex[ i ].payload ) ) throw new IllegalStateException( "The payload specification of index " + localIndex[ 0 ] + " is not compatible with that of index " + localIndex[ i ] );
    }

    // We stem the names of Bloom filters from the index basename.
    BloomFilter[] termFilter = null;
    if ( properties.getBoolean( DocumentalCluster.PropertyKeys.BLOOM ) ) {
      LOGGER.debug( "Loading Bloom filters..." );
      termFilter = new BloomFilter[ localIndex.length ];
      for ( int i = 0; i < localIndex.length; i++ )
        termFilter[ i ] = (BloomFilter)BinIO.loadObject( basename + "-" + i + BLOOM_EXTENSION );
      LOGGER.debug( "Completed." );
    }

    // Let us rebuild the strategy in case it's a chained strategy
    if ( strategyClass != null ) {
      strategy = strategyClass.getConstructor( Index[].class, BloomFilter[].class ).newInstance( localIndex, termFilter );
    }
    else {
      if ( strategy instanceof ChainedLexicalClusteringStrategy ) strategy = new ChainedLexicalClusteringStrategy( localIndex, termFilter );
      else if ( strategy.numberOfLocalIndices() != localBasename.length ) throw new IllegalArgumentException( "The number of local indices of the strategy (" + strategy.numberOfLocalIndices() + ") and the number of local indices specified by the property file (" + localBasename.length + ") differ" );
    }

    if ( LexicalCluster.class.isAssignableFrom( indexClass ) )
      return new LexicalCluster( localIndex, (LexicalClusteringStrategy)strategy, termFilter,
          numberOfDocuments,
          properties.getInt( Index.PropertyKeys.TERMS ),
          properties.getLong( Index.PropertyKeys.POSTINGS ),
          properties.getLong( Index.PropertyKeys.OCCURRENCES ),
          properties.getInt( Index.PropertyKeys.MAXCOUNT ),
          payload, hasCounts, hasPositions,
          Index.getTermProcessor( properties ),
          properties.getString( Index.PropertyKeys.FIELD ),
          sizes,
          properties );
    else if ( DocumentalCluster.class.isAssignableFrom( indexClass ) ) {
      if ( DocumentalConcatenatedCluster.class.isAssignableFrom( indexClass ) )
        return new DocumentalConcatenatedCluster( localIndex, (DocumentalClusteringStrategy)strategy,
          properties.getBoolean( IndexCluster.PropertyKeys.FLAT ),
          termFilter,
          numberOfDocuments,
          properties.getInt( Index.PropertyKeys.TERMS ),
          properties.getLong( Index.PropertyKeys.POSTINGS ),
          properties.getLong( Index.PropertyKeys.OCCURRENCES ),
          properties.getInt( Index.PropertyKeys.MAXCOUNT ),
          payload, hasCounts, hasPositions,
          Index.getTermProcessor( properties ),
          properties.getString( Index.PropertyKeys.FIELD ),
          sizes,
          properties );
      return new DocumentalMergedCluster( localIndex, (DocumentalClusteringStrategy)strategy,
          properties.getBoolean( IndexCluster.PropertyKeys.FLAT ),
          termFilter,
          numberOfDocuments,
          properties.getInt( Index.PropertyKeys.TERMS ),
          properties.getLong( Index.PropertyKeys.POSTINGS ),
          properties.getLong( Index.PropertyKeys.OCCURRENCES ),
          properties.getInt( Index.PropertyKeys.MAXCOUNT ),
          payload, hasCounts, hasPositions,
          Index.getTermProcessor( properties ),
          properties.getString( Index.PropertyKeys.FIELD ),
          sizes,
          properties );
    }
    else throw new IllegalArgumentException( "Unknown IndexCluster implementation: " + indexClass.getName() );
   
View Full Code Here
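
The cluster loader above exercises most of the typed accessors: getInt, getLong, getBoolean, getString (with and without a default), getStringArray, and containsKey for dispatching on optional keys. A compact sketch of the same access patterns against a hypothetical property file, assuming its keys use the lowercase names seen in the files saved earlier (e.g. the getLong( "postings" ) call above):

import it.unimi.dsi.util.Properties;

public class ReadClusterProperties {
  public static void main( String[] args ) throws Exception {
    Properties p = new Properties( "cluster.properties" ); // hypothetical file
    final int documents = p.getInt( "documents" );
    final long postings = p.getLong( "postings" );
    final String indexClass = p.getString( "indexclass", "(missing index class)" ); // with default
    final String[] localIndex = p.getStringArray( "localindex" );                   // multi-valued key
    if ( p.containsKey( "strategy" ) ) System.out.println( "strategy file: " + p.getString( "strategy" ) );
    System.out.println( documents + " documents, " + postings + " postings, " + localIndex.length + " local indices of class " + indexClass );
  }
}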

  }

  public Properties[] properties() {
    Properties[] properties = new Properties[ k ];
    for( int i = 0; i < k; i++ ) {
      properties[ i ] = new Properties();
      properties[ i ].addProperty( "termfrom", cutPointTerm[ i ] )
      properties[ i ].addProperty( "termto", cutPointTerm[ i + 1 ] )
      properties[ i ].addProperty( "termnumberfrom", cutPoint[ i ] )
      properties[ i ].addProperty( "termnumberto", cutPoint[ i + 1 ] )
    }
View Full Code Here
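
properties() above returns one metadata object per local index, recording the term range each partition covers. A hedged sketch of building and persisting such an array, with hypothetical cut points and file names:

import it.unimi.dsi.util.Properties;

public class SavePartitionProperties {
  public static void main( String[] args ) throws Exception {
    final int k = 3;
    final int[] cutPoint = { 0, 1000, 2500, 4000 }; // hypothetical term cut points
    final Properties[] properties = new Properties[ k ];
    for ( int i = 0; i < k; i++ ) {
      properties[ i ] = new Properties();
      properties[ i ].addProperty( "termnumberfrom", cutPoint[ i ] );
      properties[ i ].addProperty( "termnumberto", cutPoint[ i + 1 ] );
      properties[ i ].save( "partition-" + i + ".properties" ); // one file per local index
    }
  }
}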

    final File batchDir = batchDirName == null ? null : new File( batchDirName );

    for ( int i = 0; i < indexedField.length; i++ ) {
      final int batches;
      if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.VIRTUAL ) {
        batches = new Properties( basenameField[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION ).getInt( Index.PropertyKeys.BATCHES );
        final String[] inputBasename = new String[ batches ];
        for( int j = 0; j < inputBasename.length; j++ ) inputBasename[ j ] = Scan.batchBasename( j, basenameField[ i ], batchDir );
        new Paste( basenameField[ i ], inputBasename, false, false, combineBufferSize, batchDir, pasteBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
      }
      else {
        final String[] inputBasename = new Properties( basenameField[ i ] + Scan.CLUSTER_PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
        batches = inputBasename.length;
        if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.TEXT ) {
          if ( mapFile != null ) new Merge( basenameField[ i ], inputBasename, false, combineBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
          else new Concatenate( basenameField[ i ], inputBasename, false, combineBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
        }
View Full Code Here
