Package it.unimi.dsi.io

Examples of it.unimi.dsi.io.FileLinesCollection


    JSAPResult jsapResult = jsap.parse( arg );
    if ( jsap.messagePrinted() ) return;
   
    String uri[] = null;
    if ( jsapResult.getString( "uris" ) != null ) {
      Collection<MutableString> lines = new FileLinesCollection( jsapResult.getString( "uris" ), "ASCII" ).allLines();
      uri = new String[ lines.size() ];
      int i = 0;
      for( Object l: lines ) uri[ i++ ] = l.toString();
    }
   
View Full Code Here


    }
   
   
    LOGGER.info( "Creating term maps (class: " + termMapClass.getSimpleName() + ")..." );
    for( int i = 0; i < indexedField.length; i++ )
      BinIO.storeObject( StringMaps.synchronize( termMapClass.getConstructor( Iterable.class ).newInstance( new FileLinesCollection( basenameField[ i ] + DiskBasedIndex.TERMS_EXTENSION, "UTF-8" ) ) ), basenameField[ i ] + DiskBasedIndex.TERMMAP_EXTENSION );
   
    LOGGER.info( "Indexing completed." );
  }
View Full Code Here

          outputStream.write( '\n' );
        }
        pl.done();
        outputStream.close();
      }
      collection = new FileLinesCollection( termFile, "UTF-8" );
    }
    LOGGER.debug( "Building function..." );
    final int width = jsapResult.getInt( "width" );
    if ( jsapResult.getBoolean( "sorted" ) ) BinIO.storeObject( new URLMPHVirtualDocumentResolver( new ShiftAddXorSignedStringMap( collection.iterator(), new TwoStepsLcpMonotoneMinimalPerfectHashFunction<CharSequence>( collection, iso ? TransformationStrategies.prefixFreeIso() : TransformationStrategies.prefixFreeUtf16() ), width ) ), resolverName );
    else BinIO.storeObject( new URLMPHVirtualDocumentResolver( new ShiftAddXorSignedStringMap( collection.iterator(), new MWHCFunction<CharSequence>( collection, iso ? TransformationStrategies.iso() : TransformationStrategies.utf16() ), width ) ), resolverName );
View Full Code Here

      final String basenameField = basename + (stem ? "-" + factory.fieldName( indexedField[ i ] ) : "" );
      index[ i ] = Index.getInstance( basenameField );
      if ( ! ( index[ i ] instanceof BitStreamIndex ) ) allBitStreamIndices = false;
     
      if ( termLists ) {
        terms[ i ] = new ObjectArrayList<MutableString>( new FileLinesCollection( basenameField + DiskBasedIndex.TERMS_EXTENSION, "UTF-8" ).allLines() );
        numberOfTerms[ i ] = terms[ i ].size();
      }
      else numberOfTerms[ i ] = index[ i ].numberOfTerms;
      totalTerms += numberOfTerms[ i ];
     
View Full Code Here

  static {
    Util.ensureLog4JIsConfigured( Level.INFO );
  }

  private static StringMap<? extends CharSequence> createMap( String basename ) throws IOException {
    FileLinesCollection flc = new FileLinesCollection( basename, "UTF-8" );
    return new ShiftAddXorSignedStringMap( flc.iterator(), new MWHCFunction<CharSequence>( flc, TransformationStrategies.utf16() ) );
  }
View Full Code Here

    // For the text part, we need term maps to call sameIndex()
    String[] localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
    for ( String index : localIndex ) BinIO.storeObject( createMap(index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION );

    sameContent( basename + "-text", basename + "-text-part", new FileLinesCollection( basename + "-text" + TERMS_EXTENSION, "UTF-8" ).iterator() );

    sameContent( basename + "-int", basename + "-int-part" );
    sameContent( basename + "-date", basename + "-date-part" );

    localIndex = new Properties( basename + "-virtual-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
    for ( String index : localIndex )
      BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION );

    sameContent( basename + "-virtual", basename + "-virtual-part", new FileLinesCollection( basename + "-virtual" + TERMS_EXTENSION, "UTF-8" ).iterator() );

    localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
    new Concatenate( basename + "-text-merged", localIndex, false, 1024, flags, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run();
    if ( quantum >= 0 ) sameIndex( basename + "-text", basename + "-text-merged", "batches", flags.containsKey( Component.COUNTS ) ? "" : "occurrences" );
    sameContent( basename + "-text", basename + "-text-merged" );
View Full Code Here

        1024 * 1024, DEFAULT_LOG_INTERVAL ).run();

    String[] localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
    for ( String index : localIndex )
      BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION );
    sameContent( basename + "-text", basename + "-text-part", new FileLinesCollection( basename + "-text" + TERMS_EXTENSION, "UTF-8" ).iterator() );

    sameContent( basename + "-int", basename + "-int-part" );
    sameContent( basename + "-date", basename + "-date-part" );

    localIndex = new Properties( basename + "-virtual-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
    for ( String index : localIndex )
      BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION );
    sameContent( basename + "-virtual", basename + "-virtual-part", new FileLinesCollection( basename + "-virtual" + TERMS_EXTENSION, "UTF-8" ).iterator() );

    localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );

    new Merge( basename + "-text-merged", localIndex, false, 1024, flags, interleaved, quantum != 0, quantum, height, 1024 * 1024, DEFAULT_LOG_INTERVAL ).run();
    if ( ! interleaved && quantum >= 0 ) sameIndex( basename + "-text", basename + "-text-merged", "batches" );
View Full Code Here

    new PartitionLexically( basename + "-virtual", basename + "-virtual-part", uniform, basename + "-strategy", 1024, DEFAULT_LOG_INTERVAL ).run();

    String[] localIndex = new Properties( basename + "-text-part" + PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
    for ( String index : localIndex )
      BinIO.storeObject( createMap( index + TERMS_EXTENSION ), index + TERMMAP_EXTENSION );
    sameContent( basename + "-text", basename + "-text-part", new FileLinesCollection( basename + "-text" + TERMS_EXTENSION, "UTF-8" ).iterator() );
    sameContent( basename + "-virtual", basename + "-virtual-part" );
  }
View Full Code Here

   
    final String basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
        new IndexBuilder( basename, new StringArrayDocumentCollection( "A B", "B", "A", "A" ) ).run();
    BinIO.storeObject( DocumentalStrategies.uniform( 2, 4 ), basename + "-strategy" );
    new PartitionDocumentally( basename + "-text", basename + "-cluster", DocumentalStrategies.uniform( 2, 4 ), basename + "-strategy", 0, 1024, CompressionFlags.DEFAULT_STANDARD_INDEX, true, false, 0, 0, 0, ProgressLogger.DEFAULT_LOG_INTERVAL ).run();
    FileLinesCollection flc;
    flc = new FileLinesCollection( basename + "-cluster-0.terms", "ASCII" );
    BinIO.storeObject( new ShiftAddXorSignedStringMap( flc.iterator(), new MWHCFunction<CharSequence>( flc , TransformationStrategies.utf16() ) ), basename + "-cluster-0.termmap" )
    flc = new FileLinesCollection( basename + "-cluster-1.terms", "ASCII" );
    BinIO.storeObject( new ShiftAddXorSignedStringMap( flc.iterator(), new MWHCFunction<CharSequence>( flc , TransformationStrategies.utf16() ) ), basename + "-cluster-1.termmap" )
    Index index = Index.getInstance( basename + "-cluster" );
    assertEquals( Integer.MAX_VALUE, index.documents( "b" ).skipTo( 2 ) );
  }
View Full Code Here

    indexFBody = DiskBasedIndex.getInstance( bodyFBasename + "-text", true, true );
    indexFTitle = DiskBasedIndex.getInstance( titleFBasename + "-text", true, true );
    indexBodyBis = DiskBasedIndex.getInstance( bodyBasenameBis + "-text", true, true );

    new Paste( basenameFComb, new String[] { bodyFBasename + "-text", titleFBasename + "-text" }, true, true, 1024, null, 1024, CompressionFlags.DEFAULT_STANDARD_INDEX, true, false, -1, -1, -1, 10 ).run();
    immutableExternalPrefixMap = new ImmutableExternalPrefixMap( new FileLinesCollection( basenameFComb + ".terms", "UTF-8" ) );
    simpleParser = new SimpleParser( index0.termProcessor );
  }
View Full Code Here

TOP

Related Classes of it.unimi.dsi.io.FileLinesCollection

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.