Package it.unimi.dsi.lang

Examples of it.unimi.dsi.lang.MutableString


    // Reader over the global term file (inputBasename + TERMS_EXTENSION), decoded as UTF-8.
    final FastBufferedReader terms = new FastBufferedReader( new InputStreamReader( new FileInputStream( inputBasename + DiskBasedIndex.TERMS_EXTENSION ), "UTF-8" ) );
   
    // One UTF-8 writer per local index, each targeting localBasename[ i ] + TERMS_EXTENSION.
    for( int i = 0; i < numIndices; i++ ) localTerms[ i ] = new PrintWriter( new OutputStreamWriter( new FastBufferedOutputStream( new FileOutputStream( localBasename[ i ] + DiskBasedIndex.TERMS_EXTENSION ) ), "UTF-8" ) );

    // The current term
    final MutableString currTerm = new MutableString();
   
    // Configure and start the progress reporting object (presumably a progress logger -- its declaration is not visible here).
    pl.itemsName = "terms";
    pl.logInterval = logInterval;
    pl.start( "Partitioning index terms..." );

    // termNumber is the position of the current term in the global term file; k is the local index it is routed to.
    int termNumber = 0, k;
   
    // Read the global term file line by line, reusing currTerm as the line buffer.
    while( terms.readLine( currTerm ) != null ) {
      k = strategy.localIndex( termNumber ); // The local index for this term
      // Consistency check: the number of terms already routed to local index k must coincide
      // with the local number the strategy assigns to this term.
      if ( numTerms[ k ] != strategy.localNumber( termNumber ) ) throw new IllegalStateException();
      numTerms[ k ]++;
      // Write the term, followed by a line terminator, to the term file of local index k.
      currTerm.println( localTerms[ k ] );
      pl.update();
      termNumber++;
    }

    // Only the global reader is closed here; the local writers are not closed within this fragment.
    terms.close();
View Full Code Here


      if ( posNumBits != null ) localPosNumBits[ i ] = new OutputBitStream( localBasename[ i ] + DiskBasedIndex.POSITIONS_NUMBER_OF_BITS_EXTENSION );
      localOffsets[ i ].writeGamma( 0 );
    }

    // The current term
    final MutableString currTerm = new MutableString();
   
    pl.expectedUpdates = ( new File( inputBasename + DiskBasedIndex.INDEX_EXTENSION ).length() + ( isHighPerformance ? new File( inputBasename + DiskBasedIndex.POSITIONS_EXTENSION ).length() : 0 ) ) * 8;
    pl.itemsName = "bits";
    pl.logInterval = logInterval;
    pl.start( "Partitioning index..." );

    int termNumber = 0, k, prevK = -1, previousHeaderLength = 0, newHeaderLength = 0;
    long length, count, positionsOffset = 0;
    int res, frequency;
   
    while( terms.readLine( currTerm ) != null ) {
      k = strategy.localIndex( termNumber ); // The local index for this term
      if ( numTerms[ k ] != strategy.localNumber( termNumber ) ) throw new IllegalStateException();
      numTerms[ k ]++;
     
      if ( isHighPerformance ) {
        final long temp = globalIndex.readBits();
        positionsOffset = globalIndex.readLongDelta();
        previousHeaderLength = (int)( globalIndex.readBits() - temp );
        if ( prevK != -1 ) {
          length = positionsOffset - globalPositions.readBits();
          pl.count += length;
          while( length > 0 ) {
            res = (int)Math.min( bufferSize * 8, length );
            globalPositions.read( buffer, res );
            localPositionsStream[ prevK ].write( buffer, res );
            length -= res;
          }
        }
        newHeaderLength = localIndexStream[ k ].writeLongDelta( localPositionsStream[ k ].writtenBits() );
      }
     
     
      frequency = frequencies.readGamma();
      localFrequencies[ k ].writeGamma( frequency );
      numberOfPostings[ k ] += frequency;

      if ( posNumBits != null ) localPosNumBits[ k ].writeGamma( posNumBits.readGamma() );
     
      count = globCounts.readLongGamma();
      numberOfOccurrences[ k ] += count;
      localGlobCounts[ k ].writeLongGamma( count );
     
      currTerm.println( localTerms[ k ] );
     
      length = offsets.readLongGamma() - previousHeaderLength;
      localOffsets[ k ].writeLongGamma( length + newHeaderLength );
      pl.count += length + previousHeaderLength - 1;
     
View Full Code Here

    public Document getDocument( final InputStream rawContent, final Reference2ObjectMap<Enum<?>,Object> metadata ) throws IOException {
      return new AbstractDocument() {
        final DataInputStream rawContentDataInputStream = new DataInputStream( rawContent );
        int nextFieldToRead = 0;
        final MutableString uri = new MutableString();
       
        {
          uri.readSelfDelimUTF8( rawContent ).compact();
        }
       
        @Override
        public void close() throws IOException {
          super.close();
          rawContent.close();
        }
       
        public CharSequence title() {
          return (CharSequence)metadata.get( MetadataKeys.TITLE );
        }
       
        public String toString() {
          return title().toString();
        }

        public CharSequence uri() {
          return uri.length() == 0 ? null : uri;
        }
       
        /** Skips until the end of the current field, and increments <code>nextFieldToRead</code>.
         * @throws ClassNotFoundException
         * @throws IOException
         */
        private void skipOneField() throws IOException, ClassNotFoundException {
          switch( fieldType( nextFieldToRead ) ) {
          case TEXT:
            MutableString word = new MutableString();
            MutableString nonWord = new MutableString();
            do {
              word.readSelfDelimUTF8( rawContent );
              if ( exact ) nonWord.readSelfDelimUTF8( rawContent );
            } while ( word.length() > 0 || ( exact && nonWord.length() > 0 ) );
            break;
          case VIRTUAL:
            final int nfrag = rawContentDataInputStream.readInt();
            for ( int i = 0; i < 2 * nfrag; i++ ) MutableString.skipSelfDelimUTF8( rawContent );
            break;
          default: // Non-text and non-virtual
            new ObjectInputStream( rawContent ).readObject();
          }
          nextFieldToRead++;
        }
       
        /** Skips to the given field.
         *
         * @param field the field to skip to.
         * @throws IOException
         * @throws ClassNotFoundException
         */
        private void skipToField( final int field ) throws IOException, ClassNotFoundException {
          if ( nextFieldToRead > field ) throw new IllegalStateException( "Trying to skip to field " + field + " after " + nextFieldToRead );
          while ( nextFieldToRead < field ) skipOneField();
        }

        public Object content( final int field ) {
          ensureFieldIndex( field );
          Object result = null;
          if ( DEBUG ) LOGGER.debug( "Called content(" + field + "); nextField:" + nextFieldToRead );
          try {
            skipToField( field );
            if ( fieldType( nextFieldToRead ) == FieldType.VIRTUAL ) {
              final int nfrag = rawContentDataInputStream.readInt();
              MutableString doc = new MutableString();
              MutableString text = new MutableString();
              VirtualDocumentFragment[] fragArray = new VirtualDocumentFragment[ nfrag ];
              for ( int i = 0; i < nfrag; i++ ) {
                doc.readSelfDelimUTF8( rawContent );
                text.readSelfDelimUTF8( rawContent );
                fragArray[ i ] = new AnchorExtractor.Anchor( doc.copy(), text.copy() );
              }
              result = new ObjectArrayList<VirtualDocumentFragment>( fragArray );
            }
            else if ( fieldType( nextFieldToRead ) != FieldType.TEXT ) {
              result = new ObjectInputStream( rawContent ).readObject();
              if ( DEBUG ) LOGGER.debug( "Read " + result + " from field " + fieldName( nextFieldToRead ) + " of object " + title() );
              nextFieldToRead++;
            }
            else {
              if ( DEBUG ) LOGGER.debug( "Returning reader for " + field );
              result = new Reader() {
                FastBufferedReader fbr = null;
                int f = field;
                public void close() {}
                public int read( final char[] cbuf, final int off, final int len ) throws IOException {
                  if ( fbr == null ) {
                    if ( DEBUG ) LOGGER.debug( "Initialising reader for content " + f );
                    MutableString text = new MutableString();
                    MutableString word = new MutableString();
                    MutableString nonWord = new MutableString();
                    do {
                      text.append( word.readSelfDelimUTF8( rawContent ) );
                      if ( exact ) text.append( nonWord.readSelfDelimUTF8( rawContent ) );
                      else text.append( ' ' );
                    } while ( word.length() > 0 || ( exact && nonWord.length() > 0 ) );
                    fbr = new FastBufferedReader( text );
                    nextFieldToRead++;
                  }
                  return fbr.read( cbuf, off, len );
                }
              };
            }
          } catch ( IOException e ) {
            throw new RuntimeException( e );
          } catch (ClassNotFoundException e) {
            throw new RuntimeException( e );
          }
          return result;
        }

        public WordReader wordReader( final int field )  {
          ensureFieldIndex( field );
          if ( DEBUG ) LOGGER.debug( "Called wordReader(" + field + ")" );
          try {
            skipToField( field );
          } catch ( Exception e ) {
            throw new RuntimeException( e );
          }
          //logger.debug( "Asked for a new word reader for field " + fieldName( field ) );
          switch ( fieldType( field ) ) {
          case TEXT:
            return new WordReader() {
              private static final long serialVersionUID = 1L;
              public boolean next( final MutableString word, final MutableString nonWord ) throws IOException {
                try {
                  word.readSelfDelimUTF8( rawContent );
                }
                catch( EOFException e ) {
                  return false; // TODO: a bit raw
                }
                nonWord.length( 0 );
               
                if ( exact ) {
                  try {
                    nonWord.readSelfDelimUTF8( rawContent );
                  }
                  catch( EOFException e ) {
                    return true; // TODO: a bit raw
                  }
                }
                else nonWord.append( ' ' );

                final boolean goOn = word.length() != 0 || ( exact && nonWord.length() != 0 );
                if ( DEBUG ) LOGGER.debug( "Got word <" + word + "|" + nonWord + "> exact=" + exact + " returning " + goOn );
                if ( ! goOn ) nextFieldToRead++;
                return goOn;
              }
              public WordReader setReader( final Reader reader ) {
View Full Code Here

  }

  public IndexIterator documents( final CharSequence term ) throws IOException {
    remoteIndexIterator.flush();
    outputStream.writeByte( RemoteIndexReader.DOCUMENTS_BY_NAME );
    new MutableString( term ).writeSelfDelimUTF8( (OutputStream)outputStream );
    outputStream.flush();
    remoteIndexIterator.term( term );
    // Read frequency
    remoteIndexIterator.reset( inputStream.readInt() );
    remoteIndexIterator.prefetchDocs( false );
View Full Code Here

  /** Creates a new <code>Not</code> node around the given query.
   *
   * @param query the query stored in this node; it is rendered by {@link #toString()} as <code>! (query)</code>.
   */
  public Not( final Query query ) {
    this.query = query;
  }
 
  public String toString() {
    return new MutableString().append( "! (" ).append( query ).append( ')' ).toString();
  }
View Full Code Here

    result.setProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM, variableQuanta ? 0 : quantum );
    result.setProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT, height );
    if ( COOKIES ) result.setProperty( "cookies", true );
    // We save all flags, except for the PAYLOAD component, which is just used internally.
    for( Map.Entry<Component,Coding> e: flags.entrySet() )
      if ( e.getKey() != Component.PAYLOADS ) result.addProperty( Index.PropertyKeys.CODING, new MutableString().append( e.getKey() ).append( ':' ).append( e.getValue() ) );
    return result;
  }
View Full Code Here

  public MultiTerm( final Query... query ) {
    super( query );
    final ObjectOpenHashSet<MutableString> s = new ObjectOpenHashSet<MutableString>( query.length );
    for( Query q : query ) {
      if ( ! ( q instanceof Term ) && ! ( ( q instanceof Weight ) && ( ((Weight)q).query instanceof Term ) ) ) throw new IllegalArgumentException();
      s.add( new MutableString( q instanceof Term ? ((Term)q).term : ((Term)((Weight)q).query ).term ) );
    }
    if ( s.size() != query.length ) throw new IllegalArgumentException( "Multiterm nodes require distinct terms" );
  }
View Full Code Here

            }
          }

          private Reader joinAddresses( final Address address[] ) {
            if ( address == null ) return NullReader.getInstance();
            final MutableString s = new MutableString();
            if ( address != null ) {
              for( int i = 0; i < address.length; i++ ) {
                if ( i > 0 ) s.append( ", " );
                s.append( address[ i ] );
              }
            }
            return new FastBufferedReader( s );
          }
         
View Full Code Here

            outputStream.writeInt( indexIterator.frequency() );
            outputStream.flush();
            break;
           
          case RemoteIndexReader.DOCUMENTS_BY_NAME:
            indexIterator = indexReader.documents( new MutableString().readSelfDelimUTF8( (InputStream)inputStream ) );
            outputStream.writeInt( indexIterator.frequency() );
            outputStream.flush();
            break;
           
          case RemoteIndexReader.SKIP_TO:
View Full Code Here

        try {
          ensureConnection();
          remoteConnection.outputStream.writeByte( RemoteTermMap.GET_TERM );
          remoteConnection.outputStream.writeInt( index );
          remoteConnection.outputStream.flush();
          return new MutableString().readSelfDelimUTF8( (InputStream)remoteConnection.inputStream );
        }
        catch ( Exception e ) {
            throw new RuntimeException( e );
        }
      }
View Full Code Here

TOP

Related Classes of it.unimi.dsi.lang.MutableString

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.