Examples of wordReader()


Examples of it.unimi.di.big.mg4j.document.Document.wordReader()

        for (int i = 0; i < min(k, results.size()); i++) {
          Document document = collection
              .document(results.get(i).document);
          Reader reader = (Reader) document.content(fieldIndex);
          WordReader wordReader = document.wordReader(fieldIndex);
          wordReader.setReader(reader);

          MutableString word = new MutableString();
          MutableString nonWord = new MutableString();
          final LongRBTreeSet set = new LongRBTreeSet();
View Full Code Here

Examples of it.unimi.di.big.mg4j.document.Document.wordReader()

        long unknown = index.getUnknownTermId();

        Multiset<Long> words = HashMultiset.create();

        for (int contentId : contents) {
            final WordReader reader = doc.wordReader(0);

            // Loop over terms
            while (reader.next(token, separator)) {
                final Long termId = index.getTermId(token);
                if (termId == unknown) continue;
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

                // TODO: this must be in increasing field order
                if ( d == null ) d = documentCollection.document( document );
                int fieldIndex = documentCollection.factory().fieldIndex( index.field );
                if ( fieldIndex == -1 || documentCollection.factory().fieldType( fieldIndex ) != DocumentFactory.FieldType.TEXT ) continue;
                final Reader reader = (Reader)d.content( fieldIndex );
                s.appendAndMark( d.wordReader( fieldIndex ).setReader( reader ) );
                s.endField();
                d.close();
                output.println( index.field + ": " + s.toString() );
              }
            }
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

                  final int field = documentCollection.factory().fieldIndex( sortedIndex[ j ].field );
                  // If the field is not present (e.g., because of parallel indexing) or it is not text we skip
                  if ( field == -1 || documentCollection.factory().fieldType( field ) != DocumentFactory.FieldType.TEXT ) continue;
                  LOGGER.debug( "Found intervals for " + sortedIndex[ j ].field + " (" + field + ")" );
                  final Reader content = (Reader)document.content( field );
                  snippet.startField( selectedInterval ).appendAndMark( document.wordReader( field ).setReader( content ) ).endField();
                }
                if ( LOGGER.isDebugEnabled() ) LOGGER.debug( sortedIndex[ j ].field + ": " + ( selectedInterval == null ? null : Arrays.asList( selectedInterval ) ) );
                document.close();
              }
             
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

      if ( building ) builder.startDocument( document.title(), document.uri() );
      for ( int i = 0; i < numberOfIndexedFields; i++ ) {
        switch ( factory.fieldType( indexedField[ i ] ) ) {
        case TEXT:
          reader = (Reader)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          wordReader.setReader( reader );
          if ( building ) builder.startTextField();
          scan[ i ].processDocument( map != null ? map[ documentPointer ] : documentPointer, wordReader );
          if ( building ) builder.endTextField();
          overallTerms += scan[ i ].numTerms;
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

          if ( building ) builder.endTextField();
          overallTerms += scan[ i ].numTerms;
          break;
        case VIRTUAL:
          fragments = (ObjectList<VirtualDocumentFragment>)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          virtualDocumentResolver[ i ].context( document );
          for( VirtualDocumentFragment fragment: fragments ) {
            int virtualDocumentPointer = virtualDocumentResolver[ i ].resolve( fragment.documentSpecifier() );
            if ( virtualDocumentPointer < 0 ) continue;
            if ( map != null ) virtualDocumentPointer = map[ virtualDocumentPointer ];
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

            else {
              // text index
              pos = 0;
              termsInDoc[ i ].clear();
              reader = (Reader)content;
              wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
              wordReader.setReader( reader );
              while( wordReader.next( word, nonWord ) ) {
                if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue;
                if ( ( t = (int)( (BitStreamIndex)index[ i ] ).termMap.getLong( word ) ) == -1 ) LOGGER.error( index[ i ] + ": Could not find term " + word + " in term index" );
                else {
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

              }
            }
            else {
              pos = 0;
              reader = (Reader)content;
              wordReader = document.wordReader( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
              wordReader.setReader( reader );
              while( wordReader.next( word, nonWord ) ) {
                if ( word.length() == 0 || index[ i ].termProcessor != null && ! index[ i ].termProcessor.processTerm( word ) ) continue;
                IndexIterator indexIterator = indexReader[ i ].documents( word );
                if ( currDoc != indexIterator.skipTo( currDoc ) )
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

    while ( ( document = iterator.nextDocument() ) != null ) {
      for ( int i = 0; i < field.length; i++ ) {
        switch ( factory.fieldType( field[ i ] ) ) {
        case TEXT:
          processDocument( document.wordReader( field[ i ] ).setReader( (Reader)document.content( field[ i ] ) ), map == null ? documentIndex : map[ documentIndex ], 0, termMap[ i ],
              index[ i ].termProcessor );
          break;
        case VIRTUAL:
          ObjectArrayList<VirtualDocumentFragment> fragments = (ObjectArrayList<VirtualDocumentFragment>)document.content( field[ i ] );
          resolver.context( document );
View Full Code Here

Examples of it.unimi.dsi.mg4j.document.Document.wordReader()

            int d = resolver.resolve( fragment.documentSpecifier() );

            if ( d != -1 ) {
              if ( map != null ) d = map[ d ];
              if ( maxDoc[ i ] < d ) maxDoc[ i ] = d;
              currMaxPos[ i ][ d ] = processDocument( document.wordReader( field[ i ] ).setReader( new FastBufferedReader( fragment.text() ) ), d, currMaxPos[ i ][ d ], termMap[ i ],
                  index[ i ].termProcessor )
                  + gap;
            }
          }
          break;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.