Package it.unimi.dsi.mg4j.document

Examples of it.unimi.dsi.mg4j.document.Document.content()


                s.startField( interval );
                // TODO: this must be in increasing field order
                if ( d == null ) d = documentCollection.document( document );
                int fieldIndex = documentCollection.factory().fieldIndex( index.field );
                if ( fieldIndex == -1 || documentCollection.factory().fieldType( fieldIndex ) != DocumentFactory.FieldType.TEXT ) continue;
                final Reader reader = (Reader)d.content( fieldIndex );
                s.appendAndMark( d.wordReader( fieldIndex ).setReader( reader ) );
                s.endField();
                d.close();
                output.println( index.field + ": " + s.toString() );
              }
View Full Code Here


            }
            else if ( index.hasPayloads && dsi.info.get( index ) == SelectedInterval.TRUE_ARRAY ) {
              if ( d == null ) d = documentCollection.document( document );
              int fieldIndex = documentCollection.factory().fieldIndex( index.field );
              if ( fieldIndex == -1 ) continue;
              output.println( d.content( fieldIndex ) );
            }
          output.println();
        }
      }
    }
View Full Code Here

                if ( selectedInterval != null ) {
                  final int field = documentCollection.factory().fieldIndex( sortedIndex[ j ].field );
                  // If the field is not present (e.g., because of parallel indexing) or it is not text we skip
                  if ( field == -1 || documentCollection.factory().fieldType( field ) != DocumentFactory.FieldType.TEXT ) continue;
                  LOGGER.debug( "Found intervals for " + sortedIndex[ j ].field + " (" + field + ")" );
                  final Reader content = (Reader)document.content( field );
                  snippet.startField( selectedInterval ).appendAndMark( document.wordReader( field ).setReader( content ) ).endField();
                }
                if ( LOGGER.isDebugEnabled() ) LOGGER.debug( sortedIndex[ j ].field + ": " + ( selectedInterval == null ? null : Arrays.asList( selectedInterval ) ) );
                document.close();
              }
View Full Code Here

      long overallTerms = 0;
      if ( building ) builder.startDocument( document.title(), document.uri() );
      for ( int i = 0; i < numberOfIndexedFields; i++ ) {
        switch ( factory.fieldType( indexedField[ i ] ) ) {
        case TEXT:
          reader = (Reader)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          wordReader.setReader( reader );
          if ( building ) builder.startTextField();
          scan[ i ].processDocument( map != null ? map[ documentPointer ] : documentPointer, wordReader );
          if ( building ) builder.endTextField();
View Full Code Here

          scan[ i ].processDocument( map != null ? map[ documentPointer ] : documentPointer, wordReader );
          if ( building ) builder.endTextField();
          overallTerms += scan[ i ].numTerms;
          break;
        case VIRTUAL:
          fragments = (ObjectList<VirtualDocumentFragment>)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          virtualDocumentResolver[ i ].context( document );
          for( VirtualDocumentFragment fragment: fragments ) {
            int virtualDocumentPointer = virtualDocumentResolver[ i ].resolve( fragment.documentSpecifier() );
            if ( virtualDocumentPointer < 0 ) continue;
View Full Code Here

          }
          if ( building ) builder.virtualField( fragments );
          overallTerms += scan[ i ].numTerms;
          break;
        default:
          Object o = document.content( indexedField[ i ] );
          accumulator[ i ].processData( map != null ? map[ documentPointer ] : documentPointer, o );
          if ( building ) builder.nonTextField( o );
          break;
        }
View Full Code Here

      final int numberOfFields = factory.numberOfFields();
     
      LOGGER.debug( "ParsingFactory declares " + numberOfFields + " fields"  );
     
      for( int field = 0; field < numberOfFields; field++ ) {
        if ( factory.fieldType( field ) != FieldType.TEXT ) fields.add( StringEscapeUtils.escapeHtml( document.content( field ).toString() ) );
        else fields.add( StringEscapeUtils.escapeHtml( IOUtils.toString( (Reader)document.content( field ) ) ).replaceAll( "\n", "<br>\n" ) );
      }
      context.put( "title", document.title() );
      context.put( "fields", fields );
      context.put( "factory", factory );
View Full Code Here

     
      LOGGER.debug( "ParsingFactory declares " + numberOfFields + " fields"  );
     
      for( int field = 0; field < numberOfFields; field++ ) {
        if ( factory.fieldType( field ) != FieldType.TEXT ) fields.add( StringEscapeUtils.escapeHtml( document.content( field ).toString() ) );
        else fields.add( StringEscapeUtils.escapeHtml( IOUtils.toString( (Reader)document.content( field ) ) ).replaceAll( "\n", "<br>\n" ) );
      }
      context.put( "title", document.title() );
      context.put( "fields", fields );
      context.put( "factory", factory );
      return getTemplate( "it/unimi/dsi/mg4j/query/generic.velocity" );
View Full Code Here

       
        while( ( document = documentIterator.nextDocument() ) != null ) {
          currDoc = permutation != null ? permutation[ docCounter ] : docCounter;

          for( i = 0; i < index.length; i++ ) {
            Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
            if ( index[ i ].hasPayloads ) {
              // TODO: write tests for the other case
              if ( allBitStreamIndices ) {
                IndexIterator indexIterator = indexReader[ i ].documents( 0 );
                int pointer = indexIterator.skipTo( currDoc );
View Full Code Here

       
        while( ( document = documentIterator.nextDocument() ) != null ) {
          currDoc = permutation != null ? permutation[ docCounter ] : docCounter;

          for( i = 0; i < index.length; i++ ) {
            Object content = document.content( stem || index[ i ].field == null ? indexedField[ i ] : factory.fieldIndex( index[ i ].field ) );
            if ( index[ i ].hasPayloads ) {
              if ( allBitStreamIndices ) {
                IndexIterator indexIterator = indexReader[ i ].documents( 0 );
                int pointer = indexIterator.skipTo( currDoc );
                if ( pointer == currDoc ) {
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.