Package it.unimi.dsi.mg4j.document

Examples of it.unimi.dsi.mg4j.document.DocumentFactory.fieldType()


    final int numberOfIndexedFields = indexedField.length;
    if ( numberOfIndexedFields == 0 ) throw new IllegalArgumentException( "You must specify at least one field" );
    final DocumentFactory factory = documentSequence.factory();
    final File tempDir = tempDirName == null ? null : new File( tempDirName );
    for ( int i = 0; i < indexedField.length; i++ )
      if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.VIRTUAL && ( virtualDocumentResolver == null || virtualDocumentResolver[ i ] == null ) ) throw new IllegalArgumentException(
          "No resolver was associated with virtual field " + factory.fieldName( indexedField[ i ] ) );

    final int[] map = mapFile != null ? BinIO.loadInts( mapFile ) : null;

    final Scan[] scan = new Scan[ numberOfIndexedFields ]; // To scan textual content
View Full Code Here


    if ( documentSequence instanceof DocumentCollection ) pl.expectedUpdates = ( (DocumentCollection)documentSequence ).size();
 
   
    for ( int i = 0; i < numberOfIndexedFields; i++ ) {
      final String fieldName = factory.fieldName( indexedField[ i ] );
      switch ( factory.fieldType( indexedField[ i ] ) ) {
      case TEXT:
        scan[ i ] = new Scan( basename + '-' + fieldName, fieldName, completeness, termProcessor, map != null ? IndexingType.REMAPPED
            : IndexingType.STANDARD, 0, 0, bufferSize, builder, tempDir );
        break;
      case VIRTUAL:
View Full Code Here

    while ( ( document = iterator.nextDocument() ) != null ) {
     
      long overallTerms = 0;
      if ( building ) builder.startDocument( document.title(), document.uri() );
      for ( int i = 0; i < numberOfIndexedFields; i++ ) {
        switch ( factory.fieldType( indexedField[ i ] ) ) {
        case TEXT:
          reader = (Reader)document.content( indexedField[ i ] );
          wordReader = document.wordReader( indexedField[ i ] );
          wordReader.setReader( reader );
          if ( building ) builder.startTextField();
View Full Code Here

        else if ( overallTerms >= maxTerms ) LOGGER.warn( "Too many terms (" + overallTerms + "): writing a batch of " + documentsInBatch + " documents" );
        else if ( compacted && percAvailableMemory < PERC_AVAILABLE_MEMORY_DUMP ) LOGGER.warn( "Available memory below " + PERC_AVAILABLE_MEMORY_DUMP + "%: writing a batch of " + documentsInBatch + " documents" );

        long occurrences = 0;
        for ( int i = 0; i < numberOfIndexedFields; i++ ) {
          switch ( factory.fieldType( indexedField[ i ] ) ) {
          case TEXT:
          case VIRTUAL:
            occurrences += scan[ i ].dumpBatch();
            scan[ i ].openSizeBitStream();
            break;
View Full Code Here

    iterator.close();
    if ( builder != null ) builder.close();

    for ( int i = 0; i < numberOfIndexedFields; i++ ) {
      switch ( factory.fieldType( indexedField[ i ] ) ) {
      case TEXT:
      case VIRTUAL:
        scan[ i ].close();
        break;
      default:
View Full Code Here

      final int numberOfFields = factory.numberOfFields();
     
      LOGGER.debug( "ParsingFactory declares " + numberOfFields + " fields"  );
     
      for( int field = 0; field < numberOfFields; field++ ) {
        if ( factory.fieldType( field ) != FieldType.TEXT ) fields.add( StringEscapeUtils.escapeHtml( document.content( field ).toString() ) );
        else fields.add( StringEscapeUtils.escapeHtml( IOUtils.toString( (Reader)document.content( field ) ) ).replaceAll( "\n", "<br>\n" ) );
      }
      context.put( "title", document.title() );
      context.put( "fields", fields );
      context.put( "factory", factory );
View Full Code Here

    final DocumentFactory factory = documentSequence.factory();
    if ( indexedFields.isEmpty() ) {
      // We index everything
      for( int i = 0; i < factory.numberOfFields(); i++ )
        if ( factory.fieldType( i ) != FieldType.VIRTUAL || virtualDocumentResolvers.containsKey( i ) ) indexedFields.add( i );
    }
   
    final int[] indexedField = indexedFields.toIntArray();
    final String[] basenameField = new String[ indexedField.length ];
    for( int i = 0; i < indexedField.length; i++ ) basenameField[ i ] = basename + "-" + factory.fieldName( indexedField[ i ] );
View Full Code Here

   
    final File batchDir = batchDirName == null ? null : new File( batchDirName );

    for ( int i = 0; i < indexedField.length; i++ ) {
      final int batches;
      if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.VIRTUAL ) {
        batches = new Properties( basenameField[ i ] + DiskBasedIndex.PROPERTIES_EXTENSION ).getInt( Index.PropertyKeys.BATCHES );
        final String[] inputBasename = new String[ batches ];
        for( int j = 0; j < inputBasename.length; j++ ) inputBasename[ j ] = Scan.batchBasename( j, basenameField[ i ], batchDir );
        new Paste( basenameField[ i ], inputBasename, false, false, combineBufferSize, batchDir, pasteBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
      }
View Full Code Here

        new Paste( basenameField[ i ], inputBasename, false, false, combineBufferSize, batchDir, pasteBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
      }
      else {
        final String[] inputBasename = new Properties( basenameField[ i ] + Scan.CLUSTER_PROPERTIES_EXTENSION ).getStringArray( IndexCluster.PropertyKeys.LOCALINDEX );
        batches = inputBasename.length;
        if ( factory.fieldType( indexedField[ i ] ) == DocumentFactory.FieldType.TEXT ) {
          if ( mapFile != null ) new Merge( basenameField[ i ], inputBasename, false, combineBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
          else new Concatenate( basenameField[ i ], inputBasename, false, combineBufferSize, standardWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
        }
        else {
          if ( mapFile != null ) new Merge( basenameField[ i ], inputBasename, false, combineBufferSize, payloadWriterFlags, interleaved, skips, quantum, height, skipBufferSize, logInterval ).run();
View Full Code Here

    final IntArrayList[] payloadPointers = new IntArrayList[ n ];
    final ObjectArrayList<Object>[] payloadContent = new ObjectArrayList[ n ];

    for ( int i = 0; i < n; i++ ) {
      field[ i ] = factory.fieldIndex( index[ i ].field );
      switch ( factory.fieldType( field[ i ] ) ) {
      case VIRTUAL:
        currMaxPos[ i ] = new int[ resolver.numberOfDocuments() ];
      case TEXT:
        termMap[ i ] = new Object2ObjectOpenHashMap<MutableString, ObjectArrayList<int[]>>();
        break;
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.