Package it.unimi.dsi.mg4j.document

Examples of it.unimi.dsi.mg4j.document.DocumentIterator.nextDocument()


    int documentPointer = 0, documentsInBatch = 0;
    long batchStartTime = System.currentTimeMillis();
    boolean outOfMemoryError = false;

    while ( ( document = iterator.nextDocument() ) != null ) {
     
      long overallTerms = 0;
      if ( building ) builder.startDocument( document.title(), document.uri() );
      for ( int i = 0; i < numberOfIndexedFields; i++ ) {
        switch ( factory.fieldType( indexedField[ i ] ) ) {
View Full Code Here


    ProgressLogger progressLogger = new ProgressLogger( LOGGER, jsapResult.getLong( "logInterval" ), "documents" );
    if ( documentSequence instanceof DocumentCollection ) progressLogger.expectedUpdates = ((DocumentCollection)documentSequence).size();
    progressLogger.start( "Scanning..." );
   
    while( ( document = documentIterator.nextDocument() ) != null ) {
      if ( uriStream != null ) {
        s.replace( document.uri() );
        s.replace( LINE_TERMINATORS, SPACES );
        s.writeUTF8( uriStream );
        uriStream.write( '\n' );
View Full Code Here

      }
    }

    int documentIndex = 0;

    while ( ( document = iterator.nextDocument() ) != null ) {
      for ( int i = 0; i < field.length; i++ ) {
        switch ( factory.fieldType( field[ i ] ) ) {
        case TEXT:
          processDocument( document.wordReader( field[ i ] ).setReader( (Reader)document.content( field[ i ] ) ), map == null ? documentIndex : map[ documentIndex ], 0, termMap[ i ],
              index[ i ].termProcessor );
View Full Code Here

        false);

    try {
      DocumentIterator iter = collection.iterator();
      Document d;
      while ((d = iter.nextDocument()) != null)
        d.title();
    } catch (IllegalStateException e) {
      assertTrue(false);
    }
View Full Code Here

        false);

    DocumentIterator iter = collection.iterator();
    Document d = null;

    d = iter.nextDocument();
    assertNotNull(d);
    assertEquals("http://gx0001/", d.uri());
    assertEquals("GX001", d.title());

    final int textIndex = collection.factory().fieldIndex( "text" );
View Full Code Here

    final int textIndex = collection.factory().fieldIndex( "text" );
   
    assertEquals( "Line 1\n     The line 2!\n  Mamma\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
   
    d = iter.nextDocument();
    assertNotNull(d);
    assertEquals("http://gx0002/", d.uri());
    assertEquals("GX002", d.title());

    assertEquals( "Contents of this file reside on one line only\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
View Full Code Here

    assertEquals("http://gx0002/", d.uri());
    assertEquals("GX002", d.title());

    assertEquals( "Contents of this file reside on one line only\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );

    d = iter.nextDocument();
    assertNotNull(d);
    assertEquals("http://gx0003/", d.uri());
    assertEquals("GX003", d.title());

    assertEquals( "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
View Full Code Here

    assertEquals("http://gx0003/", d.uri());
    assertEquals("GX003", d.title());

    assertEquals( "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );

    d = iter.nextDocument();
    assertNotNull(d);
    assertEquals("http://gx0004/", d.uri());
    assertEquals("GX004", d.title());

    assertEquals( "New content 0\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
View Full Code Here

    assertEquals("http://gx0004/", d.uri());
    assertEquals("GX004", d.title());

    assertEquals( "New content 0\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
   
    d = iter.nextDocument();
    assertNotNull(d);
    assertEquals("http://gx0005/", d.uri());
    assertEquals("GX005", d.title());

    assertEquals( "New content 1\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
View Full Code Here

    assertEquals("http://gx0005/", d.uri());
    assertEquals("GX005", d.title());

    assertEquals( "New content 1\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );

    d = iter.nextDocument();
    assertNotNull(d);
    assertEquals("http://gx0006/", d.uri());
    assertEquals("GX006", d.title());

    assertEquals( "New content 2\n", IOUtils.toString( (Reader)d.content( textIndex ) ) );
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.