Package it.unimi.dsi.mg4j.document

Examples of it.unimi.dsi.mg4j.document.StringArrayDocumentCollection


  public void testSkipToBeyondUsedClusters() throws Exception {
    /* We test what happens when we skip to a document belonging to a local index larger
     * than any index in which the term appears. */
   
    final String basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
        new IndexBuilder( basename, new StringArrayDocumentCollection( "A B", "B", "A", "A" ) ).run();
    BinIO.storeObject( DocumentalStrategies.uniform( 2, 4 ), basename + "-strategy" );
    new PartitionDocumentally( basename + "-text", basename + "-cluster", DocumentalStrategies.uniform( 2, 4 ), basename + "-strategy", 0, 1024, CompressionFlags.DEFAULT_STANDARD_INDEX, true, false, 0, 0, 0, ProgressLogger.DEFAULT_LOG_INTERVAL ).run();
    FileLinesCollection flc;
    flc = new FileLinesCollection( basename + "-cluster-0.terms", "ASCII" );
    BinIO.storeObject( new ShiftAddXorSignedStringMap( flc.iterator(), new MWHCFunction<CharSequence>( flc , TransformationStrategies.utf16() ) ), basename + "-cluster-0.termmap" )
View Full Code Here


  /** Index assigned in {@code setUp()} from {@code DiskBasedIndex.getInstance( basename + "-text", true, true )}. */
  private BitStreamIndex index;
  /** Canonical path of the temporary file created in {@code setUp()}; passed to {@code IndexBuilder} as the index basename. */
  private String basename;

  public void setUp() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    basename = File.createTempFile( getClass().getSimpleName(), "test" ).getCanonicalPath();
    new IndexBuilder( basename, new StringArrayDocumentCollection( "a", "b", "c" ) ).run();
    index = DiskBasedIndex.getInstance( basename + "-text", true, true );
  }
View Full Code Here

    titleBasename = File.createTempFile( ScorerTest.class.getSimpleName(), "title", tempDir ).toString();
    bodyFBasename = File.createTempFile( ScorerTest.class.getSimpleName(), "bodyf", tempDir ).toString();
    titleFBasename = File.createTempFile( ScorerTest.class.getSimpleName(), "titlef", tempDir ).toString();
    basenameFComb = File.createTempFile( ScorerTest.class.getSimpleName(), "combf", tempDir ).toString();
    bodyBasenameBis = File.createTempFile( ScorerTest.class.getSimpleName(), "bodyfbis", tempDir ).toString();
    documentCollection0 = new StringArrayDocumentCollection(
        // number of documents N = 3
        // average document size = (9 + 14 + 4) / 3 = 9
        // number of occurrences = 27
        new String[] {
            "This sentence speaks really really really good of gods", // size 9
            "And this, is a list of all the green things in the green world", // size 14
            "get THIS not THAT" } // size 4
        );

    // BM25 testing

    bodyDocumentCollection = new StringArrayDocumentCollection(
        new String[] {
            "A C C",
            "D Z",
            "A X Z",
            "C X X",
            "Q",
        }
    );
   
    // Doc size: 3 2 3 3 1 (avg: 12/5)
    // Frequency: A:2 C:2 D:1 Q:1 X:2 Z:2

    titleDocumentCollection = new StringArrayDocumentCollection(
        new String[] {
            "A",
            "Q",
            "Z",
            "Q X",
            "Z",
        }
    );
   
    // Doc size: 1 1 1 2 1 (avg: 6/5)
    // Frequency: A:1 Q:2 X:1 Z:2
    
    // BM25F testing

    bodyFDocumentCollection = new StringArrayDocumentCollection(
        new String[] {
            "A C C",
            "D Z",
            "A X Z",
            "C X X",
            "Q",
            "1",
            "1",
            "1"
        }
    );
   
    // Doc size: 3 2 3 3 1 1 1 1 (avg: 15/8)
    // Frequency: 1:3 A:2 C:2 D:1 Q:1 X:2 Z:2

    titleFDocumentCollection = new StringArrayDocumentCollection(
        new String[] {
            "A",
            "Q",
            "Z",
            "Q X",
View Full Code Here

TOP

Related Classes of it.unimi.dsi.mg4j.document.StringArrayDocumentCollection

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.