Package org.carrot2.core.test

Source Code of org.carrot2.core.test.QueryableDocumentSourceTestBase

/*
* Carrot2 project.
*
* Copyright (C) 2002-2014, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/

package org.carrot2.core.test;

import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.carrot2.core.Controller;
import org.carrot2.core.Document;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.test.assertions.Carrot2CoreAssertions;
import org.carrot2.util.StringUtils;
import org.carrot2.util.tests.UsesExternalServices;
import org.junit.Test;

import com.carrotsearch.randomizedtesting.RandomizedContext;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
* Common tests for {@link IDocumentSource}s that accept a string query.
*/
public abstract class QueryableDocumentSourceTestBase<T extends IDocumentSource> extends
    DocumentSourceTestBase<T>
{
    @UsesExternalServices
    @Test
    public void testNoResultsQuery() throws Exception
    {
        runAndCheckNoResultsQuery();
    }

    @UsesExternalServices
    @Test
    public void testSmallQuery() throws Exception
    {
        runAndCheckMinimumResults(getSmallQueryText(), getSmallQuerySize(),
            getSmallQuerySize() / 2);
    }

    @UsesExternalServices
    @Test
    public void testUtfCharacters() throws Exception
    {
        assumeTrue(hasUtfResults());
        runAndCheckMinimumResults("kaczyński", getSmallQuerySize(),
            getSmallQuerySize() / 2);
    }

    @UsesExternalServices
    @Test
    public void testLargeQuery() throws Exception
    {
        runAndCheckMinimumResults(getLargeQueryText(), getLargeQuerySize(),
            getLargeQuerySize() / 2);
    }

    @UsesExternalServices
    @Test
    public void testResultsTotal() throws Exception
    {
        assumeTrue(hasTotalResultsEstimate());
        runQuery(getSmallQueryText(), getSmallQuerySize());

        assertNotNull(resultAttributes.get(AttributeNames.RESULTS_TOTAL));
        assertTrue((Long) resultAttributes.get(AttributeNames.RESULTS_TOTAL) > 0);
    }

    @UsesExternalServices
    @Test
    public void testURLsUnique() throws Exception
    {
        assumeTrue(mustReturnUniqueUrls());
        runQuery(getLargeQueryText(), getLargeQuerySize());
        assertFieldUnique(getDocuments(), Document.CONTENT_URL);
    }

    @UsesExternalServices
    @Test
    public void testHtmlUnescaping()
    {
        assumeTrue(canReturnEscapedHtml());
        runQuery("test", getSmallQuerySize());
        final List<Document> documents = getDocuments();
        int i = 0;
        for (Document document : documents)
        {
            Carrot2CoreAssertions.assertThat(document)
                .as("doc[" + i++ + "]").stringFieldsDoNotMatchPattern(".*&lt;.*");
        }
    }

    @UsesExternalServices
    @Test
    @ThreadLeakLingering(linger = 2000)
    @SuppressWarnings("unchecked")
    public void testInCachingController() throws InterruptedException, ExecutionException
    {
        final Map<String, Object> attributes = Maps.newHashMap();
        attributes.put(AttributeNames.QUERY, getSmallQueryText());
        attributes.put(AttributeNames.RESULTS, getSmallQuerySize());

        // Cache results from all DataSources
        final Controller controller =
            getCachingController(initAttributes, IDocumentSource.class);
        int count = 3;
        final ExecutorService executorService = Executors.newFixedThreadPool(count);

        try {
            List<Callable<ProcessingResult>> callables = Lists.newArrayList();
            for (int i = 0; i < count; i++)
            {
                callables.add(new Callable<ProcessingResult>()
                {
                    public ProcessingResult call() throws Exception
                    {
                        Map<String, Object> localAttributes = Maps.newHashMap(attributes);
                        return controller.process(localAttributes, getComponentClass());
                    }
                });
            }
   
            final List<Future<ProcessingResult>> results = executorService.invokeAll(callables);
   
            List<Document> documents = null;
            int index = 0;
            for (Future<ProcessingResult> future : results)
            {
                ProcessingResult processingResult = future.get();
                final List<Document> documentsLocal = (List<Document>) processingResult
                    .getAttributes().get(AttributeNames.DOCUMENTS);
                assertThat(documentsLocal).as("documents at " + index).isNotNull();
                if (!canReturnMoreResultsThanRequested())
                {
                    assertThat(documentsLocal.size()).as("documents.size() at " + index)
                        .isLessThanOrEqualTo(getSmallQuerySize());
                }
                assertThat(documentsLocal.size()).as("documents.size() at " + index)
                    .isGreaterThanOrEqualTo(getSmallQuerySize() / 2);
   
                // Should have same documents (from the cache)
                if (documents != null)
                {
                    for (int i = 0; i < documents.size(); i++)
                    {
                        assertSame(documents.get(i), documentsLocal.get(i));
                    }
                }
                documents = documentsLocal;
                index++;
            }
        } finally {
            controller.dispose();
            executorService.shutdown();
        }
    }

    /**
     * Override to switch on checking non-English results.
     */
    protected boolean hasUtfResults()
    {
        return false;
    }

    /**
     * Override to customize small query size.
     */
    protected int getSmallQuerySize()
    {
        return 50;
    }

    /**
     * Override to customize small query text.
     */
    protected String getSmallQueryText()
    {
        return "blog";
    }

    /**
     * Override to customize large query size.
     */
    protected int getLargeQuerySize()
    {
        return 300;
    }

    /**
     * Return <code>true</code> if the source can return more results than requested.
     */
    protected boolean canReturnMoreResultsThanRequested()
    {
        return false;
    }
   
    /**
     * Override to customize large query text.
     */
    protected String getLargeQueryText()
    {
        return "test";
    }

    /**
     * Override to switch checking of total results estimates.
     */
    protected boolean hasTotalResultsEstimate()
    {
        return true;
    }

    /**
     * Override to switch checking of HTML unescaping.
     */
    protected boolean canReturnEscapedHtml()
    {
        return true;
    }

    /**
     * Override to switch checking of URL uniqueness.
     */
    protected boolean mustReturnUniqueUrls()
    {
        return true;
    }

    /**
     * Override to customize no results query.
     */
    protected String getNoResultsQueryText()
    {
        return getNoResultsQuery();
    }

    /**
     * Override to customize no results query.
     */
    public static String getNoResultsQuery()
    {
        final int words = 5;
        final int chars = 8;
        final Random random = RandomizedContext.current().getRandom();

        final StringBuilder query = new StringBuilder();
        for (int i = 0; i < words; i++)
        {
            for (int j = 0; j < chars; j++)
            {
                query.append((char) ('a' + random.nextInt('z' - 'a')));
            }
            query.append(random.nextInt(1000000));
            query.append(' ');
        }

        return query.toString();
    }

    protected void runAndCheckMinimumResults(String query, int resultsToRequest,
        int minimumExpectedResults)
    {
        int actualResults = runQuery(query, resultsToRequest);
        assertThat(actualResults).isGreaterThanOrEqualTo(minimumExpectedResults);
    }

    protected void runAndCheckNoResultsQuery()
    {
        runAndCheckNoResultsQuery(getSmallQuerySize());
    }

    protected void runAndCheckNoResultsQuery(int size)
    {
        final int results = runQuery(getNoResultsQueryText(), size);
        if (results != 0)
        {
            final List<Document> documents = getDocuments();
            final String urls = StringUtils.toString(Lists.transform(documents,
                new Function<Document, String>()
                {
                    public String apply(Document document)
                    {
                        return document.getField(Document.CONTENT_URL);
                    }
                }), ", ");
            fail("Expected 0 results but found: " + results + " (urls: " + urls + ")");
        }
    }
}
TOP

Related Classes of org.carrot2.core.test.QueryableDocumentSourceTestBase

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.