Package org.carrot2.source.xml

Source Code of org.carrot2.source.xml.XmlDocumentSourceTest

/*
* Carrot2 project.
*
* Copyright (C) 2002-2014, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/

package org.carrot2.source.xml;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.Document;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.test.DocumentSourceTestBase;
import org.carrot2.util.attribute.AttributeUtils;
import org.carrot2.util.resource.ContextClassLoaderLocator;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.ResourceLookup;
import org.carrot2.util.resource.URLResourceWithParams;
import org.junit.Test;

import com.google.common.base.Function;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
* Test cases for {@link XmlDocumentSource}.
*/
public class XmlDocumentSourceTest extends DocumentSourceTestBase<XmlDocumentSource>
{
    /**
     * Resource locator for test resources, use context class loader.
     */
    private ResourceLookup resourceLocator = new ResourceLookup(
        new ContextClassLoaderLocator());

    /**
     * Transforms {@link Document}s to their ids.
     */
    protected static final Function<Document, Integer> DOCUMENT_TO_INT_ID = new Function<Document, Integer>()
    {
        public Integer apply(Document document)
        {
            return Integer.valueOf(document.getStringId());
        }
    };

    @Override
    public Class<XmlDocumentSource> getComponentClass()
    {
        return XmlDocumentSource.class;
    }

    @SuppressWarnings("unchecked")
    @Test
    public void testReadClusters()
    {
        IResource xml = resourceLocator.getFirst("/xml/carrot2-with-clusters.xml");

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"), xml);
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "readClusters"), true);
        runQuery();

        List<Cluster> clusters = (List<Cluster>) resultAttributes.get(AttributeNames.CLUSTERS);
        assertThat(clusters).isNotEmpty();
    }

    @Test
    public void testLegacyXml()
    {
        IResource xml = resourceLocator.getFirst("/xml/carrot2-apple-computer.xml");

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "results"),
            300);
        final int documentCount = runQuery();
        assertEquals(200, documentCount);
        assertEquals("apple computer", resultAttributes.get(AttributeNames.QUERY));
    }

    @Test
    public void testResultsTruncation()
    {
        IResource xml = resourceLocator.getFirst("/xml/carrot2-apple-computer.xml");

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);
        processingAttributes.put(AttributeNames.RESULTS, 50);
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "readAll"),
            false);
        final int documentCount = runQuery();
        assertEquals(50, documentCount);
        assertEquals("apple computer", resultAttributes.get(AttributeNames.QUERY));
    }

    @Test
    public void testXsltNoParameters()
    {
        IResource xml = resourceLocator.getFirst("/xml/custom-parameters-not-required.xml");
        IResource xslt = resourceLocator.getFirst("/xsl/custom-xslt.xsl");

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xslt"),
            xslt);
        final int documentCount = runQuery();
        assertTransformedDocumentsEqual(documentCount);
    }

    @Test
    public void testXsltWithParameters()
    {
        IResource xml = resourceLocator.getFirst("/xml/custom-parameters-required.xml");
        IResource xslt = resourceLocator.getFirst("/xsl/custom-xslt.xsl");

        Map<String, String> xsltParameters = Maps.newHashMap();
        xsltParameters.put("id-field", "number");
        xsltParameters.put("title-field", "heading");
        xsltParameters.put("snippet-field", "snippet");
        xsltParameters.put("url-field", "url");

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xslt"),
            xslt);
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class,
            "xsltParameters"), xsltParameters);
        final int documentCount = runQuery();
        assertTransformedDocumentsEqual(documentCount);
    }

    @Test
    public void testNoIdsInSourceXml()
    {
        IResource xml = resourceLocator.getFirst("/xml/carrot2-no-ids.xml");

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);
        final int documentCount = runQuery();
        assertEquals(2, documentCount);
        assertEquals(Lists.newArrayList(0, 1), Lists.transform(getDocuments(),
            DOCUMENT_TO_INT_ID));
    }

    @Test
    public void testGappedIdsInSourceXml()
    {
        IResource xml = resourceLocator.getFirst("/xml/carrot2-gapped-ids.xml");
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"), xml);
        try {
            runQuery();
            fail();
        } catch (ProcessingException e) {
            assertThat(e.getMessage()).contains("Identifiers must be unique");
        }
    }

    @Test
    public void testDuplicatedIdsInSourceXml()
    {
        IResource xml = resourceLocator.getFirst("/xml/carrot2-duplicated-ids.xml");
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"), xml);
        try {
            runQuery();
            fail();
        } catch (ProcessingException e) {
            assertThat(e.getMessage()).contains("Identifiers must be unique");
        }
    }

    @Test
    public void testInitializationTimeXslt()
    {
        IResource xslt = resourceLocator.getFirst("/xsl/custom-xslt.xsl");
        initAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xslt"), xslt);

        IResource xml = resourceLocator.getFirst("/xml/custom-parameters-not-required.xml");
        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);

        final int documentCount = runQueryInCachingController();
        assertTransformedDocumentsEqual(documentCount);
    }

    @Test
    public void testOverridingInitializationTimeXslt()
    {
        IResource initXslt = resourceLocator.getFirst("/xsl/carrot2-identity.xsl");
        initAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xslt"),
            initXslt);
       
        @SuppressWarnings("unchecked")
        Controller controller = getCachingController(initAttributes);

        // Run with identity XSLT
        {
            IResource xml = resourceLocator.getFirst("/xml/carrot2-test.xml");
            processingAttributes.put(AttributeUtils
                .getKey(XmlDocumentSource.class, "xml"), xml);

            final int documentCount = runQuery(controller);
            assertEquals(2, documentCount);
            assertEquals(Lists.newArrayList("Title 0", "Title 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_TITLE));
            assertEquals(Lists.newArrayList("Snippet 0", "Snippet 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_SUMMARY));
        }

        // Run with swapping XSLT
        {
            IResource xml = resourceLocator.getFirst("/xml/carrot2-test.xml");
            IResource xslt = resourceLocator.getFirst("/xsl/carrot2-title-snippet-switch.xsl");
            processingAttributes.put(AttributeUtils
                .getKey(XmlDocumentSource.class, "xml"), xml);
            processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class,
                "xslt"), xslt);

            final int documentCount = runQuery(controller);
            assertEquals(2, documentCount);
            assertEquals(Lists.newArrayList("Snippet 0", "Snippet 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_TITLE));
            assertEquals(Lists.newArrayList("Title 0", "Title 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_SUMMARY));
        }
    }

    @Test
    public void testDisablingInitializationTimeXslt()
    {
        IResource initXslt = resourceLocator.getFirst(
            "/xsl/carrot2-title-snippet-switch.xsl");
        initAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xslt"),
            initXslt);

        @SuppressWarnings("unchecked")
        Controller controller = getCachingController(initAttributes);
       
        // Run with swapping XSLT
        {
            IResource xml = resourceLocator.getFirst("/xml/carrot2-test.xml");
            processingAttributes.put(AttributeUtils
                .getKey(XmlDocumentSource.class, "xml"), xml);

            final int documentCount = runQuery(controller);
            assertEquals(2, documentCount);
            assertEquals(Lists.newArrayList("Snippet 0", "Snippet 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_TITLE));
            assertEquals(Lists.newArrayList("Title 0", "Title 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_SUMMARY));
        }

        // Run without XSLT
        {
            IResource xml = resourceLocator.getFirst("/xml/carrot2-test.xml");
            processingAttributes.put(AttributeUtils
                .getKey(XmlDocumentSource.class, "xml"), xml);
            processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class,
                "xslt"), null);

            final int documentCount = runQuery(controller);
            assertEquals(2, documentCount);
            assertEquals(Lists.newArrayList("Title 0", "Title 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_TITLE));
            assertEquals(Lists.newArrayList("Snippet 0", "Snippet 1"), Lists.transform(
                getDocuments(), DOCUMENT_TO_SUMMARY));
        }
    }

    @Test
    public void testRemoteUrl() throws MalformedURLException
    {
        String base = System.getProperty("carrot2.xml.feed.url.base");
        assumeTrue("carrot2.xml.feed.url.base property undefined.", !Strings.isNullOrEmpty(base));

        IResource xml = new URLResourceWithParams(new URL(base + "&q=${query}&results=${results}"));
        final String query = "apple computer";

        processingAttributes.put(AttributeUtils.getKey(XmlDocumentSource.class, "xml"),
            xml);
        processingAttributes.put(AttributeNames.QUERY, query);
        processingAttributes.put(AttributeNames.RESULTS, 50);
        final int documentCount = runQuery();
        assertEquals(50, documentCount);
        assertEquals(query, resultAttributes.get(AttributeNames.QUERY));
    }

    private void assertTransformedDocumentsEqual(final int documentCount)
    {
        assertEquals(2, documentCount);
        assertEquals("xslt test", resultAttributes.get(AttributeNames.QUERY));
        assertEquals(Lists.newArrayList(498967, 831478), Lists.transform(getDocuments(),
            DOCUMENT_TO_INT_ID));
        assertEquals(Lists.newArrayList("IBM's MARS Block Cipher.",
            "IBM WebSphere Studio Device Developer"), Lists.transform(getDocuments(),
            DOCUMENT_TO_TITLE));
        assertEquals(Lists.newArrayList(
            "The company's AES proposal using 128 bit blocks.",
            "An integrated development environment."), Lists.transform(getDocuments(),
            DOCUMENT_TO_SUMMARY));
        assertEquals(Lists.newArrayList("http://www.research.ibm.com/security/mars.html",
            "http://www-3.ibm.com/software/wireless/wsdd/"), Lists.transform(
            getDocuments(), DOCUMENT_TO_CONTENT_URL));
    }
}
TOP

Related Classes of org.carrot2.source.xml.XmlDocumentSourceTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.