Package org.carrot2.webapp.source

Source Code of org.carrot2.webapp.source.WebDocumentSource

/*
* Carrot2 project.
*
* Copyright (C) 2002-2014, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/

package org.carrot2.webapp.source;

import java.util.List;
import java.util.Map;

import org.carrot2.core.Document;
import org.carrot2.core.IControllerContext;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingException;
import org.carrot2.source.SearchEngineResponse;
import org.carrot2.source.SimpleSearchEngine;
import org.carrot2.source.etools.EToolsDocumentSource;
import org.carrot2.source.google.GoogleDocumentSource;
import org.carrot2.util.attribute.Bindable;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
* A {@link IDocumentSource} that folds in a few initial hits from Google API into the
* eTools results. Ultimately, this type of processing should be done on the eTools side.
*/
@Bindable
public class WebDocumentSource extends SimpleSearchEngine
{
    /**
     * ETools document source, contains bindable attributes.
     */
    private EToolsDocumentSource etools = new EToolsDocumentSource();

    /**
     * Google document source, contains bindable attributes.
     */
    private GoogleDocumentSource google = new GoogleDocumentSource();

    /**
     * Query failure string (must be an identical object to cause a query for tests).
     */
    static final String QUERY_FAILURE = new String("foobar");

    @Override
    public void init(IControllerContext context)
    {
        super.init(context);

        google.init(context);
        etools.init(context);
    }

    @Override
    public void beforeProcessing() throws ProcessingException
    {
        google.beforeProcessing();
        etools.beforeProcessing();
    }

    @Override
    public SearchEngineResponse fetchSearchResponse() throws Exception
    {
        final SearchEngineResponse response = new SearchEngineResponse();

        // Run sub-components sequentially so that we have a chance to weed out spammers
        // before we query Google.
        try {
            // String equality intentional; will work for tests only.
            if (query == QUERY_FAILURE)
            {
                throw new RuntimeException("Synthetic failure.");
            }

            etools.process();
   
            google.results = 8;
            google.process();
        } catch (ProcessingException e) {
            if (e.getCause() instanceof Exception) {
                throw (Exception) e.getCause();
            } else {
                throw e;
            }
        }

        final Map<String, Document> googleDocumentsByUrl = Maps.newHashMap();
        if (google.documents != null)
        {
            for (Document googleDocument : google.documents)
            {
                googleDocumentsByUrl.put((String) googleDocument
                    .getField(Document.CONTENT_URL), googleDocument);
                googleDocument.setField(Document.SOURCES, Lists
                    .newArrayList("Google"));

                // Set the language based on the eTools source configuration
                googleDocument.setLanguage(etools.language != null ? etools.language
                    .toLanguageCode() : null);
            }
            response.results.addAll(google.documents);
        }

        if (etools.documents != null)
        {
            for (Document etoolsDocument : etools.documents)
            {
                final Document matchingGoogleDocument = googleDocumentsByUrl
                    .get(etoolsDocument.getField(Document.CONTENT_URL));
                if (matchingGoogleDocument != null)
                {
                    final List<String> sources = etoolsDocument
                        .getField(Document.SOURCES);
                    if (!sources.contains("Google"))
                    {
                        sources.add("Google");
                    }
                    matchingGoogleDocument.setField(Document.SOURCES, sources);
                }
                else
                {
                    response.results.add(etoolsDocument);
                }
            }
        }

        // Trim to size (cannot use sublist because the field is final)
        while (response.results.size() > results)
        {
            response.results.remove(results);
        }

        response.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY,
            google.resultsTotal);

        return response;
    }

    @Override
    public void afterProcessing()
    {
        etools.afterProcessing();
        google.afterProcessing();

        etools.documents = null;
        google.documents = null;
    }

    @Override
    public void dispose()
    {
        etools.dispose();
        google.dispose();
    }
}
TOP

Related Classes of org.carrot2.webapp.source.WebDocumentSource

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.