Package de.jetwick.util

Source Code of de.jetwick.util.GenericUrlResolverTest

/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.util;

import de.jetwick.es.TweetQuery;
import de.jetwick.es.ElasticTweetSearch;
import java.util.Date;
import org.junit.After;
import com.google.inject.Module;
import de.jetwick.config.DefaultModule;
import de.jetwick.JetwickTestClass;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import de.jetwick.data.JTweet;
import de.jetwick.data.JUser;
import de.jetwick.data.UrlEntry;
import de.jetwick.es.ElasticTweetSearchTest;
import de.jetwick.snacktory.HtmlFetcher;
import de.jetwick.snacktory.JResult;
import de.jetwick.tw.UrlExtractor;
import java.io.IOException;
import java.util.Collections;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;

/**
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class GenericUrlResolverTest extends JetwickTestClass {

    private GenericUrlResolver resolver;
    private ElasticTweetSearchTest twTestSearch = new ElasticTweetSearchTest();
    private ElasticTweetSearch twSearch;

    @BeforeClass
    public static void beforeClass() {
        ElasticTweetSearchTest.beforeClass();
    }

    @AfterClass
    public static void afterClass() {
        ElasticTweetSearchTest.afterClass();
    }

    @Before
    @Override
    public void setUp() throws Exception {
        twTestSearch.setUp();
        super.setUp();

        twSearch = twTestSearch.getSearch();
        resolver = getInstance(GenericUrlResolver.class);
    }

    @After
    @Override
    public void tearDown() throws Exception {
        super.tearDown();
        twTestSearch.tearDown();
    }

    @Test
    public void testResolve() throws InterruptedException {
        HtmlFetcher fetcher = new HtmlFetcher() {

            @Override
            public String getResolvedUrl(String urlAsString, int timeout) {
                return urlAsString + "_r";
            }

            @Override
            public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
                JResult res = new JResult();
                return res.setUrl(url).setTitle(url + "_t");
            }
        };
        resolver.setHtmlFetcher(fetcher);
        JTweet tw = createTweet(1L, "http://hiho.de");
        resolver.putObject(tw);
        assertNotNull(resolver.findUrlInCache("http://hiho.de"));

        assertTrue(resolver.executeResolve(0));
        twSearch.forceEmptyQueueAndRefresh(400);

        assertNotNull(twSearch.findByTwitterId(tw.getTwitterId()));
        // original url
        tw = twSearch.findByUrl("http://hiho.de").get(0);
        UrlEntry ue = tw.getUrlEntries().iterator().next();
        assertEquals("http://hiho.de", ue.getOriginalUrl(tw));
        // resolved url
        assertEquals("http://hiho.de_r", ue.getResolvedUrl());
        // fetched title
        assertEquals("http://hiho.de_r_t", ue.getResolvedTitle());
        assertNotNull(twSearch.findByUrl("http://hiho.de_r").get(0));       
    }

    @Test
    public void testResolveProblem() throws InterruptedException {
        HtmlFetcher fetcher = new HtmlFetcher() {

            @Override
            public String getResolvedUrl(String urlAsString, int timeout) {
                return urlAsString + "_r";
            }

            @Override
            public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
                throw new IOException("url does not exist");
            }
        };
        resolver.setHtmlFetcher(fetcher);
        resolver.putObject(createTweet(1L, "http://hiho.de"));
        assertEquals(1, resolver.getUnresolvedSize());
        assertNotNull(resolver.findUrlInCache("http://hiho.de"));

        assertTrue(resolver.executeResolve(0));
        twSearch.forceEmptyQueueAndRefresh();

        // feed article even if resolving makes trouble
        assertEquals(1, twSearch.findByUrl("http://hiho.de").size());
        // we have real time get => remove url from cache
        assertNull(resolver.findUrlInCache("http://hiho.de"));

        // but do not include in scan search directly after resolving
//        assertEquals(0, resolver.getCheckAgainSize());
    }

    @Test
    public void testAlreadyExistentSameId() throws InterruptedException {
        HtmlFetcher fetcher = new HtmlFetcher() {

            @Override
            public String getResolvedUrl(String urlAsString, int timeout) {
                return urlAsString + "_r";
            }

            @Override
            public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
                JResult res = new JResult();
                return res.setUrl(url).setTitle(url + "_t");
            }
        };
        resolver.setHtmlFetcher(fetcher);
        // use persistent tweet otherwise elasticTweetSearch won't update the tweet (and the retweet count)
        resolver.putObject(createTweetWithUrlEntries(1L, "http://hiho.de", 10, "http://hiho.de").makePersistent());
        assertNotNull(resolver.findUrlInCache("http://hiho.de"));

        assertEquals(0, twSearch.countAll());
        // now put an existing article into aindex so that the article with sharecount==10 wont be fetched but queued again!
        twSearch.update(Collections.singletonList(createTweetWithUrlEntries(1L, "http://hiho.de_r", 1, "http://hiho.de")), new Date(), false);
        twSearch.forceEmptyQueueAndRefresh();

        assertEquals(1, twSearch.search(new TweetQuery()).size());
        assertEquals(1, twSearch.search(new TweetQuery()).get(0).getVersion());

        assertEquals(1, twSearch.findByUrl("http://hiho.de").size());
        assertEquals(1, twSearch.findByUrl("http://hiho.de").get(0).getVersion());

        assertTrue(resolver.executeResolve(0));
        twSearch.forceEmptyQueueAndRefresh();

        assertEquals(1, twSearch.findByUrl("http://hiho.de").size());
        assertEquals(10, twSearch.findByUrl("http://hiho.de").get(0).getRetweetCount());
    }  

    @Test
    public void testALotIdenticalUrls() {
        JTweet a1 = createTweet(1L, "http://url1.de");
        JTweet a2 = createTweet(2L, "http://url1.de");
        JTweet a3 = createTweet(3L, "http://url1.de");
        resolver.putObject(a1);
        assertEquals(1, resolver.getUnresolvedSize());
        resolver.putObject(a2);
        assertEquals(1, resolver.getUnresolvedSize());

        // make article existing => skip resolving but do queue into article index
        twSearch.forceEmptyQueueAndRefresh();
        resolver.putObject(a3);
        assertEquals(0, resolver.getUnresolvedSize());
        twSearch.forceEmptyQueueAndRefresh();
    }

    JTweet createTweet(long id, String url) {
        return createTweetWithUrlEntries(id, url, 0, url).setCreatedAt(new Date());
    }

    JTweet createTweetWithUrlEntries(long id, String url, int rt, final String origUrl) {
        UrlExtractor extractor = new UrlExtractor() {

            @Override
            public JResult getInfo(String url, int timeout) throws Exception {
                return UrlEntry.createSimpleResult(origUrl);
            }
        };
        JTweet tw = new JTweet(id, "text is not important " + url, new JUser("timetabling")).setRetweetCount(rt);
        extractor.setTweet(tw);
        tw.setUrlEntries(extractor.run().getUrlEntries());       
        return tw;
    }

    @Override
    public Module createModule() {
        return new DefaultModule() {

            @Override
            public void installSearchModule() {
                bind(ElasticTweetSearch.class).toInstance(twTestSearch.getSearch());
            }
        };
    }
}
TOP

Related Classes of de.jetwick.util.GenericUrlResolverTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.