/**
* Copyright (C) 2010 Peter Karich <jetwick_@_pannous_._info>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.util;
import de.jetwick.es.TweetQuery;
import de.jetwick.es.ElasticTweetSearch;
import java.util.Date;
import org.junit.After;
import com.google.inject.Module;
import de.jetwick.config.DefaultModule;
import de.jetwick.JetwickTestClass;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import de.jetwick.data.JTweet;
import de.jetwick.data.JUser;
import de.jetwick.data.UrlEntry;
import de.jetwick.es.ElasticTweetSearchTest;
import de.jetwick.snacktory.HtmlFetcher;
import de.jetwick.snacktory.JResult;
import de.jetwick.tw.UrlExtractor;
import java.io.IOException;
import java.util.Collections;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Tests for {@link GenericUrlResolver}, executed against the
 * {@link ElasticTweetSearch} instance that {@link ElasticTweetSearchTest}
 * provides. The tests that trigger resolving install a stubbed
 * {@link HtmlFetcher} whose results are derived from the input url only.
 *
 * @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
 */
public class GenericUrlResolverTest extends JetwickTestClass {

    private GenericUrlResolver resolver;
    // search test fixture this test delegates its life cycle calls to
    private final ElasticTweetSearchTest twTestSearch = new ElasticTweetSearchTest();
    private ElasticTweetSearch twSearch;

    /**
     * Delegates the class level initialization to {@link ElasticTweetSearchTest}.
     */
    @BeforeClass
    public static void beforeClass() {
        ElasticTweetSearchTest.beforeClass();
    }

    /**
     * Delegates the class level clean up to {@link ElasticTweetSearchTest}.
     */
    @AfterClass
    public static void afterClass() {
        ElasticTweetSearchTest.afterClass();
    }

    /**
     * Prepares the search fixture first and afterwards the base class, then
     * fetches the search and the resolver used by the tests.
     */
    @Before
    @Override
    public void setUp() throws Exception {
        // NOTE(review): the fixture is set up before super.setUp(), presumably
        // because createModule() reads twTestSearch.getSearch() - confirm
        twTestSearch.setUp();
        super.setUp();
        twSearch = twTestSearch.getSearch();
        resolver = getInstance(GenericUrlResolver.class);
    }

    /**
     * Tears down the base class and the search fixture. The fixture is torn
     * down even if the base class tear down throws, so that a failure there
     * does not leave the fixture behind for the following tests.
     */
    @After
    @Override
    public void tearDown() throws Exception {
        try {
            super.tearDown();
        } finally {
            twTestSearch.tearDown();
        }
    }

    /**
     * A queued tweet gets resolved: afterwards it is found in the search via
     * its original and its resolved url and carries the fetched title.
     */
    @Test
    public void testResolve() throws InterruptedException {
        resolver.setHtmlFetcher(new StubHtmlFetcher());

        JTweet tw = createTweet(1L, "http://hiho.de");
        resolver.putObject(tw);
        assertNotNull(resolver.findUrlInCache("http://hiho.de"));
        assertTrue(resolver.executeResolve(0));

        twSearch.forceEmptyQueueAndRefresh(400);
        assertNotNull(twSearch.findByTwitterId(tw.getTwitterId()));

        // original url
        tw = twSearch.findByUrl("http://hiho.de").get(0);
        UrlEntry ue = tw.getUrlEntries().iterator().next();
        assertEquals("http://hiho.de", ue.getOriginalUrl(tw));

        // resolved url
        assertEquals("http://hiho.de_r", ue.getResolvedUrl());

        // fetched title
        assertEquals("http://hiho.de_r_t", ue.getResolvedTitle());
        assertNotNull(twSearch.findByUrl("http://hiho.de_r").get(0));
    }

    /**
     * If fetching the page fails the tweet is fed into the search nevertheless
     * and the url is removed from the resolver cache.
     */
    @Test
    public void testResolveProblem() throws InterruptedException {
        // same url resolving as the default stub but fetching always fails
        HtmlFetcher fetcher = new StubHtmlFetcher() {

            @Override
            public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
                throw new IOException("url does not exist");
            }
        };
        resolver.setHtmlFetcher(fetcher);

        resolver.putObject(createTweet(1L, "http://hiho.de"));
        assertEquals(1, resolver.getUnresolvedSize());
        assertNotNull(resolver.findUrlInCache("http://hiho.de"));
        assertTrue(resolver.executeResolve(0));

        twSearch.forceEmptyQueueAndRefresh();
        // feed article even if resolving makes trouble
        assertEquals(1, twSearch.findByUrl("http://hiho.de").size());

        // we have real time get => remove url from cache
        assertNull(resolver.findUrlInCache("http://hiho.de"));

        // but do not include in scan search directly after resolving
        // (check currently disabled)
        // assertEquals(0, resolver.getCheckAgainSize());
    }

    /**
     * A tweet with the same id already exists in the index: after resolving
     * there is still only one tweet for the url and it has the retweet count
     * of the queued tweet.
     */
    @Test
    public void testAlreadyExistentSameId() throws InterruptedException {
        resolver.setHtmlFetcher(new StubHtmlFetcher());

        // use persistent tweet otherwise elasticTweetSearch won't update the tweet (and the retweet count)
        resolver.putObject(createTweetWithUrlEntries(1L, "http://hiho.de", 10, "http://hiho.de").makePersistent());
        assertNotNull(resolver.findUrlInCache("http://hiho.de"));
        assertEquals(0, twSearch.countAll());

        // now put an existing article into the index so that the article with sharecount==10 won't be fetched but queued again!
        twSearch.update(Collections.singletonList(createTweetWithUrlEntries(1L, "http://hiho.de_r", 1, "http://hiho.de")), new Date(), false);
        twSearch.forceEmptyQueueAndRefresh();
        assertEquals(1, twSearch.search(new TweetQuery()).size());
        assertEquals(1, twSearch.search(new TweetQuery()).get(0).getVersion());
        assertEquals(1, twSearch.findByUrl("http://hiho.de").size());
        assertEquals(1, twSearch.findByUrl("http://hiho.de").get(0).getVersion());

        assertTrue(resolver.executeResolve(0));
        twSearch.forceEmptyQueueAndRefresh();
        assertEquals(1, twSearch.findByUrl("http://hiho.de").size());
        assertEquals(10, twSearch.findByUrl("http://hiho.de").get(0).getRetweetCount());
    }

    /**
     * Several tweets pointing to the same url must not increase the number of
     * unresolved entries of the resolver.
     */
    @Test
    public void testALotIdenticalUrls() {
        JTweet a1 = createTweet(1L, "http://url1.de");
        JTweet a2 = createTweet(2L, "http://url1.de");
        JTweet a3 = createTweet(3L, "http://url1.de");

        resolver.putObject(a1);
        assertEquals(1, resolver.getUnresolvedSize());
        resolver.putObject(a2);
        assertEquals(1, resolver.getUnresolvedSize());

        // make article existing => skip resolving but do queue into article index
        twSearch.forceEmptyQueueAndRefresh();
        resolver.putObject(a3);
        assertEquals(0, resolver.getUnresolvedSize());
        twSearch.forceEmptyQueueAndRefresh();
    }

    /**
     * Creates a tweet without retweets which was created 'now' and whose url
     * entries are extracted from a text containing the specified url.
     *
     * @param id the twitter id of the new tweet
     * @param url the url used in the text and as original url
     */
    JTweet createTweet(long id, String url) {
        return createTweetWithUrlEntries(id, url, 0, url).setCreatedAt(new Date());
    }

    /**
     * Creates a tweet of the user "timetabling" and sets its url entries to
     * the ones a stubbed {@link UrlExtractor} returns for it.
     *
     * @param id the twitter id of the new tweet
     * @param url the url appended to the tweet text
     * @param rt the retweet count of the new tweet
     * @param origUrl the url the stubbed extractor builds its result from
     */
    JTweet createTweetWithUrlEntries(long id, String url, int rt, final String origUrl) {
        // the extractor ignores the requested url and always answers with origUrl
        UrlExtractor extractor = new UrlExtractor() {

            @Override
            public JResult getInfo(String url, int timeout) throws Exception {
                return UrlEntry.createSimpleResult(origUrl);
            }
        };
        JTweet tw = new JTweet(id, "text is not important " + url, new JUser("timetabling")).setRetweetCount(rt);
        extractor.setTweet(tw);
        tw.setUrlEntries(extractor.run().getUrlEntries());
        return tw;
    }

    /**
     * Returns a {@link DefaultModule} which binds {@link ElasticTweetSearch}
     * to the search instance of the search test fixture.
     */
    @Override
    public Module createModule() {
        return new DefaultModule() {

            @Override
            public void installSearchModule() {
                bind(ElasticTweetSearch.class).toInstance(twTestSearch.getSearch());
            }
        };
    }

    /**
     * Fetcher stub: the resolved url is the requested url plus "_r" and the
     * extracted result has the requested url plus "_t" as title.
     */
    private static class StubHtmlFetcher extends HtmlFetcher {

        @Override
        public String getResolvedUrl(String urlAsString, int timeout) {
            return urlAsString + "_r";
        }

        @Override
        public JResult fetchAndExtract(String url, int timeout, boolean resolve) throws Exception {
            JResult res = new JResult();
            return res.setUrl(url).setTitle(url + "_t");
        }
    }
}