/*
* Copyright 2010 Peter Karich jetwick_@_pannous_._info
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.jetwick.es;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.facet.terms.TermsFacet;
import java.util.LinkedHashSet;
import java.util.Set;
import java.io.StringReader;
import de.jetwick.data.UrlEntry;
import de.jetwick.data.JTweet;
import de.jetwick.data.JUser;
import de.jetwick.util.MyDate;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.IndexMissingException;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.*;
/**
*
* @author Peter Karich, peat_hal 'at' users 'dot' sourceforge 'dot' net
*/
public class ElasticTweetSearchTest extends AbstractElasticSearchTester {
// private Logger logger = LoggerFactory.getLogger(getClass());
private ElasticTweetSearch twSearch;
/**
 * Returns the tweet search instance currently held by this test.
 *
 * @return the value of the {@code twSearch} field
 */
public ElasticTweetSearch getSearch() {
    return this.twSearch;
}
/**
 * Creates a new {@link ElasticTweetSearch} on the client supplied by
 * {@code getClient()}, passes it to the base-class set-up and then
 * enables its testing flag.
 */
@Before
public void setUp() throws Exception {
    ElasticTweetSearch search = new ElasticTweetSearch(getClient());
    twSearch = search;
    super.setUp(search);
    search.setTesting(true);
}
/**
 * Stores three tweets from two users and checks the hit counts of the
 * user-level {@code search} and the tweet-level {@code searchTweets} for
 * single terms, multiple terms, {@code OR} and a negated term. Finally
 * verifies that an unescaped query containing {@code !} is rejected with
 * an exception.
 */
@Test
public void testSearch() throws Exception {
    JUser fromUser = new JUser("peter");
    JTweet tw1 = new JTweet(1L, "this is a test!", fromUser);
    JUser otherUser = new JUser("otherUser");
    JTweet tw2 = new JTweet(2L, "Java is cool and stable!", otherUser);
    JTweet tw3 = new JTweet(3L, "Java is stable!", otherUser);
    twSearch.store(tw1, false);
    twSearch.store(tw2, false);
    twSearch.store(tw3, true);

    // search(...) yields users: both java tweets belong to the same user
    assertEquals(1, twSearch.search("java").size());
    assertEquals(1, twSearch.search("test").size());

    // searchTweets(...) yields individual tweets
    assertEquals(1, twSearch.searchTweets(new TweetQuery("this test")).size());
    assertEquals(2, twSearch.searchTweets(new TweetQuery("java stable")).size());
    assertEquals(1, twSearch.searchTweets(new TweetQuery("java cool stable")).size());
    assertEquals(2, twSearch.searchTweets(new TweetQuery("java")).size());
    assertEquals(3, twSearch.searchTweets(new TweetQuery("java OR test")).size());
    assertEquals(1, twSearch.searchTweets(new TweetQuery("java -cool")).size());

    try {
        // throw error if contains unescaped lucene chars
        twSearch.searchTweets(new TweetQuery("stable!"));
        // fail() raises an AssertionError, which the catch below does not intercept
        fail("expected an exception for a query with unescaped lucene characters: stable!");
    } catch (Exception expected) {
        // expected: the unescaped '!' must make the query fail
    }
}
/**
 * Same data and expectations as the plain search test, but every tweet
 * query is built with {@code setEscape(true)}; with escaping enabled the
 * query {@code stable!} is expected to match two tweets.
 */
@Test
public void testSmartEscapedSearch() throws Exception {
    JUser peter = new JUser("peter");
    JTweet testTweet = new JTweet(1L, "this is a test!", peter);
    JUser other = new JUser("otherUser");
    JTweet coolStableTweet = new JTweet(2L, "Java is cool and stable!", other);
    JTweet stableTweet = new JTweet(3L, "Java is stable!", other);

    twSearch.store(testTweet, false);
    twSearch.store(coolStableTweet, false);
    // the last store is called with 'true'
    twSearch.store(stableTweet, true);

    // user-level search
    int javaUsers = twSearch.search("java").size();
    assertEquals(1, javaUsers);
    int testUsers = twSearch.search("test").size();
    assertEquals(1, testUsers);

    // tweet-level search with escaping switched on
    assertEquals(1,
            twSearch.searchTweets(new TweetQuery("this test").setEscape(true)).size());
    assertEquals(2,
            twSearch.searchTweets(new TweetQuery("java stable").setEscape(true)).size());
    assertEquals(1,
            twSearch.searchTweets(new TweetQuery("java cool stable").setEscape(true)).size());
    assertEquals(2,
            twSearch.searchTweets(new TweetQuery("java").setEscape(true)).size());
    assertEquals(3,
            twSearch.searchTweets(new TweetQuery("java OR test").setEscape(true)).size());
    assertEquals(1,
            twSearch.searchTweets(new TweetQuery("java -cool").setEscape(true)).size());
    assertEquals(2,
            twSearch.searchTweets(new TweetQuery("stable!").setEscape(true)).size());
}
/**
 * Checks that a tweet containing {@code #java} is found for both
 * {@code java} and {@code #java}, while a tweet containing plain
 * {@code java} is found for {@code java} only.
 */
@Test
public void testHashtags() {
    // # is handled as digit so that we can search for java to get java and #java results (the same applies to @)
    JTweet withHashtag = createTweet(1L, "is cool and stable! #java", "peter2");
    twSearch.testUpdate(withHashtag);
    // hmmh sometimes this fails!?
    int plainHits = twSearch.search("java").size();
    assertEquals(1, plainHits);
    int hashHits = twSearch.search("#java").size();
    assertEquals(1, hashHits);

    // start over with an empty index
    twSearch.deleteAll();
    assertEquals(0, twSearch.search("java").size());
    assertEquals(0, twSearch.search("#java").size());

    JTweet withoutHashtag = createTweet(1L, "is cool and stable! java", "peter2");
    twSearch.testUpdate(withoutHashtag);
    assertEquals(1, twSearch.search("java").size());
    assertEquals(0, twSearch.search("#java").size());
}
/**
 * Indexes four tweets that differ only in how "germany" is written
 * (plain and as hashtag in three casings) and checks the hit counts for
 * the plain and the hashtag query.
 */
@Test
public void testHashtags2() {
    List<JTweet> tweets = Arrays.asList(
            createTweet(1L, "egypt germany", "peter"),
            createTweet(2L, "egypt #germany", "peter2"),
            createTweet(3L, "egypt #Germany", "peter3"),
            createTweet(4L, "egypt #GERMANY", "peter4"));
    twSearch.testUpdate(tweets);

    // the plain term matches all four tweets
    int plainHits = twSearch.search("egypt germany").size();
    assertEquals(4, plainHits);
    // the hashtag query matches the three hashtag variants
    int hashtagHits = twSearch.search("egypt #germany").size();
    assertEquals(3, hashtagHits);
}
/**
 * Stores one tweet with a {@code #java} hashtag and one with an
 * {@code @peter} mention and checks that each anchor query yields
 * exactly one result.
 */
@Test
public void testSearchAnchors() throws Exception {
    JTweet hashtagTweet = new JTweet(1L, "peter #java is cool!", new JUser("peter"));
    JTweet mentionTweet = new JTweet(2L, "@peter java is cool!", new JUser("peter2"));

    twSearch.store(hashtagTweet, false);
    twSearch.store(mentionTweet, true);

    int hashtagHits = twSearch.search("#java").size();
    assertEquals(1, hashtagHits);
    int mentionHits = twSearch.search("@peter").size();
    assertEquals(1, mentionHits);
}
/**
 * Stores five tweets, each by a different user, containing "ibood" in
 * different casings (one of them as "iBOOD.com") and checks the result
 * counts for several casings and for the ".com" variants.
 */
@Test
public void testCamelCase() throws Exception {
    JTweet camel = new JTweet(1L, "peter iBood is cool!", new JUser("peter1"));
    JTweet lower = new JTweet(2L, "ibood is cool!", new JUser("peter2"));
    JTweet upper = new JTweet(3L, "peter iBOOD is cool!", new JUser("peter3"));
    JTweet capitalized = new JTweet(4L, "Ibood is cool!", new JUser("peter4"));
    JTweet domain = new JTweet(5L, "iBOOD.com", new JUser("peter5"));

    // tweet 4 is stored last, with the 'true' flag
    twSearch.store(camel, false);
    twSearch.store(lower, false);
    twSearch.store(upper, false);
    twSearch.store(domain, false);
    twSearch.store(capitalized, true);

    //1,2,3,4,5
    assertEquals(5, twSearch.search("ibood").size());
    //1,2,3,4,5
    assertEquals(5, twSearch.search("iBood").size());
    //-> ok
    assertEquals(0, twSearch.search("bood").size());
    //1 -> ok
    assertEquals(1, twSearch.search("iBood.com").size());
    //5 -> ok
    assertEquals(1, twSearch.search("ibood.com").size());
    //missing 5
    assertEquals(1, twSearch.search("ibood.com*").size());
}
/**
 * Checks that "java" and "javascript" are kept apart: searching
 * {@code java} returns only the user of the java tweet, searching
 * {@code javascript} returns the two JavaScript users in the order
 * peter1, peter3.
 */
@Test
public void testSearchJavaScript() throws Exception {
    // keepwords.txt
    JTweet mixedCaseJs = new JTweet(1L, "peter JavaScript is cool!", new JUser("peter1"));
    JTweet plainJava = new JTweet(2L, "java is cool!", new JUser("peter2"));
    JTweet lowerCaseJs = new JTweet(3L, "peter javascript is cool!", new JUser("peter3"));

    twSearch.store(mixedCaseJs, false);
    twSearch.store(plainJava, false);
    twSearch.store(lowerCaseJs, true);

    assertEquals(1, twSearch.search("java").size());
    JUser javaUser = twSearch.search("java").iterator().next();
    assertEquals("peter2", javaUser.getScreenName());

    assertEquals(2, twSearch.search("javascript").size());
    Iterator<JUser> jsUsers = twSearch.search("javascript").iterator();
    JUser firstJsUser = jsUsers.next();
    assertEquals("peter1", firstJsUser.getScreenName());
    JUser secondJsUser = jsUsers.next();
    assertEquals("peter3", secondJsUser.getScreenName());
}
/**
 * Stores two tweets created one day apart and checks that sorting on
 * {@code dt} descending returns the later tweet first and ascending
 * returns the earlier tweet first. The expected tweet id is compared
 * with the {@code getTime()} value of the respective date.
 */
@Test
public void testSorting() {
    MyDate earlier = new MyDate();
    MyDate later = earlier.clone().plusDays(1);
    twSearch.store(createSolrTweet(earlier, "java is a test!", "peter"), false);
    twSearch.store(createSolrTweet(later, "java is cool and stable!", "peter2"), true);

    // newest first
    JetwickQuery descending = new TweetQuery("java").setSort("dt", "desc");
    List<JUser> users = new ArrayList<JUser>();
    twSearch.query(users, descending);
    assertEquals(2, users.size());
    assertEquals(later.getTime(), (long) users.get(0).getOwnTweets().iterator().next().getTwitterId());

    // oldest first, re-using the same result list
    JetwickQuery ascending = new TweetQuery("java").setSort("dt", "asc");
    users.clear();
    twSearch.query(users, ascending);
    assertEquals(earlier.getTime(), (long) users.get(0).getOwnTweets().iterator().next().getTwitterId());
}
/**
 * Checks the {@code loc} filter in two situations: when the location is
 * set on the user (both of the user's tweets match) and when it is set
 * on individual tweets (only the tweet with the requested location
 * matches).
 */
@Test
public void testLoc() {
    // location set on the user
    JUser locatedUser = new JUser("peter");
    locatedUser.setLocation("TEST");
    JTweet first = new JTweet(1L, "test tweet text", locatedUser);
    twSearch.store(first, false);
    JTweet second = new JTweet(2L, "test tweet text2", locatedUser);
    twSearch.store(second, true);

    List<JUser> byUserLocation = new ArrayList<JUser>();
    twSearch.query(byUserLocation, new TweetQuery().addFilterQuery("loc", "TEST"));
    assertEquals(1, byUserLocation.size());
    assertEquals(2, byUserLocation.get(0).getOwnTweets().size());

    // location set per tweet on a new user object with the same name
    JUser plainUser = new JUser("peter");
    JTweet third = new JTweet(3L, "test tweet text", plainUser);
    third.setLocation("TEST3");
    twSearch.store(third, false);
    JTweet fourth = new JTweet(4L, "test tweet text", plainUser);
    fourth.setLocation("TEST4");
    twSearch.store(fourth, true);

    List<JUser> byTweetLocation = new ArrayList<JUser>();
    twSearch.query(byTweetLocation, new TweetQuery().addFilterQuery("loc", "TEST3"));
    assertEquals(1, byTweetLocation.size());
    assertEquals(1, byTweetLocation.get(0).getOwnTweets().size());
}
/**
 * Verifies that deleting an empty collection does not throw and that a
 * stored tweet is no longer found after it has been deleted and the
 * index refreshed.
 */
@Test
public void testDelete() throws Exception {
    // do not throw exception
    // type-safe empty list instead of the raw Collections.EMPTY_LIST (avoids an unchecked conversion)
    twSearch.delete(Collections.<JTweet>emptyList());

    JUser otherUser = new JUser("otherUser");
    JTweet tw2 = new JTweet(2L, "java is cool and stable!", otherUser);
    twSearch.store(tw2, false);
    twSearch.refresh();
    assertEquals(1, twSearch.search("java").size());

    twSearch.delete(Arrays.asList(tw2));
    twSearch.refresh();
    assertEquals(0, twSearch.search("java").size());
}
/**
 * Stores a tweet with the geo location (123, 321) and checks that
 * {@code searchGeo} finds it at exactly that position but not at
 * (123, 100), using 1 as third argument in both calls.
 */
@Test
public void testGeo() throws Exception {
    JTweet plain = new JTweet(2L, "java is cool and stable!", new JUser("otherUser"));
    // store whatever the fluent setter returns
    JTweet located = plain.setGeoLocation(123, 321);
    twSearch.store(located, false);
    twSearch.refresh();

    int textHits = twSearch.search("java").size();
    assertEquals(1, textHits);
    int hitsAtPosition = twSearch.searchGeo(123, 321, 1).size();
    assertEquals(1, hitsAtPosition);
    int hitsElsewhere = twSearch.searchGeo(123, 100, 1).size();
    assertEquals(0, hitsElsewhere);
}
/**
 * Registers tweet 1 as reply on tweet 2 via {@code addReply} and checks
 * that {@code searchReplies(2L, false)} returns exactly tweet 1, while
 * the other id/flag combinations return nothing.
 */
@Test
public void testGetReplies() {
    JTweet reply = new JTweet(1L, "this is a Test ", new JUser("usera"));
    JTweet parent = new JTweet(2L, "this is a Test ", new JUser("userb"));
    parent.addReply(reply);

    twSearch.store(reply, true);
    twSearch.store(parent, true);

    // second argument 'true'
    assertEquals(0, twSearch.searchReplies(1L, true).size());
    assertEquals(0, twSearch.searchReplies(2L, true).size());
    // second argument 'false'
    assertEquals(0, twSearch.searchReplies(1L, false).size());
    assertEquals(1, twSearch.searchReplies(2L, false).size());

    JTweet found = twSearch.searchReplies(2L, false).iterator().next();
    assertEquals(1L, (long) found.getTwitterId());
}
/**
 * Registers the retweet (id 2) on the original tweet (id 1) via
 * {@code addReply} and checks that only
 * {@code searchReplies(1L, true)} returns a result, namely tweet 2.
 */
@Test
public void testGetRetweets() {
    JTweet original = new JTweet(1L, "this is a Test ", new JUser("usera"));
    JTweet retweet = new JTweet(2L, "rt @usera: this is a Test ", new JUser("userb"));
    original.addReply(retweet);

    twSearch.store(original, false);
    twSearch.store(retweet, true);

    // second argument 'true'
    assertEquals(1, twSearch.searchReplies(1L, true).size());
    assertEquals(0, twSearch.searchReplies(2L, true).size());
    // second argument 'false'
    assertEquals(0, twSearch.searchReplies(1L, false).size());
    assertEquals(0, twSearch.searchReplies(2L, false).size());

    JTweet found = twSearch.searchReplies(1L, true).iterator().next();
    assertEquals(2L, (long) found.getTwitterId());
}
/**
 * Checks duplicate detection for tweets with very similar text. Two
 * unrelated "wikileaks" tweets are stored directly, three similar tweets
 * are fed through the queue, and finally {@code findDuplicates} is called
 * for one more tweet that is not indexed.
 */
@Test
public void testFindDuplicates() {
twSearch.store(new JTweet(1L, "wikileaks is not a wtf", new JUser("userA")), false);
twSearch.store(new JTweet(2L, "news about wikileaks", new JUser("userB")), false);
// find dup is restricted to the last hour so use a current date
MyDate dt = new MyDate();
// NOTE(review): the same dt instance is passed through plusMinutes(1) several times below;
// if MyDate.plusMinutes mutates in place the offsets accumulate from call to call - confirm against MyDate
JTweet tw3 = new JTweet(3L, "wtf means wikileaks task force", new JUser("userC")).setCreatedAt(dt.toDate());
JTweet tw4 = new JTweet(4L, "wtf wikileaks task force", new JUser("userD")).setCreatedAt(dt.plusMinutes(1).toDate());
JTweet tw5 = new JTweet(5L, "RT @userC: wtf means wikileaks task force", new JUser("userE")).setCreatedAt(dt.plusMinutes(1).toDate());
// feed the three similar tweets through the queue and wait until it has been processed
twSearch.queueObjects(Arrays.asList(tw3, tw4, tw5));
twSearch.forceEmptyQueueAndRefresh();
assertEquals("should be empty. should NOT find tweet 4 because it is younger", 0, tw3.getDuplicates().size());
assertEquals("should find tweet 3", 1, tw4.getDuplicates().size());
// now look up duplicates for a tweet that is only passed in via the map
Map<Long, JTweet> map = new LinkedHashMap<Long, JTweet>();
JTweet tw = new JTweet(10L, "wtf wikileaks task force", new JUser("peter")).setCreatedAt(dt.plusMinutes(1).toDate());
map.put(10L, tw);
twSearch.findDuplicates(map);
assertEquals("should find tweets 3 and 4", 2, tw.getDuplicates().size());
}
/**
 * Feeds three spam-like tweets that differ only in their leading number
 * through the queue and checks how many duplicates were attached to each
 * of them (0, 1 and 2 in the order tw1, tw2, tw3).
 */
@Test
public void testSpamDuplicates() {
MyDate dt = new MyDate();
// NOTE(review): dt is shared by all three plusMinutes(1) calls; whether the tweets end up with
// identical or increasing creation dates depends on MyDate.plusMinutes mutating in place - confirm
JTweet tw1 = new JTweet(1L, "2488334. Increase your twitter followers now! Buy Twitter Followers", new JUser("userA")).setCreatedAt(dt.plusMinutes(1).toDate());
JTweet tw2 = new JTweet(2L, "349366. Increase your twitter followers now! Buy Twitter Followers", new JUser("userB")).setCreatedAt(dt.plusMinutes(1).toDate());
JTweet tw3 = new JTweet(31L, "2040312. Increase your twitter followers now! Buy Twitter Followers", new JUser("userC")).setCreatedAt(dt.plusMinutes(1).toDate());
// queue all three at once and wait until the queue has been processed
twSearch.queueObjects(Arrays.asList(tw1, tw2, tw3));
twSearch.forceEmptyQueueAndRefresh();
assertEquals(0, tw1.getDuplicates().size());
assertEquals(1, tw2.getDuplicates().size());
assertEquals(2, tw3.getDuplicates().size());
}
/**
 * Queues an original tweet with its retweet and a second tweet with an
 * explicit reply, then checks total hits and the reply/retweet counters
 * of all four tweets.
 */
@Test
public void testBatchUpdate() {
    JTweet original = createTweet(1L, "text", "usera");
    JTweet retweet = createTweet(2L, "RT @usera: text", "userb");
    JTweet second = createTweet(3L, "text2", "usera");
    JTweet replyToSecond = createTweet(4L, "hey I read your text", "userb").setInReplyTwitterId(3L);
    List<JTweet> batch = new ArrayList<JTweet>(Arrays.asList(original, retweet, second, replyToSecond));

    twSearch.queueObjects(batch);
    twSearch.forceEmptyQueueAndRefresh();

    long totalHits = twSearch.query(new TweetQuery()).hits().getTotalHits();
    assertEquals(4, totalHits);

    // tweet 1 was retweeted once
    assertEquals(1, twSearch.findByTwitterId(1L).getReplyCount());
    assertEquals(1, twSearch.findByTwitterId(1L).getRetweetCount());

    // the retweet itself has no replies
    assertEquals(0, twSearch.findByTwitterId(2L).getReplyCount());
    assertEquals(0, twSearch.findByTwitterId(2L).getRetweetCount());

    // tweet 3 got one reply but no retweet
    assertEquals(1, twSearch.findByTwitterId(3L).getReplyCount());
    assertEquals(0, twSearch.findByTwitterId(3L).getRetweetCount());

    // the reply has no replies of its own
    assertEquals(0, twSearch.findByTwitterId(4L).getReplyCount());
    assertEquals(0, twSearch.findByTwitterId(4L).getRetweetCount());
}
/**
 * Checks that reply relations given by {@code setInReplyTwitterId} are
 * connected even when the tweets arrive in separate updates, and that
 * {@code findByTwitterId} returns {@code null} for an unknown id.
 */
@Test
public void testConnectTwitterId() throws Exception {
    // A has replies B and C
    // C has replies D
    // store A and D
    JTweet tweetA = createTweet(1L, "A", "u1");
    JTweet tweetD = createTweet(4L, "D", "u4").setInReplyTwitterId(3L);
    twSearch.testUpdate(Arrays.asList(tweetA, tweetD));
    assertEquals(2, twSearch.getFeededTweets());

    JTweet tweetC = createTweet(3L, "C", "u3").setInReplyTwitterId(1L);
    twSearch.testUpdate(tweetC);
    assertEquals(3, twSearch.getFeededTweets());

    // now check if C was properly connected with A and D
    JTweet storedC = twSearch.findByTwitterId(3L);
    assertEquals(1, storedC.getReplyCount());

    // A should have C as reply
    JTweet storedA = twSearch.findByTwitterId(1L);
    assertEquals(1, storedA.getReplyCount());

    // now check if B was properly connected with A
    JTweet tweetB = createTweet(2L, "B", "u2").setInReplyTwitterId(1L);
    twSearch.testUpdate(tweetB);
    JTweet storedAAfterB = twSearch.findByTwitterId(1L);
    assertEquals(2, storedAAfterB.getReplyCount());

    // return null when not found
    assertNull(twSearch.findByTwitterId(23L));
}
/**
 * Indexes a tweet, then sends a bulk update for the same id with version
 * 10 and expects one entry in the returned collection. Afterwards the
 * same change is queued and the test checks that exactly one tweet with
 * retweet count 1 is left in the index.
 */
@Test
public void testLetUpdateFailButMergeToGetCorrectRetweetCount() throws IOException {
    twSearch.testUpdate(createTweet(1L, "text", "peter"));
    SearchHits initialHits = twSearch.query(new TweetQuery()).hits();
    assertEquals(1, initialHits.totalHits());

    // NOTE(review): presumably version 10 does not match the stored version - confirm against bulkUpdate
    Collection<Integer> bulkResult = twSearch.bulkUpdate(
            Arrays.asList(createTweet(1L, "text", "peter").setRetweetCount(1).setVersion(10)),
            twSearch.getIndexName());
    assertEquals(1, bulkResult.size());

    // queue the same modification; the queue is forced empty twice
    twSearch.queueObject(
            createTweet(1L, "text", "peter").setRetweetCount(1).setUpdatedAt(new Date()).setVersion(10));
    twSearch.forceEmptyQueueAndRefresh();
    twSearch.forceEmptyQueueAndRefresh();

    assertEquals(1, twSearch.getFeededTweets());
    assertEquals(1, twSearch.searchTweets(new TweetQuery()).size());
    assertEquals(1, twSearch.searchTweets(new TweetQuery()).get(0).getRetweetCount());
}
/**
 * Indexes a tweet and bulk-updates it using the version reported by the
 * search hit; the returned collection is expected to be empty and the
 * new retweet count must be visible after a refresh.
 */
@Test
public void testVersionCollision() throws IOException {
    twSearch.testUpdate(createTweet(1L, "text", "peter"));
    SearchHits currentHits = twSearch.query(new TweetQuery()).hits();
    assertEquals(1, currentHits.totalHits());

    // update with the version taken from the first hit
    Collection<Integer> bulkResult = twSearch.bulkUpdate(
            Arrays.asList(
                    createTweet(1L, "text", "peter")
                            .setRetweetCount(1)
                            .setVersion(currentHits.hits()[0].getVersion())),
            twSearch.getIndexName());
    assertEquals(0, bulkResult.size());

    twSearch.refresh();
    assertEquals(1, twSearch.searchTweets(new TweetQuery()).size());
    assertEquals(1, twSearch.searchTweets(new TweetQuery()).get(0).getRetweetCount());
    assertEquals(1, twSearch.getFeededTweets());
}
/**
 * Indexes two tweets of the same user in separate updates and checks
 * that the user found by name owns both of them.
 */
@Test
public void testAttach() throws Exception {
    JTweet first = createTweet(1, "test", "peter");
    twSearch.testUpdate(first);
    JTweet second = createTweet(2, "test2", "peter");
    twSearch.testUpdate(second);

    int ownTweets = twSearch.findByUserName("peter").getOwnTweets().size();
    assertEquals(2, ownTweets);
}
/**
 * Checks that a user who is only mentioned in a tweet ("karsten") cannot
 * be found by name, while the author ("peter") can.
 */
@Test
public void testDoNotSaveSecondUser() {
    twSearch.testUpdate(createTweet(5, "@peter @karsten bla bli", "peter"));

    // only mentioned
    assertNull(twSearch.findByUserName("karsten"));
    // the author
    assertNotNull(twSearch.findByUserName("peter"));
}
/**
 * Checks lookup by twitter id and the {@code _id_tweet} filter query in
 * its positive, OR-combined, negated and mixed forms.
 */
@Test
public void testFindById() {
    JTweet first = createTweet(5L, "test", "peter");
    twSearch.testUpdate(first);
    assertEquals("test", twSearch.findByTwitterId(5L).getText());

    JTweet second = createTweet(6L, "test2", "peter");
    twSearch.testUpdate(second);

    // single id
    assertEquals(1, twSearch.collectObjects(
            twSearch.query(new TweetQuery().addFilterQuery("_id_tweet", "5"))).size());
    // two ids combined with OR
    assertEquals(2, twSearch.collectObjects(
            twSearch.query(new TweetQuery().addFilterQuery("_id_tweet", "5 OR 6"))).size());
    // negated key
    assertEquals(1, twSearch.collectObjects(
            twSearch.query(new TweetQuery().addFilterQuery("-_id_tweet", "5"))).size());
    // negated and positive filter together
    assertEquals(1, twSearch.collectObjects(
            twSearch.query(new TweetQuery()
                    .addFilterQuery("-_id_tweet", "5")
                    .addFilterQuery("_id_tweet", "6"))).size());
}
/**
 * Indexes two tweets with identical text from different authors and
 * checks that both authors can be found by name.
 */
@Test
public void testDoSaveDuplicate() {
    JTweet byPeter = createTweet(4, "@peter bla bli", "peter");
    twSearch.testUpdate(byPeter);
    JTweet byKarsten = createTweet(5, "@peter bla bli", "karsten");
    twSearch.testUpdate(byKarsten);

    assertNotNull(twSearch.findByUserName("karsten"));
    assertNotNull(twSearch.findByUserName("peter"));
}
/**
 * Indexes a tweet that mentions "karsten" and then a tweet authored by
 * "karsten", and checks that the user "karsten" can be found by name.
 */
@Test
public void testIdVsName() {
    JTweet mentionsKarsten = createTweet(5, "@karsten bla bli", "peter");
    twSearch.testUpdate(mentionsKarsten);

    JTweet byKarsten = createTweet(6, "@peter bla bli", "karsten");
    twSearch.testUpdate(byKarsten);

    assertNotNull(twSearch.findByUserName("karsten"));
}
/**
 * Indexes two tweets whose author and mentioned user differ only in
 * letter case. The test contains no assertions; it only passes when
 * neither update throws.
 */
@Test
public void testNoDuplicateUser2() {
    JTweet lowerCase = createTweet(1, "@karsten bla bli", "peter");
    twSearch.testUpdate(lowerCase);

    JTweet capitalized = createTweet(2, "@Karsten bla bli", "Peter");
    twSearch.testUpdate(capitalized);
}
/**
 * Indexes the very same tweet object twice and checks that the index
 * contains one document and the author owns one tweet.
 */
@Test
public void testNoDuplicateTweet() {
    JTweet tweet = createTweet(123, "@karsten bla bli", "peter");
    // same object, two updates
    twSearch.testUpdate(tweet);
    twSearch.testUpdate(tweet);

    assertEquals(1, twSearch.countAll());
    int ownTweets = twSearch.findByUserName("peter").getOwnTweets().size();
    assertEquals(1, ownTweets);
}
/**
 * Indexes two tweets of one user, then indexes a newly created tweet
 * with the id and text of the first one again, and checks that the user
 * still owns exactly two tweets.
 */
@Test
public void testUpdateTweetsWhichIsInfluencedFromActivationDepth() throws Exception {
    JTweet first = createTweet(1L, "tweet1", "peter");
    JTweet second = createTweet(2L, "tweet2", "peter");
    twSearch.testUpdate(first);
    twSearch.testUpdate(second);
    assertEquals(2, twSearch.findByUserName("peter").getOwnTweets().size());

    // a new object with the same id and content as the first tweet
    JTweet firstAgain = createTweet(1L, "tweet1", "peter");
    twSearch.testUpdate(firstAgain);
    assertEquals(2, twSearch.findByUserName("peter").getOwnTweets().size());
}
/**
 * Indexes a tweet dated two days back, then sets
 * {@code removeOlderThanDays} to 1 and queues a current tweet. After the
 * queue has been processed only the current tweet is expected to remain.
 */
@Test
public void testUpdateAndRemove() throws Exception {
    JTweet oldTweet = createTweet(1L, "@karsten hajo", "peter");
    oldTweet.setCreatedAt(new MyDate().minusDays(2).toDate());
    twSearch.testUpdate(oldTweet);

    assertEquals(1, twSearch.countAll());
    String storedText = twSearch.search("hajo").iterator().next().getOwnTweets().iterator().next().getText();
    assertEquals("@karsten hajo", storedText);
    assertEquals(1, twSearch.findByUserName("peter").getOwnTweets().size());

    JTweet currentTweet = createTweet(2L, "test", "peter");
    currentTweet.setCreatedAt(new Date());
    twSearch.setRemoveOlderThanDays(1);
    twSearch.queueObject(currentTweet);
    twSearch.forceEmptyQueueAndRefresh();

    // the old tweet is gone, the current one is searchable
    assertEquals(1, twSearch.countAll());
    assertEquals(1, twSearch.search("test").size());
    assertEquals(0, twSearch.search("hajo").size());
    assertEquals(1, twSearch.findByUserName("peter").getOwnTweets().size());
}
/**
 * Feeds retweets of tweet 1 in three separate updates and checks that
 * its reply and retweet counters grow to 1, 2 and 3, while the retweets
 * themselves keep a reply count of 0.
 */
@Test
public void testDoubleUpdateShouldIncreaseReplies() throws Exception {
    // first update: the original and one retweet
    List<JTweet> firstBatch = Arrays.asList(
            createTweet(1L, "bla bli blu", "userA"),
            createTweet(2L, "RT @userA: bla bli blu", "userC"));
    twSearch.testUpdate(firstBatch);
    assertEquals(1, twSearch.findByTwitterId(1L).getReplyCount());
    assertEquals(1, twSearch.findByTwitterId(1L).getRetweetCount());

    // second update: two more retweets, one of them by userC again
    List<JTweet> secondBatch = Arrays.asList(
            createTweet(3L, "RT @userA: bla bli blu", "userC"),
            createTweet(4L, "RT @userA: bla bli blu", "userD"));
    twSearch.testUpdate(secondBatch);
    assertEquals(2, twSearch.findByTwitterId(1L).getReplyCount());
    assertEquals(2, twSearch.findByTwitterId(1L).getRetweetCount());
    assertEquals(0, twSearch.findByTwitterId(2L).getReplyCount());
    assertEquals(0, twSearch.findByTwitterId(3L).getReplyCount());
    assertEquals(0, twSearch.findByTwitterId(4L).getReplyCount());

    // third update: a retweet by a new user
    List<JTweet> thirdBatch = Arrays.asList(
            createTweet(5L, "RT @userA: bla bli blu", "userE"));
    twSearch.testUpdate(thirdBatch);
    assertEquals(3, twSearch.findByTwitterId(1L).getReplyCount());
    assertEquals(3, twSearch.findByTwitterId(1L).getRetweetCount());
}
/**
 * Checks that a retweet embedded behind additional text
 * ("... RT @userA: ...") also increases the reply count of the original
 * tweet.
 */
@Test
public void testConnectTweets() throws Exception {
    // A has reply B
    List<JTweet> originalAndRetweet = Arrays.asList(
            createTweet(1L, "bla bli blu", "userA"),
            createTweet(2L, "RT @userA: bla bli blu", "userC"));
    twSearch.testUpdate(originalAndRetweet);
    assertEquals(1, twSearch.findByTwitterId(1L).getReplyCount());

    JTweet embeddedRetweet = createTweet(3L, "@userXY see this nice fact: RT @userA: bla bli blu", "userB");
    twSearch.testUpdate(embeddedRetweet);
    assertEquals(2, twSearch.findByTwitterId(1L).getReplyCount());
}
/**
 * Indexes a tweet from userB addressed to userA and userA's retweet of
 * it, then checks document count and the counters of both tweets.
 */
@Test
public void testProcessToUser() throws Exception {
    JTweet addressed = createTweet(1L, "@userA bla bli blu", "userB");
    twSearch.testUpdate(addressed);
    JTweet retweet = createTweet(2L, "RT @userB: @userA bla bli blu", "userA");
    twSearch.testUpdate(retweet);

    assertEquals(2, twSearch.countAll());

    // the original tweet was retweeted once
    assertEquals(1, twSearch.findByTwitterId(1L).getReplyCount());
    assertEquals(1, twSearch.findByTwitterId(1L).getRetweetCount());

    // the retweet carries no counts
    assertEquals(0, twSearch.findByTwitterId(2L).getReplyCount());
    assertEquals(0, twSearch.findByTwitterId(2L).getRetweetCount());
}
/**
 * Indexes a tweet, a retweet by its own author and a retweet by another
 * user, and checks that the original ends up with a reply count of 1.
 */
@Test
public void testDoNotAllowSelfRetweets() throws Exception {
    JTweet original = createTweet(1L, "bla bli blu", "userA");
    twSearch.testUpdate(original);
    // retweet by the author of the original
    JTweet selfRetweet = createTweet(2L, "RT @userA: bla bli blu", "userA");
    twSearch.testUpdate(selfRetweet);
    // retweet by somebody else
    JTweet foreignRetweet = createTweet(3L, "RT @userA: bla bli blu", "userb");
    twSearch.testUpdate(foreignRetweet);

    assertEquals(1, twSearch.findByTwitterId(1L).getReplyCount());
}
/**
 * Checks that a second retweet of the same tweet by the same user does
 * not raise the retweet count of the original beyond 1.
 */
@Test
public void testDoNotAddDuplicateRetweets() throws Exception {
    JTweet original = createTweet(1L, "bla bli blu", "userA");
    twSearch.testUpdate(original);
    assertEquals(0, twSearch.findByTwitterId(1L).getReplyCount());

    JTweet firstRetweet = createTweet(2L, "RT @userA: bla bli blu", "userB");
    twSearch.testUpdate(firstRetweet);
    assertEquals(1, twSearch.findByTwitterId(1L).getRetweetCount());

    // same user, same retweet text, new id
    JTweet repeatedRetweet = createTweet(3L, "RT @userA: bla bli blu", "userB");
    twSearch.testUpdate(repeatedRetweet);
    assertEquals(1, twSearch.findByTwitterId(1L).getRetweetCount());
}
/**
 * Queues seven very similar tweets (originals and retweets of the same
 * story) and checks that all of them were fed and indexed and that
 * tweet 1 has a retweet count of 2.
 */
@Test
public void testNoFailureOnSimilarDocumentIndexing() {
    JTweet story = createTweet(1L, "Very clever story telling using HTML and Javascript... http://j.mp/eQmdl2", "newsycombinator");
    JTweet storyRetweet = createTweet(2L, "RT @newsycombinator: Very clever story telling using HTML and Javascript... http://j.mp/eQmdl2", "user1");
    JTweet storyRetweetWithRemark = createTweet(3L, "RT @newsycombinator: Very clever story telling using HTML and Javascript... http://j.mp/eQmdl2 - Although @DamagedGoods doesn't like it", "user2");
    JTweet botStory = createTweet(4L, "Very clever story telling using HTML and Javascript... http://hobolobo.net/", "hackernewsbot");
    JTweet trendingRetweet = createTweet(5L, "RT @newsyc20: Very clever story telling using HTML and Javascript... http://hobolobo.net/ (http://bit.ly/k3z6aJ) #trending", "user4");
    JTweet botRetweet = createTweet(6L, "RT @hackernewsbot: Very clever story telling using HTML and Javascript...... http://hobolobo.net/", "user5");
    JTweet botRetweetWithPrefix = createTweet(7L, "Brilliant!! RT @hackernewsbot: Very clever story telling using HTML and Javascript...... http://hobolobo.net/", "user6");
    Collection<JTweet> similarTweets = Arrays.asList(
            story, storyRetweet, storyRetweetWithRemark, botStory,
            trendingRetweet, botRetweet, botRetweetWithPrefix);

    twSearch.queueObjects(similarTweets);
    // the queue is forced empty twice, each time with the argument 300
    twSearch.forceEmptyQueueAndRefresh(300);
    twSearch.forceEmptyQueueAndRefresh(300);

    assertEquals(7, twSearch.getFeededTweets());
    assertEquals(7, twSearch.countAll());
    assertEquals(2, twSearch.findByTwitterId(1L).getRetweetCount());
}
/**
 * With {@code removeOlderThanDays} set to 1, a queued tweet dated two
 * days back must not be fed; after its creation date is reset to now the
 * same tweet must be fed.
 */
@Test
public void testDoNotAddOldTweets() {
    JTweet tweet = createTweet(2L, "RT @userA: bla bli blu", "userB");
    Date twoDaysAgo = new MyDate().minusDays(2).toDate();
    tweet.setCreatedAt(twoDaysAgo);
    twSearch.setRemoveOlderThanDays(1);

    // too old
    twSearch.queueObject(tweet);
    twSearch.forceEmptyQueueAndRefresh();
    assertEquals(0, twSearch.getFeededTweets());

    // current date
    Date now = new Date();
    tweet.setCreatedAt(now);
    twSearch.queueObject(tweet);
    twSearch.forceEmptyQueueAndRefresh();
    assertEquals(1, twSearch.getFeededTweets());
}
/**
 * Indexes a two-day-old tweet that has both {@code updatedAt} and
 * {@code createdAt} set, then indexes another tweet with the same id but
 * different text and checks that only the new text is searchable.
 */
@Test
public void testAddOldTweetsIfPersistent() {
    JTweet oldTweet = createTweet(2L, "RT @userA: bla bli blu", "userB");
    Date firstDate = new MyDate().minusDays(2).toDate();
    oldTweet.setUpdatedAt(firstDate);
    oldTweet.setCreatedAt(firstDate);
    twSearch.testUpdate(oldTweet);
    assertEquals(1, twSearch.getFeededTweets());

    // testOverwriteTweetsIfPersistent
    JTweet replacement = createTweet(2L, "totally new", "userB");
    Date secondDate = new MyDate().minusDays(2).toDate();
    replacement.setUpdatedAt(secondDate);
    replacement.setCreatedAt(secondDate);
    twSearch.testUpdate(replacement);
    assertEquals(1, twSearch.getFeededTweets());

    // old text is gone, new text is found
    assertEquals(0, twSearch.search("bla").size());
    assertEquals(1, twSearch.search("new").size());
}
/**
 * Queues two tweets dated two days back (one with a retweet count of
 * 100, one with {@code updatedAt} set) and checks that both are indexed.
 * Then, with {@code removeOlderThanDays} set to 1, queues a third old
 * tweet and checks that it is not fed while the first two remain.
 */
@Test
public void testDontRemoveOldIfPersistent() throws Exception {
    JTweet retweetedOften = createTweet(4L, "newbla next", "userc").setRetweetCount(100);
    retweetedOften.setCreatedAt(new MyDate().minusDays(2).toDate());

    JTweet updatedTweet = createTweet(2L, "RT @userA: bla bli blu", "userB");
    Date twoDaysAgo = new MyDate().minusDays(2).toDate();
    updatedTweet.setUpdatedAt(twoDaysAgo);
    updatedTweet.setCreatedAt(twoDaysAgo);

    // until date is very old to let tweets going through
    twSearch.queueObjects(Arrays.asList(updatedTweet, retweetedOften));
    twSearch.forceEmptyQueueAndRefresh();
    assertEquals(2, twSearch.getFeededTweets());
    assertEquals(2, twSearch.countAll());
    assertEquals(100, twSearch.findByTwitterId(4L).getRetweetCount());
    assertNotNull(twSearch.findByTwitterId(2L).getUpdatedAt());

    // a third old tweet arrives after the age limit has been set
    JTweet lateArrival = createTweet(3L, "another tweet grabbed from search", "userB");
    lateArrival.setCreatedAt(new MyDate().minusDays(2).toDate());
    twSearch.setRemoveOlderThanDays(1);
    twSearch.queueObject(lateArrival);
    twSearch.forceEmptyQueueAndRefresh();

    assertEquals(0, twSearch.getFeededTweets());
    assertEquals(2, twSearch.countAll());
    assertTrue(twSearch.searchTweets(new TweetQuery()).contains(updatedTweet));
    assertTrue(twSearch.searchTweets(new TweetQuery()).contains(retweetedOften));
}
/**
 * First indexes an old tweet with three retweets (all dated about two
 * days back) and checks the counters. Then sets
 * {@code removeOlderThanDays} to 1, indexes four current tweets and
 * checks that only those four remain and that the old text is no longer
 * found.
 */
@Test
public void testComplexUpdate() throws Exception {
    // old generation: one original and three retweets
    JTweet oldOriginal = createTweet(1L, "bla bli blu", "userA");
    oldOriginal.setCreatedAt(new MyDate().minusDays(2).toDate());
    JTweet oldRetweetB = createTweet(2L, "rt @usera: bla bli blu", "userB");
    oldRetweetB.setCreatedAt(new MyDate().minusDays(2).plusMinutes(1).toDate());
    JTweet oldRetweetC = createTweet(3L, "rt @usera: bla bli blu", "userC");
    oldRetweetC.setCreatedAt(new MyDate().minusDays(2).plusMinutes(1).toDate());
    JTweet oldRetweetD = createTweet(4L, "rt @usera: bla bli blu", "userD");
    oldRetweetD.setCreatedAt(new MyDate().minusDays(2).plusMinutes(1).toDate());

    twSearch.testUpdate(Arrays.asList(oldOriginal, oldRetweetB, oldRetweetC, oldRetweetD));
    assertEquals(1, twSearch.findByUserName("usera").getOwnTweets().size());
    assertEquals(3, twSearch.findByTwitterId(1L).getReplyCount());
    assertEquals(4, twSearch.getFeededTweets());

    // new generation: three current tweets and a retweet of the first one
    JTweet newOriginal = createTweet(101L, "newtext two", "usera");
    newOriginal.setCreatedAt(new Date());
    JTweet newOne = createTweet(102L, "newbla one", "userd");
    newOne.setCreatedAt(new Date());
    JTweet newTwo = createTweet(103L, "newbla two", "userd");
    newTwo.setCreatedAt(new Date());
    JTweet newRetweet = createTweet(104L, "rt @usera: newtext two", "userc");
    // one minute after the tweet it retweets
    newRetweet.setCreatedAt(new MyDate(newOriginal.getCreatedAt()).plusMinutes(1).toDate());

    twSearch.setRemoveOlderThanDays(1);
    twSearch.testUpdate(Arrays.asList(newOriginal, newOne, newTwo, newRetweet));
    assertEquals(4, twSearch.getFeededTweets());
    assertEquals(4, twSearch.countAll());
    assertEquals(1, twSearch.findByTwitterId(101L).getRetweetCount());
    assertEquals(1, twSearch.findByTwitterId(101L).getReplyCount());

    // no tweet exists with that string
    assertEquals(0, twSearch.search("bla bli blu").size());
}
/**
 * Indexes a tweet whose text is full of special characters (dashes,
 * ampersand, quotes, parentheses, a URL). The test has no assertions; it
 * passes when the update does not throw.
 */
@Test
public void testDoNotThrowQueryParserException() {
    String text = "rt @jenny2s: -- Earth, Wind & Fire - September (From \"Live In Japan\")"
            + " http://www.youtube.com/watch?v=hy-huQAMPQA via @youtube --- HAPPY SEPTEMBER !!";
    JTweet trickyTweet = createTweet(1L, text, "usera");
    twSearch.testUpdate(trickyTweet);
}
/**
 * Passes two tweet objects with the same id and content in a single
 * update and checks that exactly one tweet was fed and that it can be
 * found by id.
 */
@Test
public void testUpdateList() {
    JTweet first = createTweet(1L, "test", "peter");
    JTweet sameAgain = createTweet(1L, "test", "peter");
    twSearch.testUpdate(Arrays.asList(first, sameAgain));

    assertEquals(1, twSearch.getFeededTweets());
    assertNotNull(twSearch.findByTwitterId(1L));
}
/**
 * Indexes three tweets of "usera" and checks that
 * {@code getUserChoices} returns one entry for the input "user" and none
 * for "loose".
 */
@Test
public void testUserChoices() {
    List<JTweet> tweets = Arrays.asList(
            createTweet(1L, "test", "usera"),
            createTweet(2L, "pest", "usera"),
            createTweet(3L, "schnest", "usera"));
    twSearch.testUpdate(tweets);

    Collection<String> matching = twSearch.getUserChoices(null, "user");
    assertEquals(1, matching.size());

    Collection<String> notMatching = twSearch.getUserChoices(null, "loose");
    assertEquals(0, notMatching.size());
}
/**
 * Indexes twelve tweets of one user and checks the suggestions returned
 * by {@code getQueryChoices} for a complete term, for two prefixes and
 * for a term followed by a blank.
 */
@Test
public void testQueryChoices() {
    List<JTweet> tweets = Arrays.asList(
            createTweet(1L, "abcd", "usera"),
            createTweet(2L, "bluest abcdxy abcdxy", "usera"),
            createTweet(3L, "bluest bluest abcdxy", "usera"),
            createTweet(4L, "abort", "usera"),
            createTweet(5L, "bingo", "usera"),
            createTweet(6L, "chemical", "usera"),
            createTweet(7L, "destination", "usera"),
            createTweet(8L, "estimation", "usera"),
            createTweet(9L, "finish", "usera"),
            createTweet(10L, "ginie", "usera"),
            createTweet(11L, "home", "usera"),
            // why is as last tweet this necessary? otherwise we don't get 'home' as tag!?
            createTweet(12L, "testing", "usera"));
    twSearch.testUpdate(tweets);

    long bluestTagged = twSearch.query(new TweetQuery().addFilterQuery("tag", "bluest")).getHits().getTotalHits();
    assertEquals(2L, bluestTagged);
    long homeHits = twSearch.query(new TweetQuery("home")).getHits().getTotalHits();
    assertEquals(1L, homeHits);

    // a complete term yields no suggestion
    Collection<String> forFullTerm = twSearch.getQueryChoices(null, "abcdxy");
    assertEquals(0, forFullTerm.size());

    Collection<String> forAb = twSearch.getQueryChoices(null, "ab");
    assertEquals(3, forAb.size());
    assertTrue(forAb.contains("abcdxy"));

    // it is important to filter (with regex filter) away some tags otherwise we don't get the important ones:
    Collection<String> forHo = twSearch.getQueryChoices(null, "ho");
    assertEquals(1, forHo.size());
    assertTrue(forHo.contains("home"));

    // a term followed by a blank asks for a second term
    Collection<String> forSecondTerm = twSearch.getQueryChoices(null, "abcdxy ");
    assertEquals(1, forSecondTerm.size());
    assertTrue(forSecondTerm.contains("abcdxy bluest"));
}
/**
 * Indexes four tweets dated about one day back and checks that
 * {@code getQueryChoices} still suggests "obama bluest" when the query
 * passed in carries {@code addLatestDateFilter(8)}.
 */
@Test
public void testQueryChoicesWithoutDateRestrictions() {
    List<JTweet> yesterdaysTweets = Arrays.asList(
            createTweet(new MyDate().minusDays(1).minusMinutes(3), "obama obama", "usera"),
            createTweet(new MyDate().minusDays(1).minusMinutes(2), "bluest obama obama", "usera"),
            createTweet(new MyDate().minusDays(1).minusMinutes(1), "bluest bluest obama", "usera"),
            createTweet(new MyDate().minusDays(1), "obama bluest again and again", "usera"));
    twSearch.testUpdate(yesterdaysTweets);

    long bluestTagged = twSearch.query(new TweetQuery().addFilterQuery("tag", "bluest")).getHits().getTotalHits();
    assertEquals(3L, bluestTagged);

    Collection<String> suggestions = twSearch.getQueryChoices(new TweetQuery().addLatestDateFilter(8), "obama ");
    assertEquals(1, suggestions.size());
    assertTrue(suggestions.contains("obama bluest"));
}
/**
 * Checks the filter queries of the query built by
 * {@code createFindOriginQuery}: a retweet-count filter is present when
 * the requested minimum is reachable, otherwise only one filter query is
 * left.
 */
@Test
public void testFindOrigin() {
    List<JTweet> tweets = Arrays.asList(
            createTweet(1L, "text", "usera"),
            createTweet(2L, "RT @usera: text", "userb"),
            createTweet(3L, "RT @usera: text", "userc"),
            createTweet(4L, "new text", "userd"));
    twSearch.testUpdate(tweets);

    JetwickQuery reachable = twSearch.createFindOriginQuery(null, "text", 1);
    // crt_b, retw_i
    assertEquals(2, reachable.getFilterQueries().size());
    assertEquals(ElasticTweetSearch.RT_COUNT, reachable.getFilterQueries().get(1).getKey());
    assertEquals("[1 TO *]", reachable.getFilterQueries().get(1).getValue());

    // too high minResults
    int minResults = 3;
    JetwickQuery tooHigh = twSearch.createFindOriginQuery(null, "text", minResults);
    assertEquals(1, tooHigh.getFilterQueries().size());

    // no retweets for 'new text'
    JetwickQuery withoutRetweets = twSearch.createFindOriginQuery(null, "new text", 2);
    assertEquals(1, withoutRetweets.getFilterQueries().size());
}
/**
 * Indexes three tweets and checks the number of entries of the
 * {@code tag} facet as well as the number of objects returned for the
 * filter {@code tag:atom}.
 */
@Test
public void testFacets() {
    List<JTweet> tweets = Arrays.asList(
            createTweet(1L, "Beitrag atom. atom again", "userA"),
            createTweet(2L, "atom gruene", "userA"),
            createTweet(3L, "third tweet", "userA"));
    twSearch.testUpdate(tweets);

    SearchResponse facetResponse = twSearch.query(new TweetQuery(true));
    assertEquals(3, facetResponse.hits().getTotalHits());
    // only the second tweet will contain a tag with atom!
    TermsFacet tagFacet = (TermsFacet) facetResponse.facets().facet("tag");
    assertEquals(1, tagFacet.getEntries().size());

    SearchResponse filteredResponse = twSearch.query(new TweetQuery().addFilterQuery("tag", "atom"));
    assertEquals(2, twSearch.collectObjects(filteredResponse).size());
}
/**
 * Round trip for URL entries: {@code createDoc} must emit the {@code *_1_s} fields of a
 * tweet's single URL entry, and {@code readDoc} must rebuild an equal entry from a field map.
 *
 * @throws IOException declared by the original test; presumably raised while building the document
 */
@Test
public void testReadUrlEntries() throws IOException {
    // --- writing: tweet with one resolved url entry -> document fields
    UrlEntry writtenEntry = new UrlEntry(2, 18, "http://fulltest.de/bla");
    writtenEntry.setResolvedDomain("resolved-domain.de");
    writtenEntry.setResolvedTitle("ResolvedTitel");
    List<UrlEntry> urlEntries = new ArrayList<UrlEntry>();
    urlEntries.add(writtenEntry);

    JTweet source = new JTweet(1L, "text", new JUser("peter"));
    source.setUrlEntries(urlEntries);

    XContentBuilder doc = twSearch.createDoc(source);
    String json = doc.prettyPrint().string();
    assertTrue(json.contains("\"url_pos_1_s\":\"2,18\""));
    assertTrue(json.contains("\"dest_url_1_s\":\"http://fulltest.de/bla\""));
    assertTrue(json.contains("\"dest_domain_1_s\":\"resolved-domain.de\""));
    assertTrue(json.contains("\"dest_title_1_s\":\"ResolvedTitel\""));

    // --- reading: field map -> tweet with one url entry
    Map<String, Object> fields = new LinkedHashMap<String, Object>();
    fields.put("user", "peter");
    fields.put("tw", "text");
    fields.put("url_i", 1);
    fields.put("retw_i", 0);
    fields.put("repl_i", 0);
    fields.put("url_pos_1_s", "2,18");
    fields.put("dest_url_1_s", "http://fulltest.de/bla");
    fields.put("dest_domain_1_s", "resolved-domain.de");
    fields.put("dest_title_1_s", "ResolvedTitel");

    JTweet restored = twSearch.readDoc("1", 0L, fields);
    assertEquals(1, restored.getUrlEntries().size());

    UrlEntry readEntry = restored.getUrlEntries().iterator().next();
    assertEquals("http://fulltest.de/bla", readEntry.getResolvedUrl());
    assertEquals("resolved-domain.de", readEntry.getResolvedDomain());
    assertEquals("ResolvedTitel", readEntry.getResolvedTitle());
    assertEquals(2, readEntry.getIndex());
    assertEquals(18, readEntry.getLastIndex());
}
/**
 * Feeds two tweets whose URL entries share resolved domain and title but point to different
 * URLs, and expects exactly one hit in the index afterwards.
 *
 * NOTE(review): both tweets are created with the id 1L, so the single hit could also be
 * caused by the identical id rather than by the identical URL title - confirm the intent.
 *
 * @throws IOException declared by the original test
 */
@Test
public void testSameUrlTitleButDifferentUrl() throws IOException {
    UrlEntry firstUrl = new UrlEntry(2, 18, "http://fulltest.de/url2");
    firstUrl.setResolvedDomain("resolved-domain.de");
    firstUrl.setResolvedTitle("ResolvedTitel");
    List<UrlEntry> firstEntries = new ArrayList<UrlEntry>();
    firstEntries.add(firstUrl);
    JTweet tw1 = new JTweet(1L, "text", new JUser("peter"));
    tw1.setUrlEntries(firstEntries);

    UrlEntry secondUrl = new UrlEntry(2, 18, "http://fulltest.de/urlNext");
    secondUrl.setResolvedDomain("resolved-domain.de");
    secondUrl.setResolvedTitle("ResolvedTitel");
    List<UrlEntry> secondEntries = new ArrayList<UrlEntry>();
    secondEntries.add(secondUrl);
    JTweet tw2 = new JTweet(1L, "text2", new JUser("peter2"));
    tw2.setUrlEntries(secondEntries);

    twSearch.testUpdate(Arrays.asList(tw1, tw2));

    assertEquals(1, twSearch.query(new TweetQuery()).hits().totalHits());
}
/**
 * {@code fetchMoreTweets} is called with a map that already contains tweet 2 and a user
 * map containing "peter"; afterwards the user must own exactly one tweet, the one with id 1.
 *
 * @throws IOException declared by the original test
 */
@Test
public void testGetMoreTweets() throws IOException {
    // fill index with 2 tweets and 1 user
    JTweet unknownTweet = createTweet(1L, "test", "peter");
    JTweet knownTweet = createTweet(2L, "text", "peter");
    twSearch.testUpdate(Arrays.asList(unknownTweet, knownTweet));

    Map<Long, JTweet> alreadyExistingTw = new LinkedHashMap<Long, JTweet>();
    alreadyExistingTw.put(2L, knownTweet);

    JUser peter = new JUser("peter");
    Map<String, JUser> users = new LinkedHashMap<String, JUser>();
    users.put("peter", peter);

    // return the tweet (1L) which is not already in the map!
    twSearch.fetchMoreTweets(alreadyExistingTw, users);

    assertEquals(1, peter.getOwnTweets().size());
    assertEquals(1L, (long) peter.getOwnTweets().iterator().next().getTwitterId());
}
/**
 * Verifies stemming in two places: searching the index with a different word form than
 * the indexed one, and {@code SimilarTweetQuery.doSnowballStemming} on a token stream.
 *
 * @throws IOException declared by the original test; presumably raised by the tokenizer
 */
@Test
public void testSnowballStemming() throws IOException {
    twSearch.testUpdate(Arrays.asList(createTweet(1L, "duplication", "peter"),
            createTweet(2L, "testing", "peter")));

    // the query terms differ from the indexed words but each must still give one hit
    assertEquals(1, twSearch.searchTweets(new TweetQuery("duplicate")).size());
    assertEquals(1, twSearch.searchTweets(new TweetQuery("test")).size());

    // direct stemming of two whitespace separated terms
    Set<String> set = new SimilarTweetQuery().doSnowballStemming(
            new WhitespaceTokenizer(new StringReader("duplication tester")));
    assertEquals(2, set.size());
    assertTrue(set.contains("tester"));
    assertTrue(set.contains("duplic"));
}
/**
 * Searches for "test" restricted to the friends "peter" and "tester" and expects exactly
 * the tweets with the ids 1 and 2 to be returned.
 *
 * @throws Exception declared by the original test
 */
@Test
public void testFollowerSearch() throws Exception {
    twSearch.testUpdate(Arrays.asList(
            createTweet(1L, "test this", "peter"),
            createTweet(2L, "test others", "tester"),
            createTweet(3L, "testnot this", "peter"),
            createTweet(4L, "test this", "peternot")));

    Collection<String> friends = Arrays.asList("peter", "tester");
    Collection<JTweet> found = twSearch.collectObjects(
            twSearch.query(new TweetQuery("test").createFriendsQuery(friends)));
    assertEquals(2, found.size());

    // both results must be one of the two expected tweets
    int expectedIds = 0;
    for (JTweet tweet : found) {
        long id = tweet.getTwitterId();
        if (id == 1L || id == 2L) {
            expectedIds++;
        }
    }
    assertEquals(2, expectedIds);
}
/**
 * Fills two indices with two and three tweets, merges them into a third index and expects
 * that index to contain all five tweets.
 *
 * @throws IOException declared by the original test
 * @throws InterruptedException declared by the original test
 */
@Test
public void testIndexMerge() throws IOException, InterruptedException {
    String index1 = "index1";
    String index2 = "index2";
    String resindex = "resindex";
    List<String> allIndices = Arrays.asList(index1, index2, resindex);

    for (String name : allIndices) {
        twSearch.createIndex(name);
    }
    twSearch.waitForYellow(resindex);
    // start from empty indices
    for (String name : allIndices) {
        twSearch.deleteAll(name, twSearch.getIndexType());
    }

    List<JTweet> firstBatch = Arrays.asList(
            new JTweet(1L, "hey cool one", new JUser("peter")),
            new JTweet(2L, "two! another one", new JUser("test")));
    twSearch.bulkUpdate(firstBatch, index1);

    List<JTweet> secondBatch = Arrays.asList(
            new JTweet(3L, "second index. one", new JUser("people")),
            new JTweet(4L, "snd index! two", new JUser("k")),
            new JTweet(5L, "snd index! third", new JUser("k")));
    twSearch.bulkUpdate(secondBatch, index2);

    twSearch.mergeIndices(Arrays.asList(index1, index2), resindex, 10, true, twSearch, null);
    assertEquals(5, twSearch.countAll(resindex));
}
/**
 * Stores the same two tweets first in one index and then in both indices and checks the
 * per-index and the combined document counts.
 *
 * @throws IOException declared by the original test
 * @throws InterruptedException declared by the original test
 */
@Test
public void testUpdateOneTweetForTwoIndices() throws IOException, InterruptedException {
    String index1 = "index1";
    String index2 = "index2";
    List<String> bothIndices = Arrays.asList(index1, index2);

    for (String name : bothIndices) {
        twSearch.saveCreateIndex(name, false);
    }
    twSearch.waitForYellow(index1);
    for (String name : bothIndices) {
        twSearch.deleteAll(name, twSearch.getIndexType());
    }

    JUser author = new JUser("peter2");
    List<JTweet> tweets = new ArrayList<JTweet>();
    for (int id = 0; id < 2; id++) {
        tweets.add(new JTweet(id, "nice day", author));
    }

    // first round: only index1 receives the tweets
    twSearch.bulkUpdate(tweets, index1, true);
    assertEquals(2, twSearch.countAll(index1));
    assertEquals(0, twSearch.countAll(index2));

    // second round: same tweets again into index1 and additionally into index2
    twSearch.bulkUpdate(tweets, index1, true);
    twSearch.bulkUpdate(tweets, index2, true);
    assertEquals(2, twSearch.countAll(index1));
    // !! indices are independent !!
    assertEquals(2, twSearch.countAll(index2));
    assertEquals(4, twSearch.countAll(index1, index2));
}
/**
 * Merges two indices with overlapping tweet ids into a result index using a page size of 2
 * and expects 300 distinct tweets, both by count and by a query on the result index.
 *
 * @throws Exception declared by the original test
 */
@Test
public void testIndexMergeWithPaging() throws Exception {
    String index1 = "index1";
    String index2 = "index2";
    String resindex = "resindex";
    List<String> allIndices = Arrays.asList(index1, index2, resindex);

    for (String name : allIndices) {
        twSearch.saveCreateIndex(name, false);
    }
    twSearch.waitForYellow(resindex);
    // clearing index
    for (String name : allIndices) {
        twSearch.deleteAll(name, twSearch.getIndexType());
    }

    // this store makes a problem later on, when searching on index1
    twSearch.bulkUpdate(Arrays.asList(new JTweet(1L, "test", new JUser("testuser"))), index1, true);

    // index1: ids 0..99 of "peter" and ids 100..199 of "peter2"
    JUser peter = new JUser("peter");
    JUser peter2 = new JUser("peter2");
    List<JTweet> firstBatch = new ArrayList<JTweet>();
    for (int id = 0; id < 100; id++) {
        firstBatch.add(new JTweet(id, "hey cool one", peter));
    }
    for (int id = 100; id < 200; id++) {
        firstBatch.add(new JTweet(id, "nice day", peter2));
    }
    twSearch.bulkUpdate(firstBatch, index1, true);

    // index2: ids 0..99 again (identical tweets -> TODO do or don't store?) ...
    List<JTweet> secondBatch = new ArrayList<JTweet>();
    for (int id = 0; id < 100; id++) {
        secondBatch.add(new JTweet(id, "[updated] hey cool one", peter));
    }
    // ... plus different tweets with the ids 300..399
    JUser peter3 = new JUser("peter3");
    for (int id = 300; id < 400; id++) {
        secondBatch.add(new JTweet(id, "what's going on?", peter3));
    }
    twSearch.bulkUpdate(secondBatch, index2, true);

    twSearch.mergeIndices(Arrays.asList(index1, index2), resindex, 2, true, twSearch, null);

    // 100 + 100 in the first list. in list2 only 100 new => 300
    assertEquals(300, twSearch.countAll(resindex));

    twSearch.setIndexName(resindex);
    SearchResponse merged = twSearch.query(new TweetQuery().setSize(1000));
    assertEquals(300, twSearch.collectObjects(twSearch.query(new ArrayList(), merged)).size());
}
/**
 * Merges index1 into resindex, deletes index1, expects counting on the deleted index to
 * throw {@link IndexMissingException}, and finally checks that an alias named like the
 * deleted index makes the 100 merged tweets countable under that name again.
 *
 * @throws IOException declared by the original test
 * @throws InterruptedException declared by the original test
 */
@Test
public void testDeleteAndAlias() throws IOException, InterruptedException {
    // make sure we can delete all entries from resindex
    String index1 = "index1";
    String resindex = "resindex";
    twSearch.saveCreateIndex(index1, false);
    twSearch.saveCreateIndex(resindex, false);
    twSearch.waitForYellow(resindex);
    twSearch.deleteAll(index1, twSearch.getIndexType());
    twSearch.deleteAll(resindex, twSearch.getIndexType());

    // index2 was created in a previous test;
    // don't remove index2 to make sure we really grab only from index1
    // twSearch.deleteAll("index2");
    List<JTweet> list = new ArrayList<JTweet>();
    JUser user = new JUser("peter");
    for (int i = 0; i < 100; i++) {
        list.add(new JTweet(i, "hey cool one", user));
    }
    twSearch.bulkUpdate(list, index1, true);
    assertEquals(0, twSearch.countAll(resindex));

    twSearch.mergeIndices(Arrays.asList(index1), resindex, 2, true, twSearch, null);
    assertEquals(100, twSearch.countAll(resindex));
    assertEquals(100, twSearch.countAll(index1));

    twSearch.deleteIndex(index1);
    try {
        twSearch.countAll(index1);
        // any normal return is wrong here, whatever the count is
        fail("countAll on the deleted index '" + index1 + "' should throw IndexMissingException");
    } catch (IndexMissingException expected) {
        // expected: the index no longer exists
    }

    // the alias makes the merged data reachable under the old index name
    twSearch.addIndexAlias(resindex, index1);
    assertEquals(100, twSearch.countAll(index1));
}
/**
 * Stores a tweet with the same id but different retweet counts in two indices and expects
 * a match-all search over both indices to yield two collected objects.
 *
 * @throws Exception declared by the original test
 */
@Test
public void testQueryMultipleIndices() throws Exception {
    String index1 = "index1";
    String index2 = "index2";
    List<String> bothIndices = Arrays.asList(index1, index2);

    for (String name : bothIndices) {
        twSearch.saveCreateIndex(name, false);
    }
    twSearch.waitForYellow(index1);
    for (String name : bothIndices) {
        twSearch.deleteAll(name, twSearch.getIndexType());
    }

    // same id and text in both indices, only the retweet count differs
    twSearch.bulkUpdate(
            Arrays.asList(new JTweet(1L, "test", new JUser("testuser")).setRetweetCount(0)),
            index1, true);
    twSearch.bulkUpdate(
            Arrays.asList(new JTweet(1L, "test", new JUser("testuser")).setRetweetCount(2)),
            index2, true);

    SearchResponse rsp = twSearch.getClient()
            .prepareSearch(index1, index2)
            .setVersion(true)
            .setQuery(QueryBuilders.matchAllQuery())
            .execute()
            .actionGet();
    assertEquals(2, twSearch.collectObjects(rsp).size());
}
/**
 * A USER filter for "peter" must return only one of the two tweets written by the users
 * "peter" and "peter_not".
 */
@Test
public void testFindUser() {
    List<JTweet> tweets = Arrays.asList(
            createTweet(1L, "test this", "peter"),
            createTweet(4L, "test this", "peter_not"));
    twSearch.testUpdate(tweets);

    SearchResponse rsp = twSearch.query(
            new TweetQuery().addFilterQuery(ElasticTweetSearch.USER, "peter"));
    assertEquals(1, twSearch.collectObjects(rsp).size());
}
/**
 * With an original-tweet filter, a latest-date filter and a user filter for "peter",
 * {@code suggestRemoval} must return the keys USER, DATE and IS_RT in this order.
 */
@Test
public void testSuggestFilterRemoval() {
    MyDate md = new MyDate();
    // The tweets are created one after another in the original argument order.
    // NOTE(review): if MyDate.minusHours modifies md itself, tweet 4 gets the shifted date as well - confirm.
    JTweet retweetOfPeter = createTweet(1L, "RT @user3: test this first tweet", "peter").setCreatedAt(md.toDate());
    JTweet tweetOfPeter2 = createTweet(2L, "test others", "peter2").setCreatedAt(md.toDate());
    JTweet tweetOfPeter3 = createTweet(3L, "testnot this", "peter3").setCreatedAt(md.minusHours(2).toDate());
    JTweet tweetOfPeter4 = createTweet(4L, "test this", "peter4").setCreatedAt(md.toDate());
    twSearch.testUpdate(Arrays.asList(retweetOfPeter, tweetOfPeter2, tweetOfPeter3, tweetOfPeter4));

    JetwickQuery q = new TweetQuery(false)
            .addIsOriginalTweetFilter()
            .addLatestDateFilter(1)
            .addUserFilter("peter");

    Collection<String> suggested = twSearch.suggestRemoval(q);
    assertEquals(3, suggested.size());

    Iterator<String> keyIter = suggested.iterator();
    assertEquals(ElasticTweetSearch.USER, keyIter.next());
    assertEquals(ElasticTweetSearch.DATE, keyIter.next());
    assertEquals(ElasticTweetSearch.IS_RT, keyIter.next());
}
/**
 * Same filters as in {@code testSuggestFilterRemoval}, but no indexed tweet is written by
 * the user "peter" (the first author is "peter1"); only DATE and IS_RT are expected.
 */
@Test
public void testSuggestFilterRemoval2() {
    MyDate md = new MyDate();
    // The tweets are created one after another in the original argument order.
    // NOTE(review): if MyDate.minusHours modifies md itself, tweet 4 gets the shifted date as well - confirm.
    JTweet retweetOfPeter1 = createTweet(1L, "RT @user3: test this first tweet", "peter1").setCreatedAt(md.toDate());
    JTweet tweetOfPeter2 = createTweet(2L, "test others", "peter2").setCreatedAt(md.toDate());
    JTweet tweetOfPeter3 = createTweet(3L, "testnot this", "peter3").setCreatedAt(md.minusHours(2).toDate());
    JTweet tweetOfPeter4 = createTweet(4L, "test this", "peter4").setCreatedAt(md.toDate());
    twSearch.testUpdate(Arrays.asList(retweetOfPeter1, tweetOfPeter2, tweetOfPeter3, tweetOfPeter4));

    JetwickQuery q = new TweetQuery(false)
            .addIsOriginalTweetFilter()
            .addLatestDateFilter(1)
            .addUserFilter("peter");

    Collection<String> suggested = twSearch.suggestRemoval(q);
    assertEquals(2, suggested.size());

    // in contrast to testSuggestFilterRemoval the USER key is not part of the result
    Iterator<String> keyIter = suggested.iterator();
    assertEquals(ElasticTweetSearch.DATE, keyIter.next());
    assertEquals(ElasticTweetSearch.IS_RT, keyIter.next());
}
/**
 * Only tweets dated at least two hours back are indexed; for a query whose only filter is
 * the latest-date filter, {@code suggestRemoval} must return exactly the DATE key.
 */
@Test
public void testSuggestFilterForceDate() {
    MyDate md = new MyDate();
    // created in the original order; each tweet applies minusHours(2) to md
    JTweet tweetOfPeter2 = createTweet(2L, "test others", "peter2").setCreatedAt(md.minusHours(2).toDate());
    JTweet tweetOfPeter3 = createTweet(3L, "testnot this", "peter3").setCreatedAt(md.minusHours(2).toDate());
    twSearch.testUpdate(Arrays.asList(tweetOfPeter2, tweetOfPeter3));

    JetwickQuery q = new TweetQuery(false).addLatestDateFilter(1);

    Collection<String> suggested = twSearch.suggestRemoval(q);
    assertEquals(1, suggested.size());
    assertEquals(ElasticTweetSearch.DATE, suggested.iterator().next());
}
/**
 * A tweet whose text contains http://orig.de and whose URL entry points to
 * http://fulltest.de/bla must be found via both URLs and not via an unrelated one.
 */
@Test
public void testFindByUrl() {
    UrlEntry resolved = new UrlEntry(2, 18, "http://fulltest.de/bla");
    resolved.setResolvedDomain("resolved-domain.de");
    resolved.setResolvedTitle("ResolvedTitel");
    // replaces the positions 2 and 18 given to the constructor
    resolved.setIndex(12);
    resolved.setLastIndex(26);

    List<UrlEntry> urlEntries = new ArrayList<UrlEntry>();
    urlEntries.add(resolved);

    JTweet tweet = createTweet(2L, "test others http://orig.de", "peter2");
    tweet.setUrlEntries(urlEntries);

    twSearch.update(Collections.singleton(tweet), new Date(0), false);
    twSearch.refresh();

    assertEquals(1, twSearch.findByUrl("http://fulltest.de/bla").size());
    assertEquals(1, twSearch.findByUrl("http://orig.de").size());
    assertEquals(0, twSearch.findByUrl("http://irgendwas.de").size());
}
/**
 * Of one protected and one unprotected tweet only the unprotected one (id "2") may show up
 * in the index, and the feed counter must report a single tweet.
 */
@Test
public void testProtectedTweet() {
    twSearch.testUpdate(Arrays.asList(
            createTweet(1L, "test others", "peter2").setCreatedAt(new Date()).setProtected(true),
            createTweet(2L, "testnot this", "peter3").setCreatedAt(new Date())));

    JetwickQuery q = new TweetQuery(false);

    // feed only non-protected tweets
    SearchResponse countRsp = twSearch.query(q);
    assertEquals(1, countRsp.hits().totalHits());

    SearchResponse idRsp = twSearch.query(q);
    assertEquals("2", idRsp.hits().getHits()[0].id());

    assertEquals(1, twSearch.getFeededTweets());
}
/**
 * Builds a tweet whose id is {@code dt.getTime()} and whose creation date is {@code dt.toDate()}.
 *
 * @param dt date providing both the id and the creation date
 * @param twText text of the tweet
 * @param user name used to create the author
 * @return the value returned by {@code setCreatedAt} on the new tweet
 */
JTweet createSolrTweet(MyDate dt, String twText, String user) {
    JTweet tweet = new JTweet(dt.getTime(), twText, new JUser(user));
    return tweet.setCreatedAt(dt.toDate());
}
/**
 * Builds a tweet with the given id; the creation date is {@code new Date(id)}, i.e. the id
 * interpreted as milliseconds since the epoch.
 *
 * @param id tweet id, also used as creation timestamp
 * @param twText text of the tweet
 * @param user name used to create the author
 * @return the value returned by {@code setCreatedAt} on the new tweet
 */
JTweet createTweet(long id, String twText, String user) {
    JTweet tweet = new JTweet(id, twText, new JUser(user));
    return tweet.setCreatedAt(new Date(id));
}
/**
 * Builds a tweet with the given id whose creation date is the current time.
 *
 * @param id tweet id
 * @param twText text of the tweet
 * @param user name used to create the author
 * @return the value returned by {@code setCreatedAt} on the new tweet
 */
JTweet createNowTweet(long id, String twText, String user) {
    JTweet tweet = new JTweet(id, twText, new JUser(user));
    return tweet.setCreatedAt(new Date());
}
/**
 * Builds a tweet via {@code createTweet(long, String, String)} and sets its creation date
 * to {@code new Date(id)} once more.
 *
 * @param id tweet id, also used as creation timestamp
 * @param twText text of the tweet
 * @param user name used to create the author
 * @return the value returned by {@code setCreatedAt} on the created tweet
 */
JTweet createOldTweet(long id, String twText, String user) {
    JTweet tweet = createTweet(id, twText, user);
    return tweet.setCreatedAt(new Date(id));
}
/**
 * Builds a tweet whose id is {@code dt.getTime()} by delegating to the id based
 * {@code createTweet} overload and then sets the creation date to {@code dt.toDate()}.
 *
 * @param dt date providing both the id and the creation date
 * @param twText text of the tweet
 * @param user name used to create the author
 * @return the value returned by {@code setCreatedAt} on the created tweet
 */
JTweet createTweet(MyDate dt, String twText, String user) {
    JTweet tweet = createTweet(dt.getTime(), twText, user);
    return tweet.setCreatedAt(dt.toDate());
}
}