Package org.elasticsearch.river.twitter.test

Source Code of org.elasticsearch.river.twitter.test.TwitterIntegrationTest

/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.river.twitter.test;

import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.base.Predicate;
import org.elasticsearch.common.joda.time.DateTime;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.indices.IndexAlreadyExistsException;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.plugins.PluginsService;
import org.elasticsearch.river.twitter.test.helper.HttpClient;
import org.elasticsearch.river.twitter.test.helper.HttpClientResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.auth.AccessToken;

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;

/**
* Integration tests for Twitter river<br>
* You must have an internet access.
*
* Launch it using:
* mvn test -Dtests.twitter=true -Dtests.config=/path/to/elasticsearch.yml
*
* where your /path/to/elasticsearch.yml contains:

  river:
      twitter:
          oauth:
             consumer_key: ""
             consumer_secret: ""
             access_token: ""
             access_token_secret: ""

*/
@ElasticsearchIntegrationTest.ClusterScope(
        scope = ElasticsearchIntegrationTest.Scope.SUITE,
        transportClientRatio = 0.0)
@AbstractTwitterTest.TwitterTest
public class TwitterIntegrationTest extends ElasticsearchIntegrationTest {

    private final String track = "obama";

    @Override
    protected Settings nodeSettings(int nodeOrdinal) {
        ImmutableSettings.Builder settings = ImmutableSettings.builder()
                .put(super.nodeSettings(nodeOrdinal))
                .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true);

        Environment environment = new Environment();

        // if explicit, just load it and don't load from env
        if (Strings.hasText(System.getProperty("tests.config"))) {
            settings.loadFromUrl(environment.resolveConfig(System.getProperty("tests.config")));
        }

        return settings.build();
    }

    @Before
    public void createEmptyRiverIndex() {
        // We want to force _river index to use 1 shard 1 replica
        client().admin().indices().prepareCreate("_river").setSettings(ImmutableSettings.builder()
                .put(SETTING_NUMBER_OF_SHARDS, 1)
                .put(SETTING_NUMBER_OF_REPLICAS, 0)).get();
    }

    @After
    public void deleteRiverAndWait() throws InterruptedException {
        logger.info(" --> remove all twitter rivers");
        client().admin().indices().prepareDelete("_river").get();
        // We just wait a few to make sure that all bulks has been processed
        awaitBusy(new Predicate<Object>() {
            @Override
            public boolean apply(Object o) {
                return false;
            }
        }, 2, TimeUnit.SECONDS);
    }

    private String getDbName() {
        return Strings.toUnderscoreCase(getTestName());
    }

    private void launchTest(XContentBuilder river, final Integer numDocs, boolean removeRiver)
            throws IOException, InterruptedException {
        logger.info("  -> Checking internet working");
        new HttpClient("www.elasticsearch.org", 80).request("/");
        HttpClientResponse response = new HttpClient("www.elasticsearch.org", 80).request("/");
        Assert.assertThat(response.errorCode(), is(200));

        logger.info("  -> Create river");
        try {
            createIndex(getDbName());
        } catch (IndexAlreadyExistsException e) {
            // No worries. We already created the index before
        }
        index("_river", getDbName(), "_meta", river);

        logger.info("  -> Wait for some docs");
        assertThat(awaitBusy(new Predicate<Object>() {
            public boolean apply(Object obj) {
                try {
                    refresh();
                    CountResponse response = client().prepareCount(getDbName()).get();
                    logger.info("  -> got {} docs in {} index", response.getCount(), getDbName());
                    return response.getCount() >= numDocs;
                } catch (IndexMissingException e) {
                    return false;
                }
            }
        }, 5, TimeUnit.MINUTES), equalTo(true));

        if (removeRiver) {
            logger.info("  -> Remove river");
            client().admin().indices().prepareDeleteMapping("_river").setType(getDbName()).get();
        }
    }

    @Test
    public void testLanguageFiltering() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .field("type", "filter")
                    .startObject("filter")
                        .field("tracks", "le")
                        .field("language", "fr")
                    .endObject()
                .endObject()
            .endObject(), randomIntBetween(5, 50), true);

        // We should have only FR data
        SearchResponse response = client().prepareSearch(getDbName())
                .addField("language")
                .addField("_source")
                .get();

        logger.info("  --> Search response: {}", response.toString());

        // All language fields should be fr
        for (SearchHit hit : response.getHits().getHits()) {
            assertThat(hit.field("language"), notNullValue());
            assertThat(hit.field("language").getValue().toString(), is("fr"));
        }
    }

    @Test
    public void testIgnoreRT() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .field("type", "sample")
                    .field("ignore_retweet", true)
               .endObject()
            .endObject(), randomIntBetween(5, 50), true);

        // We should have only FR data
        SearchResponse response = client().prepareSearch(getDbName())
                .addField("retweet.id")
                .get();

        logger.info("  --> Search response: {}", response.toString());

        // We should not have any RT
        for (SearchHit hit : response.getHits().getHits()) {
            assertThat(hit.field("retweet.id"), nullValue());
        }
    }

    @Test
    public void testRaw() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .field("raw", true)
                    .startObject("filter")
                          .field("tracks", track)
                    .endObject()
               .endObject()
            .endObject(), randomIntBetween(5, 50), true);

        // We should have data we don't have without raw set to true
        SearchResponse response = client().prepareSearch(getDbName())
                .addField("user.statuses_count")
                .addField("_source")
                .get();

        logger.info("  --> Search response: {}", response.toString());

        for (SearchHit hit : response.getHits().getHits()) {
            assertThat(hit.field("user.statuses_count"), notNullValue());
        }
    }

    /**
     * Tracking twitter account: 783214
     */
    @Test
    public void testFollow() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
                .startObject()
                    .field("type", "twitter")
                    .startObject("twitter")
                        .startObject("filter")
                            .field("follow", "783214")
                        .endObject()
                    .endObject()
                    .startObject("index")
                        .field("bulk_size", 1)
                    .endObject()
                .endObject(), 1, true);
    }

    /**
     * Tracking twitter lists and Zonal_Marking/Guardian100FootballBlogs,Zonal_Marking/football-journalists-3
     */
    @Test
    public void testFollowList() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
                .startObject()
                    .field("type", "twitter")
                    .startObject("twitter")
                        .startObject("filter")
                            .field("user_lists", "Zonal_Marking/Guardian100FootballBlogs,Zonal_Marking/football-journalists-3")
                        .endObject()
                    .endObject()
                    .startObject("index")
                        .field("bulk_size", 1)
                    .endObject()
                .endObject(), 1, true);
    }
    @Test
    public void testTracks() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .startObject("filter")
                        .field("tracks", track)
                    .endObject()
               .endObject()
            .endObject(), randomIntBetween(1, 10), true);

        // We should have only FR data
        SearchResponse response = client().prepareSearch(getDbName())
                .setQuery(QueryBuilders.queryString(track))
                .get();

        logger.info("  --> Search response: {}", response.toString());

        assertThat(response.getHits().getTotalHits(), greaterThan(0L));
    }

    @Test
    public void testSample() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .field("type", "sample")
               .endObject()
            .endObject(), randomIntBetween(10, 200), true);
    }

    @Test
    public void testUserStream() throws IOException, InterruptedException, TwitterException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .field("type", "user")
               .endObject()
            .endObject(), 0, false);

        // Wait for the river to start
        awaitBusy(new Predicate<Object>() {
            public boolean apply(Object obj) {
                try {
                    GetResponse response = get("_river", getDbName(), "_status");
                    return response.isExists();
                } catch (IndexMissingException e) {
                    return false;
                }
            }
        }, 10, TimeUnit.SECONDS);

        // The river could look started but it tooks actually some seconds
        // to get twitter stream up and running. So we wait 5 seconds more.
        awaitBusy(new Predicate<Object>() {
            public boolean apply(Object obj) {
                return false;
            }
        }, 5, TimeUnit.SECONDS);

        // Generate a tweet on your timeline
        // We need to read settings from elasticsearch.yml file
        Settings settings = internalCluster().getInstance(Settings.class);
        AccessToken accessToken = new AccessToken(
                settings.get("river.twitter.oauth.access_token"),
                settings.get("river.twitter.oauth.access_token_secret"));


        Twitter twitter = new TwitterFactory().getInstance();
        twitter.setOAuthConsumer(
                settings.get("river.twitter.oauth.consumer_key"),
                settings.get("river.twitter.oauth.consumer_secret"));
        twitter.setOAuthAccessToken(accessToken);

        Status status = twitter.updateStatus("testing twitter river. Please ignore. " +
                        DateTime.now().toString());
        logger.info("  -> tweet [{}] sent: [{}]", status.getId(), status.getText());

        assertThat(awaitBusy(new Predicate<Object>() {
            public boolean apply(Object obj) {
                try {
                    refresh();
                    SearchResponse response = client().prepareSearch(getDbName()).get();
                    logger.info("  -> got {} docs in {} index", response.getHits().totalHits(), getDbName());
                    return response.getHits().totalHits() >= 1;
                } catch (IndexMissingException e) {
                    return false;
                }
            }
        }, 1, TimeUnit.MINUTES), is(true));

        logger.info("  -> Remove river");
        client().admin().indices().prepareDeleteMapping("_river").setType(getDbName()).get();
    }

    /**
     * Test for #51: https://github.com/elasticsearch/elasticsearch-river-twitter/issues/51
     */
    @Test
    public void testgeoAsArray() throws IOException, InterruptedException {
        launchTest(jsonBuilder()
            .startObject()
                .field("type", "twitter")
                .startObject("twitter")
                    .startObject("filter")
                        .field("tracks", track)
                    .endObject()
                    .field("geo_as_array", true)
               .endObject()
            .endObject(), randomIntBetween(1, 10), false);

        // We wait for geo located tweets (it could take a looooong time)
        assertThat(awaitBusy(new Predicate<Object>() {
            public boolean apply(Object obj) {
                try {
                    refresh();
                    SearchResponse response = client().prepareSearch(getDbName())
                            .setPostFilter(
                                    FilterBuilders.geoDistanceFilter("status.location")
                                    .point(0, 0)
                                    .distance(10000, DistanceUnit.KILOMETERS)
                            )
                            .addField("_source")
                            .addField("location")
                            .get();

                    logger.info("  --> Search response: {}", response.toString());

                    for (SearchHit hit : response.getHits().getHits()) {
                        if (hit.field("location") != null) {
                            // We have a location field so it must be an array containing 2 values
                            assertThat(hit.field("location").getValues().size(), is(2));
                            return true;
                        }
                    }
                    return false;
                } catch (IndexMissingException e) {
                    return false;
                }
            }
        }, 5, TimeUnit.MINUTES), is(true));

        logger.info("  -> Remove river");
        client().admin().indices().prepareDeleteMapping("_river").setType(getDbName()).get();
    }
}
TOP

Related Classes of org.elasticsearch.river.twitter.test.TwitterIntegrationTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.