Package com.pearson.entech.elasticsearch.search.facet.approx.date.internal

Examples of com.pearson.entech.elasticsearch.search.facet.approx.date.internal.InternalDistinctFacet


        testSerializingDistinctFacet(counts, expectedCounts, expectedCardinalities);
    }

    private void testSerializingDistinctFacet(final ExtTLongObjectHashMap<DistinctCountPayload> counts,
            final Map<Long, Integer> expectedCounts, final Map<Long, Integer> expectedCardinalities) throws Exception {
        final InternalDistinctFacet toSend = new InternalDistinctFacet("bar", counts);
        final InternalDistinctFacet toReceive = new InternalDistinctFacet();
        serializeAndDeserialize(toSend, toReceive);
        final ExtTLongObjectHashMap<DistinctCountPayload> receivedCounts =
                new ExtTLongObjectHashMap<DistinctCountPayload>(toReceive.peekCounts());
        compareDistinctCounts(expectedCounts, expectedCardinalities, receivedCounts);
    }
View Full Code Here


        if(searchResponse.getFailedShards() > 0) {
            System.out.println(searchResponse);
            fail(Joiner.on(", ").join(searchResponse.getShardFailures()));
        }

        InternalDistinctFacet facet = searchResponse.getFacets().facet("stats1");
        assertThat(facet.getName(), equalTo("stats1"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-06")));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(1l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));

        // time zone causes the dates to shift by 2
        facet = searchResponse.getFacets().facet("stats2");
        assertThat(facet.getName(), equalTo("stats2"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-04")));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(2l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));

        // time zone causes the dates to shift by 2
        facet = searchResponse.getFacets().facet("stats3");
        assertThat(facet.getName(), equalTo("stats3"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-04")));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(2l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));

        // time zone causes the dates to shift by 2
        //        facet = searchResponse.getFacets().facet("stats4");
        //        assertThat(facet.getName(), equalTo("stats4"));
        //        assertThat(facet.getEntries().size(), equalTo(2));
        //        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-04")));
        //        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(1l));
        //        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(1l));
        //        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        //        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(2l));
        //        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(2l));

        facet = searchResponse.getFacets().facet("stats5");
        assertThat(facet.getName(), equalTo("stats5"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-06")));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(1l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));

        facet = searchResponse.getFacets().facet("stats6");
        assertThat(facet.getName(), equalTo("stats6"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-04") - TimeValue.timeValueHours(2).millis()));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-05") - TimeValue.timeValueHours(2).millis()));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(2l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));

        facet = searchResponse.getFacets().facet("stats7");
        assertThat(facet.getName(), equalTo("stats7"));
        assertThat(facet.getEntries().size(), equalTo(1));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-01-01")));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));
    }
View Full Code Here

            System.out.println(searchResponse);
            fail(Joiner.on(", ").join(searchResponse.getShardFailures()));
        }

        // time zone causes the dates to shift by 2:00
        InternalDistinctFacet facet = searchResponse.getFacets().facet("stats1");
        assertThat(facet.getName(), equalTo("stats1"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-06")));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(2l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));

        // time zone causes the dates to shift by 1:30
        facet = searchResponse.getFacets().facet("stats2");
        assertThat(facet.getName(), equalTo("stats2"));
        assertThat(facet.getEntries().size(), equalTo(2));
        assertThat(facet.getEntries().get(0).getTime(), equalTo(utcTimeInMillis("2009-03-05")));
        assertThat(facet.getEntries().get(0).getTotalCount(), equalTo(2l));
        assertThat(facet.getEntries().get(0).getDistinctCount(), equalTo(2l));
        assertThat(facet.getEntries().get(1).getTime(), equalTo(utcTimeInMillis("2009-03-06")));
        assertThat(facet.getEntries().get(1).getTotalCount(), equalTo(1l));
        assertThat(facet.getEntries().get(1).getDistinctCount(), equalTo(1l));
        assertThat(facet.getTotalCount(), equalTo(3l));
        assertThat(facet.getDistinctCount(), equalTo(3l));
    }
View Full Code Here

        putSync(newID(), 1, __days[4]);
        putSync(newID(), 1, __days[6]);
        assertEquals(4, countAll());
        final SearchResponse response = getHistogram(__days[0], __days[7], "day", __userField);
        assertEquals(4, response.getHits().getTotalHits());
        final InternalDistinctFacet facet = response.getFacets().facet(__facetName);
        final List<DistinctTimePeriod<NullEntry>> facetList = facet.entries();
        // Expecting just one hit and one distinct hit per doc, for the username.
        assertEquals(4, facetList.size());
        assertEquals(__days[0], facetList.get(0).getTime());
        assertEquals(1, facetList.get(0).getTotalCount());
        assertEquals(1, facetList.get(0).getDistinctCount());
        assertEquals(__days[2], facetList.get(1).getTime());
        assertEquals(1, facetList.get(1).getTotalCount());
        assertEquals(1, facetList.get(1).getDistinctCount());
        assertEquals(__days[4], facetList.get(2).getTime());
        assertEquals(1, facetList.get(2).getTotalCount());
        assertEquals(1, facetList.get(2).getDistinctCount());
        assertEquals(__days[6], facetList.get(3).getTime());
        assertEquals(1, facetList.get(3).getTotalCount());
        assertEquals(1, facetList.get(3).getDistinctCount());
        assertThat(facet.getTotalCount(), equalTo(4l));
        assertThat(facet.getDistinctCount(), equalTo(1l)); // same user each time
    }
View Full Code Here

        putSync(newID(), 1, __days[4]);
        putSync(newID(), 1, __days[6]);
        assertEquals(4, countAll());
        final SearchResponse response = getHistogram(__days[0], __days[7], "day", __txtField);
        assertEquals(4, response.getHits().getTotalHits());
        final InternalDistinctFacet facet = response.getFacets().facet(__facetName);
        final List<DistinctTimePeriod<NullEntry>> facetList = facet.entries();
        // Expecting one hit for each token in the string "Document created [at] <TIMESTAMP>"
        // for each document, in this case these are unique per bucket too. The word "at"
        // is a stopword and is removed.
        assertEquals(4, facetList.size());
        assertEquals(__days[0], facetList.get(0).getTime());
        assertEquals(3, facetList.get(0).getTotalCount());
        assertEquals(3, facetList.get(0).getDistinctCount());
        assertEquals(__days[2], facetList.get(1).getTime());
        assertEquals(3, facetList.get(1).getTotalCount());
        assertEquals(3, facetList.get(1).getDistinctCount());
        assertEquals(__days[4], facetList.get(2).getTime());
        assertEquals(3, facetList.get(2).getTotalCount());
        assertEquals(3, facetList.get(2).getDistinctCount());
        assertEquals(__days[6], facetList.get(3).getTime());
        assertEquals(3, facetList.get(3).getTotalCount());
        assertEquals(3, facetList.get(3).getDistinctCount());
        assertThat(facet.getTotalCount(), equalTo(12l));
        assertThat(facet.getDistinctCount(), equalTo(6l)); // "document", "created", 4 usernames
    }
View Full Code Here

        putSync(newID(), 3, __days[6] + 10);
        putSync(newID(), 4, __days[6] + 20);
        assertEquals(8, countAll());
        final SearchResponse response = getHistogram(__days[0], __days[7], "day", __userField);
        assertEquals(8, response.getHits().getTotalHits());
        final InternalDistinctFacet facet = response.getFacets().facet(__facetName);
        final List<DistinctTimePeriod<NullEntry>> facetList = facet.entries();
        // Hits and distinct hits can now vary in intervals where the same user posted more
        // than once (i.e. day 0 here).
        assertEquals(4, facetList.size());
        assertEquals(__days[0], facetList.get(0).getTime());
        assertEquals(3, facetList.get(0).getTotalCount());
        assertEquals(2, facetList.get(0).getDistinctCount());
        assertEquals(__days[2], facetList.get(1).getTime());
        assertEquals(1, facetList.get(1).getTotalCount());
        assertEquals(1, facetList.get(1).getDistinctCount());
        assertEquals(__days[4], facetList.get(2).getTime());
        assertEquals(1, facetList.get(2).getTotalCount());
        assertEquals(1, facetList.get(2).getDistinctCount());
        assertEquals(__days[6], facetList.get(3).getTime());
        assertEquals(3, facetList.get(3).getTotalCount());
        assertEquals(3, facetList.get(3).getDistinctCount());
        assertThat(facet.getTotalCount(), equalTo(8l));
        assertThat(facet.getDistinctCount(), equalTo(4l)); // 4 different users
    }
View Full Code Here

        putSync(newID(), 3, __days[6] + 10);
        putSync(newID(), 4, __days[6] + 20);
        assertEquals(8, countAll());
        final SearchResponse response = getHistogram(__days[0], __days[7], "day", __txtField);
        assertEquals(8, response.getHits().getTotalHits());
        final InternalDistinctFacet facet = response.getFacets().facet(__facetName);
        final List<DistinctTimePeriod<NullEntry>> facetList = facet.entries();
        // Now things get a bit more complex as all the posts are identically worded apart
        // from the timestamp at the end. 3 tokens indexed per each instance of the field.
        assertEquals(4, facetList.size());
        assertEquals(__days[0], facetList.get(0).getTime());
        assertEquals(3 * 3, facetList.get(0).getTotalCount());
        assertEquals(2 + (1 * 3), facetList.get(0).getDistinctCount());
        assertEquals(__days[2], facetList.get(1).getTime());
        assertEquals(1 * 3, facetList.get(1).getTotalCount());
        assertEquals(1 * 3, facetList.get(1).getDistinctCount());
        assertEquals(__days[4], facetList.get(2).getTime());
        assertEquals(1 * 3, facetList.get(2).getTotalCount());
        assertEquals(1 * 3, facetList.get(2).getDistinctCount());
        assertEquals(__days[6], facetList.get(3).getTime());
        assertEquals(3 * 3, facetList.get(3).getTotalCount());
        assertEquals(2 + (1 * 3), facetList.get(3).getDistinctCount());
        assertThat(facet.getTotalCount(), equalTo(24l));
        assertThat(facet.getDistinctCount(), equalTo(10l)); // "document", "created", 8 usernames
    }
View Full Code Here

            final int totalItems = add(itemsPerDay);
            assertEquals(totalItems, countAll());

            System.out.println("Randomized testing: running facet");
            final SearchResponse response = getHistogram(__days[0], __days[7], "day", __userField, 1000);
            final InternalDistinctFacet facet1 = response.getFacets().facet(__facetName);
            final List<DistinctTimePeriod<NullEntry>> facetList1 = facet1.entries();
            assertEquals(7, facetList1.size());
            assertEquals(totalItems, facet1.getTotalCount());
            int tolerance = totalItems / 100;
            int totalDistinct = totalItems;
            assertTrue(String.format(
                    "With %d total distinct items: Estimated overall distinct count %d is not within 1%% tolerance of %d",
                    totalDistinct, facet1.getDistinctCount(), totalDistinct),
                    abs(totalDistinct - facet1.getDistinctCount()) <= tolerance);
            for(int i = 0; i < 7; i++) {
                final int exactUsers = itemsPerDay[i];
                assertEquals(exactUsers, facetList1.get(i).getTotalCount());
                tolerance = exactUsers / 100;
                final long fuzzyUsers = facetList1.get(i).getDistinctCount();
                //System.out.println("Exact user count = " + exactUsers);
                //System.out.println("Fuzzy user count = " + fuzzyUsers);
                assertTrue(String.format(
                        "With > %d terms per day: Estimated count %d is not within 1%% tolerance of %d",
                        minPerDay, fuzzyUsers, exactUsers),
                        abs(fuzzyUsers - exactUsers) <= tolerance);
            }

            final SearchResponse response2 = getHistogram(__days[0], __days[7], "day", __txtField, 1000);
            final InternalDistinctFacet facet2 = response2.getFacets().facet(__facetName);
            final List<DistinctTimePeriod<NullEntry>> facetList2 = facet2.entries();
            assertEquals(7, facetList2.size());
            assertEquals(3 * totalItems, facet2.getTotalCount());
            tolerance = totalItems / 100;
            totalDistinct = 2 + totalItems;
            assertTrue(String.format(
                    "With %d total distinct items: Estimated overall distinct count %d is not within 1%% tolerance of %d",
                    totalDistinct, facet2.getDistinctCount(), totalDistinct),
                    abs(totalDistinct - facet2.getDistinctCount()) <= tolerance);
            for(int i = 0; i < 7; i++) {
                final int exactTokens = itemsPerDay[i] * 3; // "Document created [by] <ID>"
                final int exactDistinctTokens = itemsPerDay[i] + 2;
                assertEquals(exactTokens, facetList2.get(i).getTotalCount());
                tolerance = exactDistinctTokens / 100;
View Full Code Here

            }
            _occurrences.put(fieldVal, null); // Free this up for GC immediately
        }

        _occurrences = null;
        final InternalFacet facet = new InternalDistinctFacet(facetName, counts);
        return facet;
    }
View Full Code Here

TOP

Related Classes of com.pearson.entech.elasticsearch.search.facet.approx.date.internal.InternalDistinctFacet

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.