Package com.mapr.synth.samplers

Examples of com.mapr.synth.samplers.SchemaSampler


        for (int i = 0; i < 20; i++) {
            out.write(i + "\t" + (i * i) + "\n");
        }
        out.close();

        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema008.json"), Charsets.UTF_8).read());

        for (int k = 0; k < 1000; k++) {
            JsonNode r = s.sample();
            assertEquals(6, r.get("x").get("x").asInt() + r.get("x").get("y").asInt());
            int i = r.get("y").get("a").asInt();
            assertEquals(i * i, r.get("y").get("b").asInt());
        }
    }
View Full Code Here


        }
    }

    @Test
    public void testJoin() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema009.json"), Charsets.UTF_8).read());

        for (int k = 0; k < 10; k++) {
            JsonNode r = s.sample();
            assertEquals("3,6,8", r.get("x").asText());
            assertTrue(r.get("y").asInt() >= 1 && r.get("y").asInt() < 5);
            assertTrue(r.get("z").asText().matches("(xyz(,xyz)*)?"));
        }
    }
View Full Code Here

        }
    }

    @Test
    public void testEvents() throws IOException, ParseException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema012.json"), Charsets.UTF_8).read());
        long t = System.currentTimeMillis();

        SimpleDateFormat df0 = new SimpleDateFormat("yyyy-MM-dd");
        SimpleDateFormat df1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        SimpleDateFormat df2 = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
        JsonNode old = s.sample();

        long old1 = df0.parse(old.get("foo1").asText()).getTime();
        assertTrue(Math.abs(old1 - t) < TimeUnit.MILLISECONDS.convert(1, TimeUnit.DAYS));

        long old2 = df1.parse(old.get("foo2").asText()).getTime();
        assertEquals((double) old2, df1.parse("2014-01-01 00:00:00").getTime(), 10.0);

        long old3 = df2.parse(old.get("foo3").asText()).getTime();
        assertEquals(old3, df1.parse("2014-02-01 00:00:00").getTime(), 10);

        double sum1 = 0;
        double sum2 = 0;
        double sum3 = 0;

        final int N = 10000;

        for (int k = 0; k < N; k++) {
            JsonNode r = s.sample();

            long t1 = df0.parse(r.get("foo1").asText()).getTime();
            sum1 += t1 - old1;
            old1 = t1;
View Full Code Here

        assertEquals(2000, sum3 / N, 2000 * 0.03);
    }

    @Test
    public void testFlatten() throws IOException {
        SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema010.json"), Charsets.UTF_8).read());

        for (int k = 0; k < 10; k++) {
            JsonNode r = s.sample();
            assertEquals(k, r.get("id").asInt());
            assertTrue(r.get("stuff").isArray());
            assertEquals(1, r.get("stuff").get(0).asInt());
            assertEquals(2, r.get("stuff").get(1).asInt());
            assertEquals(3, r.get("stuff").get(2).asInt());
View Full Code Here

        }

        Joiner withTab = Joiner.on("\t");

        // first generate lots of user definitions
        SchemaSampler users = new SchemaSampler(Resources.asCharSource(Resources.getResource("user-schema.txt"), Charsets.UTF_8).read());
        File userFile = File.createTempFile("user", "tsv");
        BufferedWriter out = Files.newBufferedWriter(userFile.toPath(), Charsets.UTF_8);
        for (int i = 0; i < opts.users; i++) {
            out.write(withTab.join(users.sample()));
            out.newLine();
        }
        out.close();

        // now generate a session for each user
        Splitter onTabs = Splitter.on("\t");
        Splitter onComma = Splitter.on(",");

        Random gen = new Random();
        SchemaSampler intermediate = new SchemaSampler(Resources.asCharSource(Resources.getResource("hit_step.txt"), Charsets.UTF_8).read());

        final int COUNTRY = users.getFieldNames().indexOf("country");
        final int CAMPAIGN = intermediate.getFieldNames().indexOf("campaign_list");
        final int SEARCH_TERMS = intermediate.getFieldNames().indexOf("search_keywords");
        Preconditions.checkState(COUNTRY >= 0, "Need country field in user schema");
        Preconditions.checkState(CAMPAIGN >= 0, "Need campaign_list field in step schema");
        Preconditions.checkState(SEARCH_TERMS >= 0, "Need search_keywords field in step schema");

        out = Files.newBufferedWriter(new File(opts.out).toPath(), Charsets.UTF_8);

        for (String line : Files.readAllLines(userFile.toPath(), Charsets.UTF_8)) {
            long t = (long) (TimeUnit.MILLISECONDS.convert(30, TimeUnit.DAYS) * gen.nextDouble());
            List<String> user = Lists.newArrayList(onTabs.split(line));

            // pick session length
            int n = (int) Math.floor(-30 * Math.log(gen.nextDouble()));

            for (int i = 0; i < n; i++) {
                // time on page
                int dt = (int) Math.floor(-20000 * Math.log(gen.nextDouble()));
                t += dt;

                // hit specific values
                JsonNode step = intermediate.sample();

                // check for purchase
                double p = 0.01;
                List<String> campaigns = Lists.newArrayList(onComma.split(step.get("campaign_list").asText()));
                List<String> keywords = Lists.newArrayList(onComma.split(step.get("search_keywords").asText()));
View Full Code Here

            }
            Preconditions.checkArgument(outputDir.exists() && outputDir.isDirectory(),
                    String.format("Couldn't create directory %s", opts.output));
        }

        final SchemaSampler sampler = new SchemaSampler(opts.schema);
        final AtomicLong rowCount = new AtomicLong();

        final List<ReportingWorker> tasks = Lists.newArrayList();
        int limit = (opts.count + opts.threads - 1) / opts.threads;
        int remaining = opts.count;
View Full Code Here

TOP

Related Classes of com.mapr.synth.samplers.SchemaSampler

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.