// Package com.mapr.synth
//
// Source Code of com.mapr.synth.Synth$Options

package com.mapr.synth;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.mapr.synth.samplers.SchemaSampler;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.OptionDef;
import org.kohsuke.args4j.spi.IntOptionHandler;
import org.kohsuke.args4j.spi.Setter;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadMXBean;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.security.AccessControlException;
import java.util.List;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;

/**
* Generates plausible database tables in JSON, CSV or TSV format.
*/
public class Synth {

    // Maximum number of rows a worker generates in one batch before it adds them to the
    // shared row counter and refreshes its wall-clock / CPU-time readings.
    private static final int REPORTING_DELTA = 500;

    public static void main(String[] args) throws IOException, CmdLineException, InterruptedException, ExecutionException {
        final Options opts = new Options();
        CmdLineParser parser = new CmdLineParser(opts);
        try {
            parser.parseArgument(args);
        } catch (CmdLineException e) {
            System.err.println("Usage: " +
                    "[ -count <number>G|M|K ] " +
                    "-schema schema-file " +
                    "[-quote DOUBLE_QUOTE|BACK_SLASH|OPTIMISTIC] " +
                    "[-format JSON|TSV|CSV ] " +
                    "[-threads n] " +
                    "[-output output-directory-name] ");
            throw e;
        }
        Preconditions.checkArgument(opts.threads > 0 && opts.threads <= 2000,
                "Must have at least one thread and no more than 2000");

        if (opts.threads > 1) {
            Preconditions.checkArgument(!"-".equals(opts.output),
                    "If more than on thread is used, you have to use -output to set the output directory");
        }

        File outputDir = new File(opts.output);
        if (!"-".equals(opts.output)) {
            if (!outputDir.exists()) {
                Preconditions.checkState(outputDir.mkdirs(), String.format("Couldn't create output directory %s", opts.output));
            }
            Preconditions.checkArgument(outputDir.exists() && outputDir.isDirectory(),
                    String.format("Couldn't create directory %s", opts.output));
        }

        final SchemaSampler sampler = new SchemaSampler(opts.schema);
        final AtomicLong rowCount = new AtomicLong();

        final List<ReportingWorker> tasks = Lists.newArrayList();
        int limit = (opts.count + opts.threads - 1) / opts.threads;
        int remaining = opts.count;
        for (int i = 0; i < opts.threads; i++) {

            final int count = Math.min(limit, remaining);
            remaining -= count;

            tasks.add(new ReportingWorker(opts, sampler, rowCount, count, i));
        }

        final double t0 = System.nanoTime() * 1e-9;
        ExecutorService pool = Executors.newFixedThreadPool(opts.threads);
        ScheduledExecutorService blinker = Executors.newScheduledThreadPool(1);
        final AtomicBoolean finalRun = new AtomicBoolean(false);

        final PrintStream sideLog = new PrintStream(new FileOutputStream("side-log"));
        Runnable blink = new Runnable() {
            public double oldT;
            private long oldN;


            @Override
            public void run() {
                double t = System.nanoTime() * 1e-9;
                long n = rowCount.get();
                System.out.printf("%s\t%d\t%.1f\t%d\t%.1f\t%.3f\n", finalRun.get() ? "F" : "R", opts.threads, t - t0, n, n / (t - t0), (n - oldN) / (t - oldT));
                for (ReportingWorker task : tasks) {
                    ReportingWorker.ThreadReport r = task.report();
                    sideLog.printf("\t%d\t%.2f\t%.2f\t%.2f\t%.1f\t%.1f\n", r.fileNumber, r.threadTime, r.userTime, r.wallTime, r.rows / r.threadTime, r.rows / r.wallTime);
                }
                oldN = n;
                oldT = t;
            }
        };
        if (!"-".equals(opts.output)) {
            blinker.scheduleAtFixedRate(blink, 0, 10, TimeUnit.SECONDS);
        }
        List<Future<Integer>> results = pool.invokeAll(tasks);

        int total = 0;
        for (Future<Integer> result : results) {
            total += result.get();
        }
        Preconditions.checkState(total == opts.count,
                String.format("Expected to generate %d lines of output, but actually generated %d", opts.count, total));
        pool.shutdownNow();
        blinker.shutdownNow();
        finalRun.set(true);
        sideLog.close();
        blink.run();
    }

    private static class ReportingWorker implements Callable<Integer> {
        private final Options opts;
        private final SchemaSampler sampler;
        private final AtomicLong rowCount;
        private final int count;
        private final int fileNumber;
        int localCount;
        private ThreadMXBean mx;
        private AtomicLong wallTime;
        private AtomicLong threadTime;
        private AtomicLong userTime;
        private AtomicLong lastUserTime;
        final AtomicLong lastWall;
        final AtomicLong lastThreadTime;
        final AtomicLong lastRowCount;

        ReportingWorker(final Options opts, final SchemaSampler sampler, final AtomicLong rowCount, final int count, final int fileNumber) {
            mx = ManagementFactory.getThreadMXBean();
            try {
                if (mx.isThreadCpuTimeSupported())
                    mx.setThreadCpuTimeEnabled(true);
                else
                    throw new AccessControlException("");
            } catch (AccessControlException e) {
                System.out.println("CPU Usage monitoring is not available!");
                System.exit(0);
            }

            this.opts = opts;
            this.sampler = sampler;
            this.rowCount = rowCount;
            this.count = count;
            this.fileNumber = fileNumber;
            localCount = this.count;
            lastWall = new AtomicLong(System.nanoTime());
            wallTime = new AtomicLong(lastWall.get());
            lastThreadTime = new AtomicLong(mx.getCurrentThreadCpuTime());
            threadTime = new AtomicLong(lastThreadTime.get());
            lastUserTime = new AtomicLong(mx.getCurrentThreadUserTime());
            userTime = new AtomicLong(lastThreadTime.get());
            lastRowCount = new AtomicLong(0);
        }

        @Override
        public Integer call() throws Exception {
            if ("-".equals(opts.output)) {
                return generateFile(opts, sampler, System.out, localCount);
            } else {
                Path outputPath = new File(opts.output, String.format("synth-%04d", fileNumber)).toPath();

                try (PrintStream out = new PrintStream(Files.newOutputStream(outputPath,
                        StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING))) {

                    header(opts.format, sampler.getFieldNames(), out);
                    int rows = 0;
                    while (rows < localCount) {
                        int k = Math.min(localCount - rows, REPORTING_DELTA);
                        rows += k;
                        rowCount.addAndGet(generateFile(opts, sampler, out, k));
                        wallTime.set(System.nanoTime());
                        threadTime.set(mx.getCurrentThreadCpuTime());
                        userTime.set(mx.getCurrentThreadUserTime());
                    }

                    return rows;
                }
            }
        }

        public static void header(Format format, List<String> names, PrintStream out) {
            switch (format) {
                case TSV:
                    out.printf("%s\n", withTabs.join(names));
                    break;
                case CSV:
                    out.printf("%s\n", withCommas.join(names));
                    break;
            }
        }

        public static int generateFile(Options opts, SchemaSampler s, PrintStream out, int count) {
            for (int i = 0; i < count; i++) {
                format(opts.format, opts.quote, s.getFieldNames(), s.sample(), out);
            }
            return count;
        }

        public ThreadReport report() {
            return new ThreadReport();
        }

        public class ThreadReport {
            long rows;
            int fileNumber = ReportingWorker.this.fileNumber;
            double wallTime;
            double threadTime;
            double userTime;

            public ThreadReport() {
                while (true) {
                    long oldWall = lastWall.get();
                    long oldThread = lastThreadTime.get();
                    long oldUser = lastUserTime.get();
                    long oldRowCount = lastRowCount.get();

                    long wall = ReportingWorker.this.wallTime.get();
                    long thread = ReportingWorker.this.threadTime.get();
                    long user = ReportingWorker.this.userTime.get();
                    long rowCount = ReportingWorker.this.rowCount.get();

                    wallTime = (wall - oldWall) * 1e-9;
                    threadTime = (thread - oldThread) * 1e-9;
                    userTime = (user - oldUser) * 1e-9;
                    rows = rowCount - oldRowCount;

                    if (lastWall.compareAndSet(oldWall, wall) && lastThreadTime.compareAndSet(oldThread, thread)
                            && lastRowCount.compareAndSet(oldRowCount, rowCount)) {
                        // nobody stomped on our report
                        return;
                    }
                }
            }
        }
    }


    static Joiner withCommas = Joiner.on(",");
    static Joiner withTabs = Joiner.on("\t");


    private static void format(Format format, Quote quoteConvention, List<String> names, JsonNode fields, PrintStream out) {
        switch (format) {
            case JSON:
                out.printf("%s\n", fields.toString());
                break;
            case TSV:
                printDelimited(quoteConvention, names, fields, "\t", out);
                break;
            case CSV:
                printDelimited(quoteConvention, names, fields, ",", out);
                break;
        }
    }

    private static void printDelimited(Quote quoteConvention, List<String> names, JsonNode fields, String separator, PrintStream out) {
        String x = "";
        for (String name : names) {
            switch (quoteConvention) {
                case DOUBLE_QUOTE:
                    out.printf("%s%s", x, fields.get(name));
                    break;
                case OPTIMISTIC:
                    out.printf("%s%s", x, fields.get(name).asText());
                    break;
                case BACK_SLASH:
                    out.printf("%s%s", x, fields.get(name).asText().replaceAll("([,\t\\s\\\\])", "\\\\$1"));
                    break;
            }
            x = separator;
        }
        out.printf("\n");
    }

    /**
     * Output formats selectable with {@code -format}.
     * JSON prints each record's string form on its own line; TSV and CSV print the field
     * values separated by tabs or commas respectively.
     */
    public static enum Format {
        JSON, TSV, CSV
    }

    /**
     * Quoting conventions for TSV/CSV values, selectable with {@code -quote}.
     * DOUBLE_QUOTE prints the field node itself (presumably its JSON form, i.e. strings in
     * double quotes - confirm against Jackson); OPTIMISTIC prints the field's text with no
     * escaping at all; BACK_SLASH prints the field's text with every comma, whitespace
     * character and backslash preceded by a backslash.
     */
    public static enum Quote {
        DOUBLE_QUOTE, BACK_SLASH, OPTIMISTIC
    }

    private static class Options {
        @Option(name = "-output")
        String output = "-";

        @Option(name = "-threads")
        int threads = 1;

        @Option(name = "-count", handler = SizeParser.class)
        int count = 1000;

        @Option(name = "-schema")
        File schema;

        @Option(name = "-format")
        Format format = Format.CSV;

        @Option(name = "-quote")
        Quote quote = Quote.DOUBLE_QUOTE;

        public static class SizeParser extends IntOptionHandler {
            public SizeParser(CmdLineParser parser, OptionDef option, Setter<? super Integer> setter) {
                super(parser, option, setter);
            }

            @Override
            protected Integer parse(String argument) throws NumberFormatException {
                int n = Integer.parseInt(argument.replaceAll("[KMG]?$", ""));

                switch (argument.charAt(argument.length() - 1)) {
                    case 'G':
                        n *= 1e9;
                        break;
                    case 'M':
                        n *= 1e6;
                        break;
                    case 'K':
                        n *= 1e3;
                        break;
                    default:
                        // no suffix leads here
                        break;
                }
                return n;
            }
        }
    }

}
// TOP
//
// Related Classes of com.mapr.synth.Synth$Options
//
// TOP
// Copyright © 2018 www.massapi.com. All rights reserved.
// All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.