Package com.lulu.WordCloud

Source Code of com.lulu.WordCloud.Generator

/*
*  Copyright 2012 Ryan Bloom
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*
*  This code is adapted from MyWordle, available at:
*       https://github.com/lindenb/jsandbox/commits/master/src/sandbox/MyWordle.java
*/
package com.lulu.WordCloud;

import java.awt.Color;
import java.awt.Font;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.geom.Area;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Random;

import javax.imageio.ImageIO;

import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

import cue.lang.Counter;
import cue.lang.NGramIterator;
import cue.lang.stop.StopWords;

public class Generator {
    @Option(name = "-r", aliases = "--rotation", required = false, usage = "Rotation increments (if set to 90, only -90, 0, and 90 degrees are used, default 45")
    private int rotationAllowed = 45;
    @Option(name = "-m", aliases = "--dimension", required = false, usage = "Max dimension for the image, default 500px")
    private int maxDimension = 500;
    @Option(name = "-b", aliases = "--buffer", required = false, usage = "Specify the buffer around words and the full image, default 5px")
    private int buffer = 5;
    @Option(name = "-f", aliases = "--font-family", required = false, usage = "Font to use for rendering, default Dialog")
    private String fontFamily = "Dialog";
    @Option(name = "-p", aliases = "--percent", required = false, usage = "Show the words with at least X% of the usage as the highest word, default 90")
    private int minPercent = 90;
    @Option(name = "-d", required = false, usage = "Set debug flag on")
    private boolean debug = false;
    @Option(name = "-o", required = true, usage = "output to this file")
    private File fileOut = null;
    @Option(name = "-i", required = true, usage = "parse this file")
    private File fileIn = null;
    @Option(name = "-c", aliases = "--class", required = false, usage = "The class to use as the RenderEngine")
    private String renderClass = "com.lulu.WordCloud.RenderEngine";
    @Option(name = "-l", aliases = "--lemmatize", required = false, usage = "Convert words to their lemmatized form before counting")
    private boolean lemmatize = false;

    /**
     * The work horse of the Word Cloud. Loop through the words and place them on the image correctly.
     *
     * @param words The words to create the cloud from
     * @param file The output file
     */
    public void doLayout(AbstractRenderEngine renderer, Counter<String> words, File file) throws IOException {
        List<String> topWords = words.keyList();
        double high = words.getCount(topWords.get(0));
        double low = high - (high * ((double) minPercent / 100));

        List<Area> bounds = new ArrayList<Area>();
        Random rand = new Random();

        BufferedImage img = new BufferedImage(maxDimension, maxDimension, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = (Graphics2D) img.getGraphics();
        g.setRenderingHint(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON);
        g.fillRect(0, 0, img.getWidth(), img.getHeight());
        g.setBackground(Color.WHITE);

        // Compute font sizes based on dimensions
        int biggestSize = ((maxDimension / 5) * 72 / 96) + 5;
        int smallestSize = biggestSize / 8;

        for (String w : topWords) {
            int count = words.getCount(w);
            if (count < low) {
                break;
            }
            String ff = this.fontFamily;
            int fontSize = (int) (((count - low) / (high - low)) * (biggestSize - smallestSize)) + smallestSize;
            Font font = new Font(ff, Font.PLAIN, fontSize);

            Color c = new Color(rand.nextInt(255), rand.nextInt(255), rand.nextInt(255));

            g.setColor(c);
            renderer.placeWord(w, font, g, bounds);
        }

        img = renderer.getSubImage(bounds, img);
        ImageIO.write(img, "png", file);
    }

    /**
     * Convert the input file to one long string to be used for counting words.
     *
     * @param fileIn the Input file
     * @return The entire contents of the file in a single string.
     */
    private String processInput(File fileIn) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileIn));
        String line = null;
        StringBuilder builder = new StringBuilder();
        String sep = System.getProperty("line.separator");

        while ((line = reader.readLine()) != null) {
            builder.append(line);
            builder.append(sep);
        }

        return builder.toString();
    }

    /**
     * Create the word list with counts per word
     *
     * @param fileIn the File to process.
     * @return a list of words with their count.
     */
    private Counter<String> createWordList(File fileIn) throws IOException {
        String contents = processInput(fileIn).toLowerCase();
        if (!lemmatize) {
            Counter<String> words = new Counter<String>();
            for (final String word : new NGramIterator(1, contents, Locale.ENGLISH, StopWords.English)) {
                words.note(word);
            }
            return words;
        } else {
            StanfordLemmatizer lemmatizer = new StanfordLemmatizer();
            return lemmatizer.lemmatize(contents, StopWords.English);
        }
    }

    /**
     * Main function for the cloud generator.
     *
     * @param args The command line arguments.
     */
    public static void main(String[] args) {
        Generator app = new Generator();

        CmdLineParser parser = new CmdLineParser(app);
        try {
            parser.parseArgument(args);
            Class<AbstractRenderEngine> renderClass = (Class<AbstractRenderEngine>) Class.forName(app.renderClass);
            Constructor ctor = renderClass.getConstructor(Integer.class, Integer.class, Integer.class, Boolean.class);
            AbstractRenderEngine renderer = (AbstractRenderEngine) ctor.newInstance(app.maxDimension, app.rotationAllowed, app.buffer, app.debug);

// new RenderEngine(app.maxDimension, app.rotationAllowed, app.buffer, app.debug);
            Counter<String> words = app.createWordList(app.fileIn);
            app.doLayout(renderer, words, app.fileOut);
        } catch (CmdLineException e) {
            // handling of wrong arguments
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
        } catch (IOException e) {
            System.err.println(e.getMessage());
        } catch (ClassNotFoundException e) {
            System.err.println(e.getMessage());
        } catch (NoSuchMethodException e) {
            System.err.println(e.getMessage());
        } catch (IllegalArgumentException e) {
            System.err.println(e.getMessage());
        } catch (InstantiationException e) {
            System.err.println(e.getMessage());
        } catch (IllegalAccessException e) {
            System.err.println(e.getMessage());
        } catch (InvocationTargetException e) {
            System.err.println(e.getMessage());
        }
    }
}
TOP

Related Classes of com.lulu.WordCloud.Generator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.