/*
* Copyright 2012 Ryan Bloom
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* This code is adapted from MyWordle, available at:
* https://github.com/lindenb/jsandbox/commits/master/src/sandbox/MyWordle.java
*/
package com.lulu.WordCloud;
import java.awt.Color;
import java.awt.Font;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.geom.Area;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import javax.imageio.ImageIO;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import cue.lang.Counter;
import cue.lang.NGramIterator;
import cue.lang.stop.StopWords;
/**
 * Command-line word cloud generator.
 *
 * Reads a text file, counts its words (optionally lemmatized), and asks an
 * {@link AbstractRenderEngine} to place the most frequent words onto an image
 * that is written out as a PNG. The fields below are populated by args4j from
 * the command line.
 */
public class Generator {
    @Option(name = "-r", aliases = "--rotation", required = false, usage = "Rotation increments (if set to 90, only -90, 0, and 90 degrees are used, default 45")
    private int rotationAllowed = 45;

    @Option(name = "-m", aliases = "--dimension", required = false, usage = "Max dimension for the image, default 500px")
    private int maxDimension = 500;

    @Option(name = "-b", aliases = "--buffer", required = false, usage = "Specify the buffer around words and the full image, default 5px")
    private int buffer = 5;

    @Option(name = "-f", aliases = "--font-family", required = false, usage = "Font to use for rendering, default Dialog")
    private String fontFamily = "Dialog";

    // NOTE(review): the cutoff computed in doLayout is high * (1 - minPercent/100),
    // i.e. with the default of 90 every word with at least 10% of the top count is
    // shown. That does not obviously match the usage text below - confirm which one
    // is intended before changing either.
    @Option(name = "-p", aliases = "--percent", required = false, usage = "Show the words with at least X% of the usage as the highest word, default 90")
    private int minPercent = 90;

    @Option(name = "-d", required = false, usage = "Set debug flag on")
    private boolean debug = false;

    @Option(name = "-o", required = true, usage = "output to this file")
    private File fileOut = null;

    @Option(name = "-i", required = true, usage = "parse this file")
    private File fileIn = null;

    @Option(name = "-c", aliases = "--class", required = false, usage = "The class to use as the RenderEngine")
    private String renderClass = "com.lulu.WordCloud.RenderEngine";

    @Option(name = "-l", aliases = "--lemmatize", required = false, usage = "Convert words to their lemmatized form before counting")
    private boolean lemmatize = false;

    /**
     * The work horse of the Word Cloud. Loops through the words, hands each one
     * to the renderer for placement, then crops the image via the renderer and
     * writes it out as a PNG.
     *
     * @param renderer The engine that places each word and computes the final sub-image
     * @param words The words to create the cloud from
     * @param file The output file
     * @throws IOException if the image cannot be written
     * @throws IllegalArgumentException if {@code words} contains no words
     */
    public void doLayout(AbstractRenderEngine renderer, Counter<String> words, File file) throws IOException {
        // Assumes keyList() returns the words ordered by descending count; the
        // early break in the loop below relies on that - TODO confirm.
        List<String> topWords = words.keyList();
        if (topWords.isEmpty()) {
            // Previously this surfaced as an IndexOutOfBoundsException from get(0).
            throw new IllegalArgumentException("No words to render: the input contains no countable words");
        }

        double high = words.getCount(topWords.get(0));
        double low = high - (high * ((double) minPercent / 100));
        double range = high - low;

        List<Area> bounds = new ArrayList<Area>();
        Random rand = new Random();
        BufferedImage img = new BufferedImage(maxDimension, maxDimension, BufferedImage.TYPE_INT_RGB);
        Graphics2D g = (Graphics2D) img.getGraphics();
        try {
            g.setRenderingHint(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON);
            g.setBackground(Color.WHITE);
            // Paint the canvas white explicitly rather than relying on the
            // graphics context's default foreground colour.
            g.setColor(Color.WHITE);
            g.fillRect(0, 0, img.getWidth(), img.getHeight());

            // Compute font sizes based on dimensions
            // (the 72 / 96 factor presumably converts pixels to points - confirm)
            int biggestSize = ((maxDimension / 5) * 72 / 96) + 5;
            int smallestSize = biggestSize / 8;

            for (String w : topWords) {
                int count = words.getCount(w);
                if (count < low) {
                    break;
                }
                // When high == low the original computed 0/0 (NaN -> 0 after the
                // cast); treat every surviving word as top-ranked instead.
                double scale = (range > 0) ? (count - low) / range : 1.0;
                int fontSize = (int) (scale * (biggestSize - smallestSize)) + smallestSize;
                Font font = new Font(this.fontFamily, Font.PLAIN, fontSize);
                // nextInt's bound is exclusive, so 256 is needed to cover 0..255.
                g.setColor(new Color(rand.nextInt(256), rand.nextInt(256), rand.nextInt(256)));
                renderer.placeWord(w, font, g, bounds);
            }

            img = renderer.getSubImage(bounds, img);
            ImageIO.write(img, "png", file);
        } finally {
            // Release the graphics context once nothing can draw with it any more.
            g.dispose();
        }
    }

    /**
     * Convert the input file to one long string to be used for counting words.
     * Every line is terminated with the platform line separator. The file is
     * decoded with the platform default charset (FileReader behaviour).
     *
     * @param fileIn the Input file
     * @return The entire contents of the file in a single string.
     * @throws IOException if the file cannot be opened or read
     */
    private String processInput(File fileIn) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileIn));
        try {
            StringBuilder builder = new StringBuilder();
            String sep = System.getProperty("line.separator");
            String line;
            while ((line = reader.readLine()) != null) {
                builder.append(line);
                builder.append(sep);
            }
            return builder.toString();
        } finally {
            // Always release the file handle, including when a read fails.
            reader.close();
        }
    }

    /**
     * Create the word list with counts per word. The text is lower-cased first;
     * English stop words are passed to the n-gram iterator or the lemmatizer,
     * depending on the lemmatize option.
     *
     * @param fileIn the File to process.
     * @return a list of words with their count.
     * @throws IOException if the file cannot be read
     */
    private Counter<String> createWordList(File fileIn) throws IOException {
        String contents = processInput(fileIn).toLowerCase();
        if (lemmatize) {
            StanfordLemmatizer lemmatizer = new StanfordLemmatizer();
            return lemmatizer.lemmatize(contents, StopWords.English);
        }
        Counter<String> words = new Counter<String>();
        for (final String word : new NGramIterator(1, contents, Locale.ENGLISH, StopWords.English)) {
            words.note(word);
        }
        return words;
    }

    /**
     * Main function for the cloud generator. Parses the command line, loads the
     * configured render engine reflectively, counts the words of the input file
     * and renders the cloud. Failures are reported on standard error.
     *
     * @param args The command line arguments.
     */
    public static void main(String[] args) {
        Generator app = new Generator();
        CmdLineParser parser = new CmdLineParser(app);
        try {
            parser.parseArgument(args);
            // asSubclass verifies the type up front instead of an unchecked cast.
            Class<? extends AbstractRenderEngine> engineClass =
                    Class.forName(app.renderClass).asSubclass(AbstractRenderEngine.class);
            // The engine must expose a public (Integer, Integer, Integer, Boolean) constructor.
            Constructor<? extends AbstractRenderEngine> ctor =
                    engineClass.getConstructor(Integer.class, Integer.class, Integer.class, Boolean.class);
            AbstractRenderEngine renderer =
                    ctor.newInstance(app.maxDimension, app.rotationAllowed, app.buffer, app.debug);
            Counter<String> words = app.createWordList(app.fileIn);
            app.doLayout(renderer, words, app.fileOut);
        } catch (CmdLineException e) {
            // handling of wrong arguments
            System.err.println(e.getMessage());
            parser.printUsage(System.err);
        } catch (IOException e) {
            System.err.println(e.getMessage());
        } catch (ClassNotFoundException e) {
            System.err.println("Render engine class not found: " + e.getMessage());
        } catch (ClassCastException e) {
            System.err.println("Render engine class " + app.renderClass
                    + " is not an AbstractRenderEngine");
        } catch (NoSuchMethodException e) {
            System.err.println("Render engine has no matching constructor: " + e.getMessage());
        } catch (IllegalArgumentException e) {
            System.err.println(e.getMessage());
        } catch (InstantiationException e) {
            System.err.println("Render engine could not be instantiated: " + e.getMessage());
        } catch (IllegalAccessException e) {
            System.err.println(e.getMessage());
        } catch (InvocationTargetException e) {
            // The wrapper's own message is normally null; the real failure is the cause.
            Throwable cause = (e.getCause() != null) ? e.getCause() : e;
            System.err.println("Render engine constructor failed: " + cause);
        }
    }
}