Package mia.clustering.ch12

Source Code of mia.clustering.ch12.CreateTwitterUserDataset

package mia.clustering.ch12;

import mia.clustering.ch12.twitter.ByKeyGroupingJob;

import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.mahout.common.Parameters;

public class CreateTwitterUserDataset {
  public static void main(String args[]) throws Exception {
    Parameters params = new Parameters();
    params.set("splitPattern", "\t");
    String inputDir = "tweets.txt";
    String outputDir = "twitter_seqfiles";
    params.set("input", inputDir);
    params.set("output", outputDir);
    params.set("selectedField", "1"); // tweet
    params.set("groupByField", "0"); // username
    ByKeyGroupingJob.startJob(params);
    DoubleMetaphone filter = new DoubleMetaphone();
// TODO: change these terms?!
    System.out.println(filter.encode("Loke"));
    System.out.println(filter.encode("companymancomic"));
    System.out.println(filter.encode("webcomics"));
    System.out.println(filter.encode("@comic"));
  }
}
TOP

Related Classes of mia.clustering.ch12.CreateTwitterUserDataset

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.