Package org.apache.mahout.classifier

Examples of org.apache.mahout.classifier.NewsgroupHelper


    model.close();

    Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
    ModelDissector md = new ModelDissector();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setTraceDictionary(traceDictionary);
    helper.getBias().setTraceDictionary(traceDictionary);

    for (File file : permute(files, helper.getRandom()).subList(0, 500)) {
      String ng = file.getParentFile().getName();
      int actual = dictionary.intern(ng);

      traceDictionary.clear();
      Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
      md.update(v, traceDictionary, model);
    }

    List<String> ngNames = Lists.newArrayList(dictionary.values());
    List<ModelDissector.Weight> weights = md.summary(100);
View Full Code Here


    ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
    for (File file : files) {
      String ng = file.getParentFile().getName();

      int actual = newsGroups.intern(ng);
      NewsgroupHelper helper = new NewsgroupHelper();
      //no leak type ensures this is a normal vector
      Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts);
      Vector result = classifier.classifyFull(input);
      int cat = result.maxValueIndex();
      double score = result.maxValue();
      double ll = classifier.logLikelihood(actual, input);
      ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll);
View Full Code Here

      leakType = Integer.parseInt(args[1]);
    }

    Dictionary newsGroups = new Dictionary();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setProbes(2);
    AdaptiveLogisticRegression learningAlgorithm =
        new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
      if (newsgroup.isDirectory()) {
        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    Collections.shuffle(files);
    System.out.println(files.size() + " training files");
    SGDInfo info = new SGDInfo();

    int k = 0;


    for (File file : files) {
      String ng = file.getParentFile().getName();
      int actual = newsGroups.intern(ng);

      Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
      learningAlgorithm.train(actual, v);

      k++;
      State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();
View Full Code Here

    ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
    for (File file : files) {
      String ng = file.getParentFile().getName();

      int actual = newsGroups.intern(ng);
      NewsgroupHelper helper = new NewsgroupHelper();
      Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts); //no leak type ensures this is a normal vector
      Vector result = classifier.classifyFull(input);
      int cat = result.maxValueIndex();
      double score = result.maxValue();
      double ll = classifier.logLikelihood(actual, input);
      ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll);
View Full Code Here

      leakType = Integer.parseInt(args[1]);
    }

    Dictionary newsGroups = new Dictionary();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setProbes(2);
    AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
      if (newsgroup.isDirectory()) {
        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    Collections.shuffle(files);
    System.out.printf("%d training files\n", files.size());
    SGDInfo info = new SGDInfo();

    int k = 0;


    for (File file : files) {
      String ng = file.getParentFile().getName();
      int actual = newsGroups.intern(ng);

      Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
      learningAlgorithm.train(actual, v);

      k++;
      State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();
View Full Code Here

    model.close();

    Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
    ModelDissector md = new ModelDissector();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setTraceDictionary(traceDictionary);
    helper.getBias().setTraceDictionary(traceDictionary);

    for (File file : permute(files, helper.getRandom()).subList(0, 500)) {
      String ng = file.getParentFile().getName();
      int actual = dictionary.intern(ng);

      traceDictionary.clear();
      Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
      md.update(v, traceDictionary, model);
    }

    List<String> ngNames = Lists.newArrayList(dictionary.values());
    List<ModelDissector.Weight> weights = md.summary(100);
View Full Code Here

      leakType = Integer.parseInt(args[1]);
    }

    Dictionary newsGroups = new Dictionary();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setProbes(2);
    AdaptiveLogisticRegression learningAlgorithm =
        new AdaptiveLogisticRegression(20, NewsgroupHelper.FEATURES, new L1());
    learningAlgorithm.setInterval(800);
    learningAlgorithm.setAveragingWindow(500);

    List<File> files = Lists.newArrayList();
    for (File newsgroup : base.listFiles()) {
      if (newsgroup.isDirectory()) {
        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    Collections.shuffle(files);
    System.out.println(files.size() + " training files");
    SGDInfo info = new SGDInfo();

    int k = 0;


    for (File file : files) {
      String ng = file.getParentFile().getName();
      int actual = newsGroups.intern(ng);

      Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
      learningAlgorithm.train(actual, v);

      k++;
      State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest();
View Full Code Here

    model.close();

    Map<String, Set<Integer>> traceDictionary = Maps.newTreeMap();
    ModelDissector md = new ModelDissector();

    NewsgroupHelper helper = new NewsgroupHelper();
    helper.getEncoder().setTraceDictionary(traceDictionary);
    helper.getBias().setTraceDictionary(traceDictionary);

    for (File file : permute(files, helper.getRandom()).subList(0, 500)) {
      String ng = file.getParentFile().getName();
      int actual = dictionary.intern(ng);

      traceDictionary.clear();
      Vector v = helper.encodeFeatureVector(file, actual, leakType, overallCounts);
      md.update(v, traceDictionary, model);
    }

    List<String> ngNames = Lists.newArrayList(dictionary.values());
    List<ModelDissector.Weight> weights = md.summary(100);
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.NewsgroupHelper

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.