Package org.apache.mahout.classifier

Examples of org.apache.mahout.classifier.ResultAnalyzer


    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
    ResultAnalyzer analyzer = analyze ? new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown") : null;

    if (dataFS.getFileStatus(dataPath).isDir()) {
      //the input is a directory of files
      testDirectory(outputPath, converter, forest, dataset, analyzer, rng);
    else {
      // the input is one single file
      testFile(dataPath, outputPath, converter, forest, dataset, analyzer, rng);
    }

    time = System.currentTimeMillis() - time;
    log.info("Classification Time: {}", DFUtils.elapsedTime(time));

    if (analyzer != null) {
      log.info(analyzer.summarize());
    }
  }
View Full Code Here


    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {
     
      for (File file : subdirs) {
        if (verbose) {
          log.info("--------------");
          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
            String correctLabel = stringListEntry.getKey();
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(strings.toArray(new String[strings
                .size()]), params.get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            if (verbose) {
              // We have one document per line
              log.info("Line Number: {} Line(30): {} Expected Label: {} Classified Label: {} Correct: {}",
                new Object[] {lineNum, line.length() > 30 ? line.substring(0, 30) : line, correctLabel,
                              classifiedLabel.getLabel(), correct,});
            }
            // log.info("{} {}", correctLabel, classifiedLabel);
           
          }
          lineNum++;
        }
        /*
         * log.info("{}\t{}\t{}/{}", new Object[] {correctLabel,
         * resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
         * resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
         * resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)});
         */
        log.info("Classified instances from {}", file.getName());
        if (verbose) {
          log.info("Performance stats {}", operationStats.toString());
        }
      }
     
    }
    if (verbose) {
      log.info("{}", totalStatistics.toString());
    }
    log.info(resultAnalyzer.summarize());
  }
View Full Code Here

   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
View Full Code Here

   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
View Full Code Here

    this.outputPath = outputPath;
    this.conf = conf;

    if (analyze) {
      dataset = Dataset.load(conf, datasetPath);
      analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");

    } else {
      dataset = null;
      analyzer = null;
    }
View Full Code Here

    String testDirPath = (String) cmdLine.getValue(dirOpt);
    File dir = new File(testDirPath);
    File[] subdirs = dir.listFiles();

    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(model.getLabels(), defaultCat);

    if (subdirs != null) {
      for (File subdir : subdirs) {

        String correctLabel = subdir.getName().split(".txt")[0];
        BufferedReader fileReader = new BufferedReader(new InputStreamReader(
            new FileInputStream(subdir.getPath()), encoding));
        try {
          String line;
          while ((line = fileReader.readLine()) != null) {
 
            Map<String, List<String>> document = Model.generateNGrams(line, gramSize);
            for (Map.Entry<String, List<String>> stringListEntry : document.entrySet()) {
              List<String> strings = stringListEntry.getValue();
              ClassifierResult classifiedLabel = classifier.classify(model,
                  strings.toArray(new String[strings.size()]),
                  defaultCat);
              resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            }
          }
          log.info("{}\t{}\t{}/{}", new Object[]{
              correctLabel,
              resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
              resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
              resultAnalyzer.getConfusionMatrix().getTotal(correctLabel)
          });
        } finally {
          fileReader.close();
        }
      }

    }
    log.info(resultAnalyzer.summarize());

  }
View Full Code Here

    SequenceFileDirIterable<Text, VectorWritable> dirIterable =
        new SequenceFileDirIterable<Text, VectorWritable>(getOutputPath(),
                                                          PathType.LIST,
                                                          PathFilters.partFilter(),
                                                          getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
  }
View Full Code Here

        if (dataset.isNumerical(dataset.getLabelId())) {
          RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
          regressionAnalyzer.setInstances(results);
          log.info("{}", regressionAnalyzer);
        } else {
          ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
          for (double[] res : results) {
            analyzer.addInstance(dataset.getLabelString(res[0]),
              new ClassifierResult(dataset.getLabelString(res[1]), 1.0));
          }
          log.info("{}", analyzer);
        }
      }
View Full Code Here

        RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer();
        double[][] results = new double[resList.size()][2];
        regressionAnalyzer.setInstances(resList.toArray(results));
        log.info("{}", regressionAnalyzer);
      } else {
        ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");
        for (double[] r : resList) {
          analyzer.addInstance(dataset.getLabelString(r[0]),
            new ClassifierResult(dataset.getLabelString(r[1]), 1.0));
        }
        log.info("{}", analyzer);
      }
    }
View Full Code Here

        newsGroups.intern(newsgroup.getName());
        files.addAll(Arrays.asList(newsgroup.listFiles()));
      }
    }
    System.out.println(files.size() + " test files");
    ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
    for (File file : files) {
      String ng = file.getParentFile().getName();

      int actual = newsGroups.intern(ng);
      NewsgroupHelper helper = new NewsgroupHelper();
      //no leak type ensures this is a normal vector
      Vector input = helper.encodeFeatureVector(file, actual, 0, overallCounts);
      Vector result = classifier.classifyFull(input);
      int cat = result.maxValueIndex();
      double score = result.maxValue();
      double ll = classifier.logLikelihood(actual, input);
      ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll);
      ra.addInstance(newsGroups.values().get(actual), cr);

    }
    output.println(ra);
  }
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.ResultAnalyzer

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.