Package org.apache.mahout.classifier

Examples of org.apache.mahout.classifier.ResultAnalyzer
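ResultAnalyzer accumulates (correct label, classified result) pairs and reports overall accuracy plus a confusion matrix. As a quick orientation before the fragments below, here is a minimal, self-contained sketch of that workflow; the class name, the two labels, and the score/log-likelihood values are made-up illustration values, not taken from any snippet on this page:

import java.util.Arrays;

import org.apache.mahout.classifier.ClassifierResult;
import org.apache.mahout.classifier.ResultAnalyzer;

public class ResultAnalyzerSketch {
  public static void main(String[] args) {
    // The labels the analyzer should expect, plus a fallback label for anything unrecognized
    ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList("spam", "ham"), "unknown");
    // addInstance() returns true when the classified label matches the correct one
    analyzer.addInstance("spam", new ClassifierResult("spam", 0.9, -0.1)); // a hit
    analyzer.addInstance("ham", new ClassifierResult("spam", 0.6, -0.7)); // a miss
    System.out.println(analyzer.summarize()); // accuracy counts and the confusion matrix
  }
}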


      asfDictionary.intern(next.getFirst().toString());
      numItems++;
    }

    System.out.println(numItems + " test files");
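    // Tally predictions against the interned label dictionary; "DEFAULT" stands in for unmatched labels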
    ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
    iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, testFilter,
            null, true, conf);
    while (iter.hasNext()) {
      Pair<Text, VectorWritable> next = iter.next();
      String ng = next.getFirst().toString();

      int actual = asfDictionary.intern(ng);
      Vector result = classifier.classifyFull(next.getSecond().get());
      int cat = result.maxValueIndex();
      double score = result.maxValue();
      double ll = classifier.logLikelihood(actual, next.getSecond().get());
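      // Record the winning category, its score, and the log-likelihood for this instance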
      ClassifierResult cr = new ClassifierResult(asfDictionary.values().get(cat), score, ll);
      ra.addInstance(asfDictionary.values().get(actual), cr);

    }
    output.println(ra);
  }
View Full Code Here


      throw new IllegalArgumentException("Unrecognized dataSource type: "
          + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(),
        params.get("defaultCat"));
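    // totalStatistics spans the whole run; per-file timings are collected below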
    final TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {

      for (File file : subdirs) {
        log.info("--------------");
        log.info("Testing: " + file);
        String correctLabel = file.getName().split(".txt")[0];
        final TimingStatistics operationStats = new TimingStatistics();

        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()),
            Charset.forName(params.get("encoding")), false)) {
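          // Each line is one document: break it into n-grams, then classify each n-gram group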

          Map<String, List<String>> document = new NGrams(line, Integer
              .parseInt(params.get("gramSize"))).generateNGrams();
          for (Map.Entry<String, List<String>> stringListEntry : document
              .entrySet()) {
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(
                strings.toArray(new String[strings.size()]), params
                    .get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel,
                classifiedLabel);
            if (verbose) {
              // We have one document per line
              log.info("Line Number: " + lineNum + " Line(30): "
                  + (line.length() > 30 ? line.substring(0, 30) : line)
                  + " Expected Label: " + correctLabel + " Classified Label: "
                  + classifiedLabel.getLabel() + " Correct: " + correct);
            }
            // log.info("{} {}", correctLabel, classifiedLabel);

          }
          lineNum++;
        }
        log.info("{}\t{}\t{}/{}", new Object[] { correctLabel,
            resultAnalyzer.getConfusionMatrix().getAccuracy(correctLabel),
            resultAnalyzer.getConfusionMatrix().getCorrect(correctLabel),
            resultAnalyzer.getConfusionMatrix().getTotal(correctLabel) });
        log.info("{}", operationStats.toString());
      }

    }
    log.info("{}", totalStatistics.toString());
    log.info(resultAnalyzer.summarize());
  }
View Full Code Here

    log.info("Sequential classification...");
    long time = System.currentTimeMillis();

    Random rng = RandomUtils.getRandom();
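    // Build the analyzer only when accuracy reporting was requested; "unknown" is the fallback label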
    ResultAnalyzer analyzer = analyze ? new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown") : null;

    if (dataFS.getFileStatus(dataPath).isDir()) {
      // the input is a directory of files
      testDirectory(outputPath, converter, forest, dataset, analyzer, rng);
    } else {
View Full Code Here

    this.outputPath = outputPath;
    this.conf = conf;

    if (analyze) {
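      // Accuracy analysis needs the dataset's label list; otherwise both stay null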
      dataset = Dataset.load(conf, datasetPath);
      analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown");

    } else {
      dataset = null;
      analyzer = null;
    }
View Full Code Here

   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
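      // Each entry is {label, text}; the classifier should map the text back to its own label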
      List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
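    // The assertions expect exactly four correct classifications per class and none off the diagonal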
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
View Full Code Here

   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
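      // Same fixture as the Bayes test above, this time driven through CBayesAlgorithm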
      List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
View Full Code Here

    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {
     
      for (File file : subdirs) {
        if (verbose) {
          log.info("--------------");
          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
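          // Unlike the variant above, the correct label is carried as the map key rather than the file name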
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
            String correctLabel = stringListEntry.getKey();
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(strings.toArray(new String[strings
                .size()]), params.get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            if (verbose) {
              // We have one document per line
              log.info("Line Number: {} Line(30): {} Expected Label: {} Classified Label: {} Correct: {}",
                new Object[] {lineNum, line.length() > 30 ? line.substring(0, 30) : line, correctLabel,
                              classifiedLabel.getLabel(), correct});
View Full Code Here

    SequenceFileDirIterable<Text, VectorWritable> dirIterable =
        new SequenceFileDirIterable<Text, VectorWritable>(getOutputPath(),
                                                          PathType.LIST,
                                                          PathFilters.partFilter(),
                                                          getConf());
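    // Score each output vector against the known label map; "DEFAULT" is the fallback label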
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
  }
View Full Code Here

      asfDictionary.intern(next.getFirst().toString());
      numItems++;
    }

    System.out.printf("%d test files\n", numItems);
    ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
    iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST, testFilter,
            null, true, conf);
    while (iter.hasNext()) {
      Pair<Text, VectorWritable> next = iter.next();
      String ng = next.getFirst().toString();

      int actual = asfDictionary.intern(ng);
      Vector result = classifier.classifyFull(next.getSecond().get());
      int cat = result.maxValueIndex();
      double score = result.maxValue();
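      // The log-likelihood is taken against the actual label, not the predicted category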
      double ll = classifier.logLikelihood(actual, next.getSecond().get());
      ClassifierResult cr = new ClassifierResult(asfDictionary.values().get(cat), score, ll);
      ra.addInstance(asfDictionary.values().get(actual), cr);

    }
    output.printf("%s\n\n", ra.toString());
  }
View Full Code Here
