Package org.apache.mahout.classifier.bayes.model

Examples of org.apache.mahout.classifier.bayes.model.ClassifierContext


      }
     
    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    TimingStatistics totalStatistics = new TimingStatistics();
    if (subdirs != null) {
     
      for (File file : subdirs) {
        if (verbose) {
          log.info("--------------");
          log.info("Testing: {}", file);
        }
        TimingStatistics operationStats = new TimingStatistics();
       
        long lineNum = 0;
        for (String line : new FileLineIterable(new File(file.getPath()), Charset.forName(params
            .get("encoding")), false)) {
         
          Map<String,List<String>> document = new NGrams(line, Integer.parseInt(params.get("gramSize")))
              .generateNGrams();
          for (Map.Entry<String,List<String>> stringListEntry : document.entrySet()) {
            String correctLabel = stringListEntry.getKey();
            List<String> strings = stringListEntry.getValue();
            TimingStatistics.Call call = operationStats.newCall();
            TimingStatistics.Call outercall = totalStatistics.newCall();
            ClassifierResult classifiedLabel = classifier.classifyDocument(strings.toArray(new String[strings
                .size()]), params.get("defaultCat"));
            call.end();
            outercall.end();
            boolean correct = resultAnalyzer.addInstance(correctLabel, classifiedLabel);
            if (verbose) {
View Full Code Here


      }
     
    } else {
      throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
    }
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
      defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
      encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
      String className = (String) cmdLine.getValue(analyzerOpt);
      analyzer = Class.forName(className).asSubclass(Analyzer.class).newInstance();
    }
    if (analyzer == null) {
      analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    }
   
    log.info("Converting input document to proper format");
    String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(
        new FileInputStream(docPath), Charset.forName(encoding)));
    StringBuilder line = new StringBuilder();
    for (String token : document) {
      line.append(token).append(' ');
    }
   
    List<String> doc = new NGrams(line.toString(), gramSize).generateNGramsWithoutLabel();
   
    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    ClassifierResult category = classifier.classifyDocument(doc.toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);
   
  }
View Full Code Here

    store.loadFeatureWeight("dd", "e", 50);

  }
 
  public void test() throws InvalidDatastoreException {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to e", "e", result.getLabel());
   
    document = new String[] {"dd"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
   
    document = new String[] {"cc"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }
View Full Code Here

    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }
 
  public void testResults() throws Exception {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    System.out.println("Result: " + result);
  }
View Full Code Here

    store.loadFeatureWeight("dd", "e", 50);
   
  }
 
  public void test() throws InvalidDatastoreException {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to e", "e", result.getLabel());
   
    document = new String[] {"dd"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
   
    document = new String[] {"cc"};
    result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }
View Full Code Here

    assertNotNull("category is null and it shouldn't be", result);
    assertEquals(result + " is not equal to d", "d", result.getLabel());
  }
 
  public void testResults() throws Exception {
    ClassifierContext classifier = new ClassifierContext(algorithm, store);
    String[] document = {"aa", "ff"};
    ClassifierResult result = classifier.classifyDocument(document, "unknown");
    assertNotNull("category is null and it shouldn't be", result);
    System.out.println("Result: " + result);
  }
View Full Code Here

        }
       
      } else {
        throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
      }
      classifier = new ClassifierContext(algorithm, datastore);
      classifier.initialize();
     
      defaultCategory = params.get("defaultCat");
      gramSize = Integer.valueOf(params.get("gramSize"));
    } catch (IOException ex) {
View Full Code Here

    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
View Full Code Here

    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
View Full Code Here

        }
       
      } else {
        throw new IllegalArgumentException("Unrecognized dataSource type: " + params.get("dataSource"));
      }
      classifier = new ClassifierContext(algorithm, datastore);
      classifier.initialize();
     
      defaultCategory = params.get("defaultCat");
      gramSize = Integer.valueOf(params.get("gramSize"));
    } catch (IOException ex) {
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.bayes.model.ClassifierContext

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.