Examples of BayesParameters


Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

 
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    algorithm = new BayesAlgorithm();
    store = new InMemoryBayesDatastore(new BayesParameters(1));
    // String[] labels = new String[]{"a", "b", "c", "d", "e"};
    // long[] labelCounts = new long[]{6, 20, 60, 100, 200};
    // String[] features = new String[]{"aa", "bb", "cc", "dd", "ee"};
    store.setSigmaJSigmaK(100.0);
   
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

  @Override
  protected void setUp() throws Exception {
    super.setUp();
    algorithm = new CBayesAlgorithm();
    store = new InMemoryBayesDatastore(new BayesParameters(1));
    // String[] labels = new String[]{"a", "b", "c", "d", "e"};
    // long[] labelCounts = new long[]{6, 20, 60, 100, 200};
    // String[] features = new String[]{"aa", "bb", "cc", "dd", "ee"};
    store.setSigmaJSigmaK(500.0);
   
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

    BayesFeatureMapper mapper = new BayesFeatureMapper();
    JobConf conf = new JobConf();
    conf.set("io.serializations",
      "org.apache.hadoop.io.serializer.JavaSerialization,"
          + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.set("bayes.parameters", new BayesParameters(3).toString());
    mapper.configure(conf);
   
    DummyOutputCollector<StringTuple,DoubleWritable> output = new DummyOutputCollector<StringTuple,DoubleWritable>();
    mapper.map(new Text("foo"), new Text("big brown shoe"), output,
      Reporter.NULL);
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

    super.setUp();
    ClassifierData.writeDataToFile("testdata/bayesinput", ClassifierData.DATA);
  }
 
  public void testSelfTestBayes() throws InvalidDatastoreException, IOException {
    BayesParameters params = new BayesParameters(1);
    params.set("alpha_i", "1.0");
    params.set("dataSource", "hdfs");
    TrainClassifier.trainNaiveBayes("testdata/bayesinput", "testdata/bayesmodel", params);
   
    params.set("verbose", "true");
    params.set("basePath", "testdata/bayesmodel");
    params.set("classifierType", "bayes");
    params.set("dataSource", "hdfs");
    params.set("defaultCat", "unknown");
    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (i == j)
          assertEquals(4, matrix[i][j]);
        else
          assertEquals(0, matrix[i][j]);
      }
    }
    params.set("testDirPath", "testdata/bayesinput");
    TestClassifier.classifyParallel(params);
    Configuration conf = new Configuration();
    Path outputFiles = new Path("testdata/bayesinput-output/part*");
    FileSystem fs = FileSystem.get(outputFiles.toUri(), conf);
    matrix = BayesClassifierDriver.readResult(fs, outputFiles, conf, params).getConfusionMatrix();
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

      }
    }
  }
 
  public void testSelfTestCBayes() throws InvalidDatastoreException, IOException {
    BayesParameters params = new BayesParameters(1);
    params.set("alpha_i", "1.0");
    params.set("dataSource", "hdfs");
    TrainClassifier.trainCNaiveBayes("testdata/bayesinput", "testdata/cbayesmodel", params);
   
    params.set("verbose", "true");
    params.set("basePath", "testdata/cbayesmodel");
    params.set("classifierType", "cbayes");
    params.set("dataSource", "hdfs");
    params.set("defaultCat", "unknown");
    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new CBayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (i == j)
          assertEquals(4, matrix[i][j]);
        else
          assertEquals(0, matrix[i][j]);
      }
    }
    params.set("testDirPath", "testdata/bayesinput");
    TestClassifier.classifyParallel(params);
    Configuration conf = new Configuration();
    Path outputFiles = new Path("testdata/bayesinput-output/part*");
    FileSystem fs = FileSystem.get(outputFiles.toUri(), conf);
    matrix = BayesClassifierDriver.readResult(fs, outputFiles, conf, params).getConfusionMatrix();
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

      }
     
      String input = cmdLine.getValue(inputOpt).toString();
      String output = cmdLine.getValue(outputOpt).toString();
     
      job.runJob(input, output, new BayesParameters(1));
    } catch (OptionException e) {
      log.error(e.getMessage());
      CommandLineUtil.printHelp(group);
    }
  }
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

      }
     
      Path input = new Path(cmdLine.getValue(inputOpt).toString());
      Path output = new Path(cmdLine.getValue(outputOpt).toString());
     
      job.runJob(input, output, new BayesParameters(1));
    } catch (OptionException e) {
      log.error(e.getMessage());
      CommandLineUtil.printHelp(group);
    }
  }
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

  }
 
  public static void main(String[] args) throws IOException {
    // test harness, delete me
    BayesFeatureDriver driver = new BayesFeatureDriver();
    BayesParameters p = new BayesParameters(1);
    Path input = new Path("/home/drew/mahout/bayes/20news-input");
    Path output = new Path("/home/drew/mahout/bayes/20-news-features");
    driver.runJob(input, output, p);
  }
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

      if (cmdLine.hasOption(helpOpt)) {
        CommandLineUtil.printHelp(group);
        return;
      }
     
      BayesParameters params = new BayesParameters();
      // Setting all default values
      int gramSize = 1;
      String classifierType = "bayes";     
      String dataSource = "hdfs";
      String defaultCat = "unknown";
      String encoding = "UTF-8";
      String alphaI = "1.0";
      String classificationMethod = "sequential";

      String modelBasePath = (String) cmdLine.getValue(pathOpt);
     
      if (cmdLine.hasOption(gramSizeOpt)) {
        gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
       
      }
     
      if (cmdLine.hasOption(classifierType)) {
        classifierType = (String) cmdLine.getValue(typeOpt);
      }
     
      if (cmdLine.hasOption(dataSource)) {
        dataSource = (String) cmdLine.getValue(dataSource);
      }
     
      if (cmdLine.hasOption(defaultCatOpt)) {
        defaultCat = (String) cmdLine.getValue(defaultCatOpt);
      }
     
      if (cmdLine.hasOption(encodingOpt)) {
        encoding = (String) cmdLine.getValue(encodingOpt);
      }
     
      if (cmdLine.hasOption(alphaOpt)) {
        alphaI = (String) cmdLine.getValue(alphaOpt);
      }
     
      boolean verbose = cmdLine.hasOption(verboseOutputOpt);
     
      String testDirPath = (String) cmdLine.getValue(dirOpt);
     
      if (cmdLine.hasOption(methodOpt)) {
        classificationMethod = (String) cmdLine.getValue(methodOpt);
      }
     
      params.setGramSize(gramSize);
      params.set("verbose", Boolean.toString(verbose));
      params.set("basePath", modelBasePath);
      params.set("classifierType", classifierType);
      params.set("dataSource", dataSource);
      params.set("defaultCat", defaultCat);
      params.set("encoding", encoding);
      params.set("alpha_i", alphaI);
      params.set("testDirPath", testDirPath);
     
      if (classificationMethod.equalsIgnoreCase("sequential")) {
        classifySequential(params);
      } else if (classificationMethod.equalsIgnoreCase("mapreduce")) {
        classifyParallel(params);
View Full Code Here

Examples of org.apache.mahout.classifier.bayes.common.BayesParameters

    fs.copyFromLocalFile(new Path(tempInputFile.getAbsolutePath()), input);
  }

  @Test
  public void testSelfTestBayes() throws Exception {
    BayesParameters params = new BayesParameters(1);
    params.set("alpha_i", "1.0");
    params.set("dataSource", "hdfs");
    Path bayesInputPath = getTestTempFilePath("bayesinput");
    Path bayesModelPath = getTestTempDirPath("bayesmodel");
    TrainClassifier.trainNaiveBayes(bayesInputPath, bayesModelPath, params);
   
    params.set("verbose", "true");
    params.set("basePath", bayesModelPath.toString());
    params.set("classifierType", "bayes");
    params.set("dataSource", "hdfs");
    params.set("defaultCat", "unknown");
    params.set("encoding", "UTF-8");
    params.set("alpha_i", "1.0");
   
    Algorithm algorithm = new BayesAlgorithm();
    Datastore datastore = new InMemoryBayesDatastore(params);
    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();
    ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(), params.get("defaultCat"));
   
    for (String[] entry : ClassifierData.DATA) {
      List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
          .generateNGramsWithoutLabel();
      assertEquals(3, classifier.classifyDocument(document.toArray(new String[document.size()]),
        params.get("defaultCat"), 100).length);
      ClassifierResult result = classifier.classifyDocument(document.toArray(new String[document.size()]), params
          .get("defaultCat"));
      assertEquals(entry[0], result.getLabel());
      resultAnalyzer.addInstance(entry[0], result);
    }
    int[][] matrix = resultAnalyzer.getConfusionMatrix().getConfusionMatrix();
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        assertEquals(i == j ? 4 : 0, matrix[i][j]);
      }
    }
    params.set("testDirPath", bayesInputPath.toString());
    TestClassifier.classifyParallel(params);
    Configuration conf = new Configuration();
    Path outputFiles = getTestTempFilePath("bayesinput-output/part*");
    FileSystem fs = FileSystem.get(outputFiles.toUri(), conf);
    matrix = BayesClassifierDriver.readResult(fs, outputFiles, conf, params).getConfusionMatrix();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.