Package org.apache.mahout.classifier.df.data

Examples of org.apache.mahout.classifier.df.data.Dataset


      } else {
        trainData[i] = "C," + (i + 20) + ',' + (i + 20);
      }
    }
    // Dataset
    Dataset dataset = DataLoader.generateDataset("C N L", true, trainData);
    Data[] datas = new Data[3];
    datas[0] = DataLoader.loadData(dataset, trainData);

    // Training data
    trainData = new String[20];
View Full Code Here


    // Training data
    Data[] datas = generateTrainingDataA();
    // Build Forest
    DecisionForest forest = buildForest(datas);
    // Test data
    Dataset dataset = datas[0].getDataset();
    Data testData = DataLoader.loadData(dataset, TEST_DATA);

    double noValue = dataset.valueOf(4, "no");
    double yesValue = dataset.valueOf(4, "yes");
    assertEquals(noValue, forest.classify(testData.getDataset(), rng, testData.get(0)), EPSILON);
    // This one is tie-broken -- 1 is OK too
    //assertEquals(yesValue, forest.classify(testData.getDataset(), rng, testData.get(1)), EPSILON);
    assertEquals(noValue, forest.classify(testData.getDataset(), rng, testData.get(2)), EPSILON);
  }
View Full Code Here

    // Training data
    Data[] datas = generateTrainingDataA();
    // Build Forest
    DecisionForest forest = buildForest(datas);
    // Test data
    Dataset dataset = datas[0].getDataset();
    Data testData = DataLoader.loadData(dataset, TEST_DATA);

    double[][] predictions = new double[testData.size()][];
    forest.classify(testData, predictions);
    double noValue = dataset.valueOf(4, "no");
    double yesValue = dataset.valueOf(4, "yes");
    assertArrayEquals(new double[][]{{noValue, Double.NaN, Double.NaN},
        {noValue, yesValue, Double.NaN}, {noValue, noValue, Double.NaN}}, predictions);
  }
View Full Code Here

    // prepare the data
    String descriptor = Utils.randomDescriptor(rng, NUM_ATTRIBUTES);
    double[][] source = Utils.randomDoubles(rng, descriptor, false, NUM_INSTANCES);
    String[] sData = Utils.double2String(source);
    Dataset dataset = DataLoader.generateDataset(descriptor, false, sData);
    String[][] splits = Utils.splitData(sData, NUM_MAPPERS);

    MockTreeBuilder treeBuilder = new MockTreeBuilder();

    LongWritable key = new LongWritable();
View Full Code Here

  static void computeFrequencies(Data data,
                                 int attr,
                                 double[] splitPoints,
                                 int[][] counts,
                                 int[] countAll) {
    Dataset dataset = data.getDataset();

    for (int index = 0; index < data.size(); index++) {
      Instance instance = data.get(index);
      int label = (int) dataset.getLabel(instance);
      double value = instance.get(attr);
      int split = 0;
      while (split < splitPoints.length && value > splitPoints[split]) {
        split++;
      }
View Full Code Here

TOP

Related Classes of org.apache.mahout.classifier.df.data.Dataset

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.