Examples of OutputLogFilter


Examples of org.apache.hadoop.mapred.OutputLogFilter

      assertTrue("pipes job failed", result.isSuccessful());
    }

    List<String> results = new ArrayList<String>();
    for (Path p:FileUtil.stat2Paths(dfs.getFileSystem().listStatus(outputPath,
                                new OutputLogFilter()))) {
      results.add(TestMiniMRWithDFS.readOutput(p, job));
    }
    assertEquals("number of reduces is wrong",
                 expectedResults.length, results.size());
    for(int i=0; i < results.size(); i++) {
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

   */
  private static void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure) throws IOException,
      InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    int id = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

                                       int maxIterations) throws IOException, InstantiationException, IllegalAccessException {
    MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure, t1, t2, convergenceDelta);
    List<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
    FileStatus[] status = fs.listStatus(clustersIn, new OutputLogFilter());
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        MeanShiftCanopy canopy = reader.getValueClass().asSubclass(MeanShiftCanopy.class).newInstance();
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

  private static void clusterDataSeq(Path input, Path clustersIn, Path output) throws IOException, InstantiationException,
      IllegalAccessException {
    Collection<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
    FileStatus[] status = fs.listStatus(clustersIn, new OutputLogFilter());
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        MeanShiftCanopy value = reader.getValueClass().asSubclass(MeanShiftCanopy.class).newInstance();
        while (reader.next(key, value)) {
          clusters.add(value);
          value = reader.getValueClass().asSubclass(MeanShiftCanopy.class).newInstance();
        }
      } finally {
        reader.close();
      }
    }
    // iterate over all points, assigning each to the closest canopy and outputting that clustering
    fs = FileSystem.get(input.toUri(), conf);
    status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
                                                           conf,
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

    String statePath = conf.get(MeanShiftCanopyDriver.STATE_IN_KEY);
    List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
    try {
      Path path = new Path(statePath);
      FileSystem fs = FileSystem.get(path.toUri(), conf);
      FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
      for (FileStatus s : status) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
        try {
          Text key = new Text();
          MeanShiftCanopy canopy = new MeanShiftCanopy();
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

    int iteration = 1;
    while (!converged && iteration <= maxIterations) {
      log.info("Fuzzy k-Means Iteration: " + iteration);
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(input.toUri(), conf);
      FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
      for (FileStatus s : status) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
        try {
          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
          VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

    if (clusters.isEmpty()) {
      throw new IllegalStateException("Clusters is empty!");
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
                                                           conf,
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

   * Read the first input vector to determine the prototype size for the modelPrototype
   */
  public static int readPrototypeSize(Path input) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int protoSize = 0;
    if (status.length > 0) {
      FileStatus s = status[0];
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

                                                       numClusters);
      Cluster[] newModels = (Cluster[]) state.getModelFactory().sampleFromPosterior(state.getModels());
      DirichletClusterer clusterer = new DirichletClusterer(state);
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(input.toUri(), conf);
      FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
      for (FileStatus s : status) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
        try {
          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
          VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
View Full Code Here

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter

    Configuration conf = new Configuration();
    List<DirichletCluster> clusters = DirichletClusterMapper.loadClusters(conf, stateIn);
    DirichletClusterer clusterer = new DirichletClusterer(emitMostLikely, threshold);
    // iterate over all points, assigning each to the closest canopy and outputting that clustering
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
                                                           conf,
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.