Package org.apache.mahout.clustering.kmeans

Examples of org.apache.mahout.clustering.kmeans.OutputLogFilter


   */
  private static void createCanopyFromVectorsSeq(Path input, Path output, DistanceMeasure measure) throws IOException,
      InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    int id = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
View Full Code Here


                                       int maxIterations) throws IOException, InstantiationException, IllegalAccessException {
    MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure, t1, t2, convergenceDelta);
    List<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
    FileStatus[] status = fs.listStatus(clustersIn, new OutputLogFilter());
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        MeanShiftCanopy canopy = reader.getValueClass().asSubclass(MeanShiftCanopy.class).newInstance();
View Full Code Here

  private static void clusterDataSeq(Path input, Path clustersIn, Path output) throws IOException, InstantiationException,
      IllegalAccessException {
    Collection<MeanShiftCanopy> clusters = new ArrayList<MeanShiftCanopy>();
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(clustersIn.toUri(), conf);
    FileStatus[] status = fs.listStatus(clustersIn, new OutputLogFilter());
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
        MeanShiftCanopy value = reader.getValueClass().asSubclass(MeanShiftCanopy.class).newInstance();
        while (reader.next(key, value)) {
          clusters.add(value);
          value = reader.getValueClass().asSubclass(MeanShiftCanopy.class).newInstance();
        }
      } finally {
        reader.close();
      }
    }
    // iterate over all points, assigning each to the closest canopy and outputting that clustering
    fs = FileSystem.get(input.toUri(), conf);
    status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
                                                           conf,
View Full Code Here

    String statePath = conf.get(MeanShiftCanopyDriver.STATE_IN_KEY);
    List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>();
    try {
      Path path = new Path(statePath);
      FileSystem fs = FileSystem.get(path.toUri(), conf);
      FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
      for (FileStatus s : status) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
        try {
          Text key = new Text();
          MeanShiftCanopy canopy = new MeanShiftCanopy();
View Full Code Here

    int iteration = 1;
    while (!converged && iteration <= maxIterations) {
      log.info("Fuzzy k-Means Iteration: " + iteration);
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(input.toUri(), conf);
      FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
      for (FileStatus s : status) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
        try {
          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
          VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
View Full Code Here

    if (clusters.isEmpty()) {
      throw new IllegalStateException("Clusters is empty!");
    }
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
                                                           conf,
View Full Code Here

   * Read the first input vector to determine the prototype size for the modelPrototype
   */
  public static int readPrototypeSize(Path input) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int protoSize = 0;
    if (status.length > 0) {
      FileStatus s = status[0];
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
View Full Code Here

                                                       numClusters);
      Cluster[] newModels = (Cluster[]) state.getModelFactory().sampleFromPosterior(state.getModels());
      DirichletClusterer clusterer = new DirichletClusterer(state);
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(input.toUri(), conf);
      FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
      for (FileStatus s : status) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
        try {
          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
          VectorWritable vw = reader.getValueClass().asSubclass(VectorWritable.class).newInstance();
View Full Code Here

    Configuration conf = new Configuration();
    List<DirichletCluster> clusters = DirichletClusterMapper.loadClusters(conf, stateIn);
    DirichletClusterer clusterer = new DirichletClusterer(emitMostLikely, threshold);
    // iterate over all points, assigning each to the closest canopy and outputting that clustering
    FileSystem fs = FileSystem.get(input.toUri(), conf);
    FileStatus[] status = fs.listStatus(input, new OutputLogFilter());
    int part = 0;
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs,
                                                           conf,
View Full Code Here

    throws IOException, InstantiationException, IllegalAccessException {

    Map<Integer, List<VectorWritable>> repPoints = RepresentativePointsMapper.getRepresentativePoints(conf, stateIn);
    Map<Integer, WeightedVectorWritable> mostDistantPoints = new HashMap<Integer, WeightedVectorWritable>();
    FileSystem fs = FileSystem.get(clusteredPointsIn.toUri(), conf);
    FileStatus[] status = fs.listStatus(clusteredPointsIn, new OutputLogFilter());
    for (FileStatus s : status) {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
      try {
        IntWritable key = (IntWritable) reader.getKeyClass().asSubclass(Writable.class).newInstance();
        WeightedVectorWritable vw = reader.getValueClass().asSubclass(WeightedVectorWritable.class).newInstance();
View Full Code Here

TOP

Related Classes of org.apache.mahout.clustering.kmeans.OutputLogFilter

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.