Package cbcb.kmulus.util

Examples of cbcb.kmulus.util.PresenceVector


        throws IOException, InterruptedException {
     
      String sequence = sequenceAndHeader.substring(sequenceAndHeader.indexOf(
          GenerateClusterPresenceVectors.SEQUENCE_SEPARATOR) + 1);
     
      PresenceVector pv = new PresenceVector(kmer_length);
     
      for (int i = 0; i <= sequence.length() - kmer_length; i++) {
        int hash = Biology.getAAKmerHash(sequence.substring(i, i + kmer_length));
        if (hash >= 0) {
          pv.setKmer(hash);
        }
      }
     
      context.write(clusterId, pv);
    }
View Full Code Here


    }
   
    public void reduce(LongWritable key, Iterable<PresenceVector> values, Context context)
      throws IOException, InterruptedException {
     
      PresenceVector clusterPV = new PresenceVector(kmerLength);
     
      for (PresenceVector pv : values) {
        clusterPV = clusterPV.union(pv);
      }
     
      context.write(key, clusterPV);
    }
View Full Code Here

        continue;

      SequenceFile.Reader reader = new SequenceFile.Reader(fs,srcFileStatus.getPath(), conf);

      LongWritable key = new LongWritable();
      PresenceVector value = new PresenceVector();

      while (reader.next(key, value) != false) {
        if (key.toString().equals("")) {
          break;
        }

        clusterCenters[currCenter] = new PresenceVector(value);
        ++currCenter;
        key.set(0);
      }
      reader.close();
    }
View Full Code Here

        // Skip the header character.
        LongWritable seqId = new LongWritable(new Integer(line.substring(1, spaceIndex)));
        String sequence = line.substring(spaceIndex + 1);
       
        // Create the feature vector.
        PresenceVector featureVector = new PresenceVector(kmerLength);
        featureVector.setId(seqId.get());
       
        for (int i = 0; i <= sequence.length() - kmerLength; i += kmerLength) {
          int hash = Biology.getAAKmerHash(sequence.substring(i, i + kmerLength));
          if (hash >= 0) {
            featureVector.setKmer(hash);
          }
        }   
         
        context.write(seqId, featureVector);
      }
View Full Code Here

    int numOfSequences = 0;
    List<PresenceVector> vectors = new ArrayList<PresenceVector>();
   
    // copy all values
    for (PresenceVector value : values) {     
      vectors.add(new PresenceVector(value));
      ++numOfSequences;     
    }

    if (debug) {
      LOG.info("Number of sequences: " + numOfSequences);
    }
   
    PresenceVector clusterCenter = null;
    int minDistance = Integer.MAX_VALUE;
    for (int i = 0; i < vectors.size(); i++) {
      PresenceVector tempCenter = vectors.get(i);
      int distance = 0;
     
      for(int j=0; j < vectors.size(); j++){
        distance+= tempCenter.getHammingDistance(vectors.get(j));
      }
     
      if (distance < minDistance){
        clusterCenter = tempCenter;
        minDistance = distance;
View Full Code Here

        continue;

      SequenceFile.Reader reader = new SequenceFile.Reader(fs,srcFileStatus.getPath(), conf);

      LongWritable key = new LongWritable();
      PresenceVector value = new PresenceVector();

      while (reader.next(key, value) != false) {
        if (key.toString().equals("")) {
          break;
        }

        clusterCenters[currCenter] = new PresenceVector(value);
        ++currCenter;
        key.set(0);
      }
      reader.close();
    }
View Full Code Here

    if (debug) {
      LOG.info("Distances: " + distances.toString());
      LOG.info("Number of sequences: " + numOfSequences);
    }
   
    PresenceVector clusterCenter = new PresenceVector(kmerLength);
   
    // Normalize the presence vector.
    for (int i = 0; i < distances.length; i++) {
      // Distance metric relied on bit vectors of only 0 or 1, not counts.
      int distance = (int) Math.round(distances[i] / ((double) numOfSequences));
      if (distance > 0)
        clusterCenter.setKmer(i);
    }
   
    context.write(key, clusterCenter);
  }
View Full Code Here

      SequenceFile.Reader reader = new SequenceFile.Reader(fs,
          new Path(sequenceInputPath + "/part-r-"
          + formatter.format(currPart)), conf);

      LongWritable key = new LongWritable();
      PresenceVector value = new PresenceVector();

      LOG.info("Creating file at: " + tempInput
          + "/output-0/part-r-00000");

      for (int i = 0; i < new Integer(numClusters); i++) {
View Full Code Here

          continue;

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, srcFileStatus.getPath(), conf);
     
        LongWritable key = new LongWritable();
        PresenceVector value = new PresenceVector();
       
        while (reader.next(key, value) != false) {
          if (key.toString().equals(""))
            break;

          centers[currCenter] = new PresenceVector(value);
          ++currCenter;
          if(currCenter % 100 == 0)
            LOG.info("currCenter: " + currCenter);
          key.set(0);
        }
View Full Code Here

TOP

Related Classes of cbcb.kmulus.util.PresenceVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.