Package kmer

Examples of kmer.ProteinKmerCountFeatureVector


public class ProteinKmerCountFeatureVectorTest extends TestCase {
 
  /** Test for {@link ProteinKmerCountFeatureVectorTest#getIndexForSequence(String)}. */
  public void testGetIndexForSequence() {

    ProteinKmerCountFeatureVector vector = new ProteinKmerCountFeatureVector(2, 20);
    assertEquals(vector.getIndexForSequence("AR"), 1);
    assertEquals(vector.getIndexForSequence("VV"), 399);
   
    vector = new ProteinKmerCountFeatureVector(3, 20);
   
    assertEquals(vector.getIndexForSequence("AAA"), 0);
    assertEquals(vector.getIndexForSequence("AAR"), 1);
    assertEquals(vector.getIndexForSequence("AAD"), 3);
    assertEquals(vector.getIndexForSequence("RAA"), 400);
    assertEquals(vector.getIndexForSequence("RAR"), 401);
    assertEquals(vector.getIndexForSequence("VVV"), 7999);
   
    vector = new ProteinKmerCountFeatureVector(4, 20);

    assertEquals(vector.getIndexForSequence("AAAA"), 0);
    assertEquals(vector.getIndexForSequence("AAAA"), 0);
    assertEquals(vector.getIndexForSequence("EQHI"), 42569);
    assertEquals(vector.getIndexForSequence("VVVV"), 159999);   
  }
View Full Code Here


    byte[] testBytes = new byte[(int)Math.pow(20, 3)];
   
    initializeBytes(testBytes);
    testBytes[0] = (byte) 0x1;
   
    ProteinKmerCountFeatureVector vector = new ProteinKmerCountFeatureVector(testBytes, 3, 20);
   
    assertTrue(equalBytes(vector.getFeatureVector(), testBytes));

    testBytes[0] = (byte) 0x0;
   
    assertTrue(!equalBytes(vector.getFeatureVector(), testBytes));
  }
View Full Code Here

    assertTrue(!equalBytes(vector.getFeatureVector(), testBytes));
  }
 
  /** Test for {@link ProteinKmerCountFeatureVectorTest#ProteinKmerFeatureVector(byte[])}. */
  public void testPrintKmers() {   
    ProteinKmerCountFeatureVector vector = new ProteinKmerCountFeatureVector(3, 20);
   
    vector.parseKmersFromSequence("AVAA");
   
    System.out.println(vector.printKmers());
   
    assertTrue(true);
  }
View Full Code Here

    assertTrue(true);
  }
 
  /** Test for {@link ProteinKmerCountFeatureVectorTest#parseKmersFromSequence(String)}. */
  public void testSetBitsCorrectly() {
    ProteinKmerCountFeatureVector vector = new ProteinKmerCountFeatureVector(2, 20);
    String sequence = "AA";
   
    byte[] testBytes = new byte[(int) Math.pow(vector.getAlphabetSize(),
        vector.getKmerLength())];
   
    initializeBytes(testBytes);
    testBytes[0] = (byte) 0x1;
   
    vector.parseKmersFromSequence(sequence);
    byte[] bytes = vector.getFeatureVector();

    System.out.println(vector.printByteVector());
    System.out.println(printByteVector(testBytes));
   
    assertTrue(equalBytes(bytes, testBytes));
   
    sequence = "AR";

    vector = new ProteinKmerCountFeatureVector(2, 20);
    vector.parseKmersFromSequence(sequence);
    bytes = vector.getFeatureVector();
   
    testBytes[0] = (byte) 0x0;
    testBytes[1] = (byte) 0x1;
   
    System.out.println(vector.printByteVector());
    System.out.println(printByteVector(testBytes));
   
    assertTrue(equalBytes(bytes, testBytes));
   
    sequence = "AV";
   
    vector = new ProteinKmerCountFeatureVector(2, 20);
    vector.parseKmersFromSequence(sequence);
    bytes = vector.getFeatureVector();
   
    testBytes[1] = (byte) 0x0;
    testBytes[19] = (byte) 0x1;
   
    System.out.println(vector.printByteVector());
    System.out.println(printByteVector(testBytes));
   
    assertTrue(equalBytes(bytes, testBytes));
   
    sequence = "VV";
   
    vector = new ProteinKmerCountFeatureVector(2, 20);
    vector.parseKmersFromSequence(sequence);
    bytes = vector.getFeatureVector();
   
    testBytes[19] = (byte) 0x0;
    testBytes[399] = (byte) 0x1;
   
    System.out.println(vector.printByteVector());
    System.out.println(printByteVector(testBytes));
   
    assertTrue(equalBytes(bytes, testBytes));
  }
View Full Code Here

  }
 
  /** Test for {@link ProteinKmerCountFeatureVectorTest#getIndexForSequence(String)}. */
  public void testincrementCountAtPosition() {

    ProteinKmerCountFeatureVector vector = new ProteinKmerCountFeatureVector(2, 20);
   
    String sequence = "AAAAAAAA";
    vector.parseKmersFromSequence(sequence);
    assertEquals(vector.getFeatureVector()[0], 7);
   
    vector = new ProteinKmerCountFeatureVector(3, 20);
   
    sequence = "ARARARARAR";
   
    vector.parseKmersFromSequence(sequence);
    assertEquals(vector.getFeatureVector()[vector.getIndexForSequence("ARA")],
        vector.getFeatureVector()[vector.getIndexForSequence("RAR")])
  }
View Full Code Here

    // Randomly select a center.
    int item = new Random().nextInt(closestCenters.size());   
    closestCenter = closestCenters.get(item);
   
    if (debug) {
      ProteinKmerCountFeatureVector featureVector = new ProteinKmerCountFeatureVector(value.getBytes() /*vector*/, kmerLength);
      LOG.info("Closest Center: " + closestCenter + ", Distance: " + minDistance + ", FeatureVector: " + featureVector.printByteVector());
    }
   
    if (closestCenter == -1) {
     
    } else {
View Full Code Here

    Double[] distances = new Double[(int) Math.ceil(Math.pow(alphabetSize, kmerLength))];
    Arrays.fill(distances, 0.0);
   
    int numOfSequences = 0;
   
    ProteinKmerCountFeatureVector featureVector = null;
   
    // Create the feature vector.
    for (BytesWritable value : values) {
     
      byte[] valueVector = new byte[numberOfBytes];
      System.arraycopy(value.getBytes(), 0, valueVector, 0, valueVector.length);
     
      // ProteinKmerBitFeatureVector featureVector = new ProteinKmerBitFeatureVector(valueVector, kmerLength);
      featureVector = new ProteinKmerCountFeatureVector(valueVector, kmerLength);
     
      for (int position : featureVector.getPositionsSet()) {
        // OLD CODE for presence/absense:
        //distances[position] += 1;
       
        distances[position] += (int) featureVector.getFeatureVector()[position];
      }
     
      if (debug) {
        LOG.info(featureVector.printByteVector());
        LOG.info(featureVector.printKmers());
      }
     
      featureVector = null;     
      ++numOfSequences;     
    }

    if (debug) {
      LOG.info("Number of bytes: " + numberOfBytes);
      LOG.info("Distances: " + distances.toString());
      LOG.info("Number of sequences: " + numOfSequences);
    }
   
    // ProteinKmerBitFeatureVector clusterFeatureVector = new ProteinKmerBitFeatureVector(kmerLength);
    ProteinKmerCountFeatureVector clusterFeatureVector = new ProteinKmerCountFeatureVector(kmerLength);
   
    // Normalize the feature vector.
    for (int i = 0; i < distances.length; i++) {
      /* OLD distance metric relied on bit vectors of only 0 or 1, not counts.
      int distance = (int) Math.round(distances[i] / ((double) numOfSequences));
      if (distance > 0)
        clusterFeatureVector.setBitAtPosition(i);
      */
     
      byte distance = (byte) Math.round(distances[i] / ((double) numOfSequences));
      //if (distance > 0)
     
      clusterFeatureVector.getFeatureVector()[i] = distance;
    }
   

    if (debug) {
      LOG.info("New cluster center feature vector: " + clusterFeatureVector.printByteVector());
      LOG.info("New cluster center kmers: " + clusterFeatureVector.printKmers());
    }
   
    context.write(key, new BytesWritable(clusterFeatureVector.getFeatureVector()));
  }
View Full Code Here

TOP

Related Classes of kmer.ProteinKmerCountFeatureVector

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.