File bamFile = params.bamFile;
SAMFileReader
.setDefaultValidationStringency(ValidationStringency.SILENT);
SAMFileReader samFileReader = null;
if (params.bamFile == null) {
log.warn("No input file, reading from input...");
samFileReader = new SAMFileReader(System.in);
} else
samFileReader = new SAMFileReader(bamFile);
ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory
.getReferenceSequenceFile(params.referenceFasta);
BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
ReferenceSequence sequence = null;
List<SAMRecord> samRecords = new ArrayList<SAMRecord>(
params.maxContainerSize);
int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
SAMRecordIterator iterator = samFileReader.iterator();
{
String seqName = null;
SAMRecord samRecord = iterator.next();
if (samRecord == null)
throw new RuntimeException("No records found.");
seqName = samRecord.getReferenceName();
prevSeqId = samRecord.getReferenceIndex();
samRecords.add(samRecord);
// if (samFileReader.getFileHeader().getReadGroups().isEmpty()
// || samFileReader.getFileHeader().getReadGroup(
// Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID) == null) {
// log.info("Adding default read group.");
// SAMReadGroupRecord readGroup = new SAMReadGroupRecord(
// Sam2CramRecordFactory.UNKNOWN_READ_GROUP_ID);
//
// readGroup
// .setSample(Sam2CramRecordFactory.UNKNOWN_READ_GROUP_SAMPLE);
// samFileReader.getFileHeader().addReadGroup(readGroup);
// }
if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(seqName))
sequence = null;
else
sequence = Utils.trySequenceNameVariants(referenceSequenceFile,
seqName);
}
QualityScorePreservation preservation;
if (params.losslessQS)
preservation = new QualityScorePreservation("*40");
else
preservation = new QualityScorePreservation(params.qsSpec);
byte[] ref = sequence == null ? new byte[0] : sequence.getBases();
{
// hack:
int newLines = 0;
for (byte b : ref)
if (b == 10)
newLines++;
byte[] ref2 = new byte[ref.length - newLines];
int j = 0;
for (int i = 0; i < ref.length; i++)
if (ref[i] == 10)
continue;
else
ref2[j++] = ref[i];
ref = ref2;
}
OutputStream os;
if (params.outputCramFile != null) {
FileOutputStream fos = new FileOutputStream(params.outputCramFile);
os = new BufferedOutputStream(fos);
} else {
log.warn("No output file, writint to STDOUT.");
os = System.out;
}
if (params.encrypt) {
CipherOutputStream_256 cos = new CipherOutputStream_256(os, pass,
128);
os = cos.getCipherOutputStream();
}
CramHeader h = new CramHeader(2, 0, params.bamFile == null ? "STDIN"
: params.bamFile.getName(), samFileReader.getFileHeader());
long offset = ReadWrite.writeCramHeader(h, os);
long bases = 0;
long coreBytes = 0;
long[] externalBytes = new long[10];
BLOCK_PROTO.recordsPerSlice = params.maxSliceSize;
long globalRecordCounter = 0;
MessageDigest md5_MessageDigest = MessageDigest.getInstance("MD5");
do {
if (params.outputCramFile == null && System.out.checkError())
return;
SAMRecord samRecord = iterator.next();
if (samRecord == null)
// no more records
break;
if (samRecord.getReferenceIndex() != prevSeqId
|| samRecords.size() >= params.maxContainerSize) {
if (!samRecords.isEmpty()) {
List<CramRecord> records = convert(samRecords,
samFileReader.getFileHeader(), ref, preservation,
params.captureAllTags, params.captureTags,
params.ignoreTags);
samRecords.clear();
Container container = BLOCK_PROTO.buildContainer(records,
samFileReader.getFileHeader(),
params.preserveReadNames, globalRecordCounter,
null, true);
for (Slice s : container.slices) {
if (s.alignmentStart < 1) {
s.refMD5 = new byte[16];
Arrays.fill(s.refMD5, (byte) 0);
continue;
}
md5_MessageDigest.reset();
int span = Math.min(s.alignmentSpan, ref.length
- s.alignmentStart);
if (s.alignmentStart == 0)
System.out.println("gotcha");
md5_MessageDigest.update(ref, s.alignmentStart - 1,
span);
String sliceRef = new String(ref, s.alignmentStart - 1,
Math.min(span, 30));
s.refMD5 = md5_MessageDigest.digest();
log.debug("Slice ref starts with: " + sliceRef);
log.debug("Slice ref md5: "
+ (String.format("%032x", new BigInteger(1,
s.refMD5))));
}
globalRecordCounter += records.size();
records.clear();
long len = ReadWrite.writeContainer(container, os);
container.offset = offset;
offset += len;
log.info(String
.format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
container.buildHeaderTime / 1000000,
container.buildSlicesTime / 1000000,
container.writeTime / 1000000));
for (Slice s : container.slices) {
coreBytes += s.coreBlock.getCompressedContent().length;
for (Integer i : s.external.keySet())
externalBytes[i] += s.external.get(i)
.getCompressedContent().length;
}
}
}
if (prevSeqId != samRecord.getReferenceIndex()) {
if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
sequence = Utils
.trySequenceNameVariants(referenceSequenceFile,
samRecord.getReferenceName());
ref = sequence.getBases();
{
// hack:
int newLines = 0;
for (byte b : ref)
if (b == 10)
newLines++;
byte[] ref2 = new byte[ref.length - newLines];
int j = 0;
for (int i = 0; i < ref.length; i++)
if (ref[i] == 10)
continue;
else
ref2[j++] = ref[i];
ref = ref2;
}
} else
ref = new byte[] {};
prevSeqId = samRecord.getReferenceIndex();
}
samRecords.add(samRecord);
bases += samRecord.getReadLength();
if (params.maxRecords-- < 1)
break;
} while (iterator.hasNext());
{ // copied for now, should be a subroutine:
if (!samRecords.isEmpty()) {
List<CramRecord> records = convert(samRecords,
samFileReader.getFileHeader(), ref, preservation,
params.captureAllTags, params.captureTags,
params.ignoreTags);
samRecords.clear();
Container container = BLOCK_PROTO.buildContainer(records,
samFileReader.getFileHeader(),
params.preserveReadNames, globalRecordCounter, null,
true);
for (Slice s : container.slices) {
if (s.alignmentStart < 1) {
s.refMD5 = new byte[16];
Arrays.fill(s.refMD5, (byte) 0);
continue;
}
md5_MessageDigest.reset();
int span = Math.min(s.alignmentSpan, ref.length
- s.alignmentStart);
if (s.alignmentStart == 0)
System.out.println("gotcha");
md5_MessageDigest.update(ref, s.alignmentStart - 1,
span);
String sliceRef = new String(ref, s.alignmentStart - 1,
Math.min(span, 30));
s.refMD5 = md5_MessageDigest.digest();
log.debug("Slice ref starts with: " + sliceRef);
log.debug("Slice ref md5: "
+ (String.format("%032x", new BigInteger(1,
s.refMD5))));
}
records.clear();
ReadWrite.writeContainer(container, os);
log.info(String
.format("CONTAINER WRITE TIMES: header build time %dms, slices build time %dms, io time %dms.",
container.buildHeaderTime / 1000000,
container.buildSlicesTime / 1000000,
container.writeTime / 1000000));
for (Slice s : container.slices) {
coreBytes += s.coreBlock.getCompressedContent().length;
for (Integer i : s.external.keySet())
externalBytes[i] += s.external.get(i)
.getCompressedContent().length;
}
}
}
iterator.close();
samFileReader.close();
os.close();
StringBuilder sb = new StringBuilder();
sb.append(String.format("STATS: core %.2f b/b", 8f * coreBytes / bases));
for (int i = 0; i < externalBytes.length; i++)