}
Encoding<Integer> bestEncoding = calc.getBestEncoding();
h.eMap.put(
EncodingKey.AP_AlignmentPositionOffset,
new EncodingParams(bestEncoding.id(), bestEncoding
.toByteArray()));
}
{ // read group
    // Feed every record's read group id to the calculator, then register
    // the resulting Huffman parameters under RG_ReadGroup.
    HuffmanParamsCalculator readGroupStats = new HuffmanParamsCalculator();
    for (CramRecord rec : records) {
        readGroupStats.add(rec.getReadGroupID());
    }
    readGroupStats.calculate();
    h.eMap.put(
            EncodingKey.RG_ReadGroup,
            HuffmanIntegerEncoding.toParam(
                    readGroupStats.values(), readGroupStats.bitLens()));
}
{ // read name encoding:
    // The calculator sees the length of each read name; the resulting
    // Huffman parameters are paired with an external byte-array encoding
    // keyed by readNameID and stored under RN_ReadName.
    HuffmanParamsCalculator nameLengthStats = new HuffmanParamsCalculator();
    for (CramRecord rec : records) {
        nameLengthStats.add(rec.getReadName().length());
    }
    nameLengthStats.calculate();
    h.eMap.put(
            EncodingKey.RN_ReadName,
            ByteArrayLenEncoding.toParam(
                    HuffmanIntegerEncoding.toParam(
                            nameLengthStats.values(), nameLengthStats.bitLens()),
                    ExternalByteArrayEncoding.toParam(readNameID)));
    // Disabled alternative (stop-byte delimited names), kept for reference:
    // h.eMap.put(EncodingKey.RN_ReadName,
    // ByteArrayStopEncoding.toParam((byte) 0, readNameID));
}
{ // records to next fragment
    // Let the calculator pick an integer encoding for the per-record
    // "records to next fragment" values and store its id and serialized
    // parameters under NF_RecordsToNextFragment.
    IntegerEncodingCalculator distanceStats = new IntegerEncodingCalculator(
            "records to next fragment");
    for (CramRecord rec : records) {
        distanceStats.addValue(rec.getRecordsToNextFragment());
    }
    Encoding<Integer> chosen = distanceStats.getBestEncoding();
    EncodingParams chosenParams = new EncodingParams(chosen.id(),
            chosen.toByteArray());
    h.eMap.put(EncodingKey.NF_RecordsToNextFragment, chosenParams);
}
{ // tag count
    // A record without a tag array contributes a count of zero.
    HuffmanParamsCalculator tagCountStats = new HuffmanParamsCalculator();
    for (CramRecord rec : records) {
        int tagCount = 0;
        if (rec.tags != null) {
            tagCount = rec.tags.length;
        }
        tagCountStats.add(tagCount);
    }
    tagCountStats.calculate();
    h.eMap.put(
            EncodingKey.TC_TagCount,
            HuffmanIntegerEncoding.toParam(
                    tagCountStats.values(), tagCountStats.bitLens()));
}
{ // tag name and type
    // Every tag of every record contributes its packed key/type code;
    // records with no tag array are skipped.
    HuffmanParamsCalculator tagCodeStats = new HuffmanParamsCalculator();
    for (CramRecord rec : records) {
        if (rec.tags != null) {
            for (ReadTag tag : rec.tags) {
                tagCodeStats.add(tag.keyType3BytesAsInt);
            }
        }
    }
    tagCodeStats.calculate();
    h.eMap.put(
            EncodingKey.TN_TagNameAndType,
            HuffmanIntegerEncoding.toParam(
                    tagCodeStats.values(), tagCodeStats.bitLens()));
}
{ // tag id list: dictionary of the distinct tag-id combinations in use
    // Orders tags by their packed key/type code. Written as explicit
    // comparisons rather than a subtraction so it cannot overflow.
    Comparator<ReadTag> comparator = new Comparator<ReadTag>() {
        @Override
        public int compare(ReadTag o1, ReadTag o2) {
            int left = o1.keyType3BytesAsInt;
            int right = o2.keyType3BytesAsInt;
            return left < right ? -1 : (left > right ? 1 : 0);
        }
    };
    // byte[] has identity equality, so the map below needs a content-based
    // ordering: shorter arrays first, then byte-by-byte. The differences
    // here are bounded (array lengths are non-negative, bytes are 8-bit),
    // so subtraction is safe.
    Comparator<byte[]> baComparator = new Comparator<byte[]>() {
        @Override
        public int compare(byte[] o1, byte[] o2) {
            if (o1.length != o2.length) {
                return o1.length - o2.length;
            }
            for (int i = 0; i < o1.length; i++) {
                if (o1[i] != o2[i]) {
                    return o1[i] - o2[i];
                }
            }
            return 0;
        }
    };

    // Maps each distinct concatenation of 3-byte tag ids to a shared
    // counter. The empty combination is always present, so records
    // without tags have an entry even if none occur.
    Map<byte[], MutableInt> map = new TreeMap<byte[], MutableInt>(
            baComparator);
    MutableInt noTagCounter = new MutableInt();
    map.put(new byte[0], noTagCounter);
    for (CramRecord r : records) {
        if (r.tags == null) {
            noTagCounter.value++;
            r.tagIdsIndex = noTagCounter;
            continue;
        }
        // Sorting makes the id sequence independent of the original tag
        // order. Note that this reorders r.tags in place.
        Arrays.sort(r.tags, comparator);
        r.tagIds = new byte[r.tags.length * 3];
        for (int i = 0; i < r.tags.length; i++) {
            ReadTag tag = r.tags[i];
            r.tagIds[i * 3] = (byte) tag.keyType3Bytes.charAt(0);
            r.tagIds[i * 3 + 1] = (byte) tag.keyType3Bytes.charAt(1);
            r.tagIds[i * 3 + 2] = (byte) tag.keyType3Bytes.charAt(2);
        }
        MutableInt count = map.get(r.tagIds);
        if (count == null) {
            count = new MutableInt();
            map.put(r.tagIds, count);
        }
        count.value++;
        // The record keeps a reference to the shared counter; see below
        // for how its value is repurposed.
        r.tagIdsIndex = count;
    }

    // Walk the combinations in map order, splitting each one back into
    // 3-byte ids to form the dictionary.
    byte[][][] dic = new byte[map.size()][][];
    int i = 0;
    HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
    for (Map.Entry<byte[], MutableInt> entry : map.entrySet()) {
        byte[] idsAsBytes = entry.getKey();
        MutableInt counter = entry.getValue();
        dic[i] = new byte[idsAsBytes.length / 3][];
        for (int j = 0; j < idsAsBytes.length; j += 3) {
            dic[i][j / 3] = new byte[] { idsAsBytes[j], idsAsBytes[j + 1],
                    idsAsBytes[j + 2] };
        }
        // Register the dictionary index together with its occurrence
        // count, then overwrite the count with the index. Because records
        // share these MutableInt instances, every r.tagIdsIndex now holds
        // the dictionary index of that record's combination.
        calculator.add(i, counter.value);
        counter.value = i++;
    }
    calculator.calculate();
    h.eMap.put(EncodingKey.TL_TagIdList, HuffmanIntegerEncoding
            .toParam(calculator.values(), calculator.bitLens()));
    h.dictionary = dic;
}
{ // tag values
    // One calculator per packed tag key/type code, fed with the byte
    // length of each tag value. TreeMap iterates the codes in ascending
    // order.
    Map<Integer, HuffmanParamsCalculator> lengthStatsByTag = new TreeMap<Integer, HuffmanParamsCalculator>();
    for (CramRecord rec : records) {
        if (rec.tags == null) {
            continue;
        }
        for (ReadTag tag : rec.tags) {
            // All tags currently take the same path. A per-tag special
            // case (ReadTag.OQZ / ReadTag.BQZ via
            // ByteArrayStopEncoding.toParam((byte) 1, tagValueExtID))
            // used to be sketched here but is disabled.
            HuffmanParamsCalculator lengthStats = lengthStatsByTag
                    .get(tag.keyType3BytesAsInt);
            if (lengthStats == null) {
                lengthStats = new HuffmanParamsCalculator();
                lengthStatsByTag.put(tag.keyType3BytesAsInt, lengthStats);
            }
            lengthStats.add(tag.getValueAsByteArray().length);
        }
    }

    // Each tag gets a length-prefixed byte-array encoding: Huffman-coded
    // lengths plus an external encoding keyed by tagValueExtID.
    for (Map.Entry<Integer, HuffmanParamsCalculator> tagEntry : lengthStatsByTag
            .entrySet()) {
        HuffmanParamsCalculator lengthStats = tagEntry.getValue();
        lengthStats.calculate();
        h.tMap.put(
                tagEntry.getKey(),
                ByteArrayLenEncoding.toParam(
                        HuffmanIntegerEncoding.toParam(lengthStats.values(),
                                lengthStats.bitLens()),
                        ExternalByteArrayEncoding.toParam(tagValueExtID)));
    }

    // Dump every tag encoding now present in the header.
    for (Integer key : h.tMap.keySet()) {
        log.debug(String.format("TAG ENCODING: %d, %s", key,
                h.tMap.get(key)));
    }
    // Disabled alternative, for reference: default every tag missing from
    // h.tMap to ByteArrayStopEncoding.toParam((byte) 0, tagValueExtID).
}
{ // number of read features
    // A record whose feature collection is null counts as zero features.
    HuffmanParamsCalculator featureCountStats = new HuffmanParamsCalculator();
    for (CramRecord rec : records) {
        if (rec.getReadFeatures() == null) {
            featureCountStats.add(0);
        } else {
            featureCountStats.add(rec.getReadFeatures().size());
        }
    }
    featureCountStats.calculate();
    h.eMap.put(
            EncodingKey.FN_NumberOfReadFeatures,
            HuffmanIntegerEncoding.toParam(
                    featureCountStats.values(), featureCountStats.bitLens()));
}
{ // feature position
IntegerEncodingCalculator calc = new IntegerEncodingCalculator(
"read feature position");
for (CramRecord r : records) {
int prevPos = 0;
if (r.getReadFeatures() == null)
continue;
for (ReadFeature rf : r.getReadFeatures()) {
calc.addValue(rf.getPosition() - prevPos);
prevPos = rf.getPosition();
}
}
Encoding<Integer> bestEncoding = calc.getBestEncoding();
h.eMap.put(EncodingKey.FP_FeaturePosition, new EncodingParams(
bestEncoding.id(), bestEncoding.toByteArray()));
}
{ // feature code
HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();