BufferedReader reader = null;
PrintWriter writer = null;
SortingCollection cltn = getSortingCollection(maxRecords, tmpDir);
try {
reader = new BufferedReader(new FileReader(inputFile));
writer = new PrintWriter(new BufferedWriter(new FileWriter(outputFile)));
// Parse the header line
String headerLine = reader.readLine();
String[] tokens = headerLine.split("\t");
// The sample names in a GCT file start at column 2 (zero-based index into the tab-split header tokens).
int sampleStart = 2;
String nextLine = null;
TrackType dataType = TrackType.GENE_EXPRESSION;
while ((nextLine = reader.readLine()) != null) {
// A GCT row can map to multiple loci; normally this indicates a problem with the probe.
DataRow row = new DataRow(nextLine);
String probe = row.getProbe();
if (probe.startsWith("cg")) {
dataType = TrackType.DNA_METHYLATION;
}
List<Locus> loci = locusHelper.getLoci(probe, row.getDescription(), genome.getId());
if (loci == null || loci.isEmpty()) {
System.out.println("No locus found for: " + probe + " " + row.getDescription());
} else {
for (Locus locus : loci) {
String igvLine = locus.getChr() + "\t" + locus.getStart() + "\t" + locus.getEnd() + "\t" + probe +
row.getData();
cltn.add(new SortableRecord(locus.getChr(), locus.getStart(), igvLine));
}
}
}
writer.println("#type=" + dataType.toString());
writer.print("Chr\tStart\tEnd\tProbe");
for (int i = sampleStart; i < tokens.length; i++) {
writer.print("\t" + tokens[i]);
}
writer.println();
// Output the sorted file
CloseableIterator<SortableRecord> iter = cltn.iterator();
while (iter.hasNext()) {
SortableRecord al = iter.next();
writer.println(al.getText());
}