}
}
/**
 * Scans a sparse feature file and folds every line into the per-feature
 * minimum/maximum statistics held in {@code fid2statistics}.
 *
 * <p>Meta lines are parsed with {@code MetaLineParser}; when the result is a
 * {@code FeatureMetaData}, its id-to-name mapping is stored in
 * {@code fid2name}. Every other line is treated as a data line: each
 * (index, value) pair widens the min/max of its feature, and every known
 * feature whose index is skipped on that line has its range widened to
 * include 0. This presumably reflects that an absent entry in the sparse
 * format means a value of zero, and that pairs are sorted by ascending
 * feature index -- NOTE(review): confirm against the file format.
 *
 * <p>Progress and a statistics dump are printed to standard output after
 * every 100000 data lines. Once the scan ends the reader is closed (also when
 * the scan fails with an exception) and {@code loadStatistics()} is invoked.
 *
 * @param inputFile path handed to {@code SparseTextFileReader.open}
 */
public void processFile(String inputFile) {
    System.out.println("Processing: " + inputFile);
    SparseTextFileReader reader = new SparseTextFileReader();
    // Open outside the try so a failed open does not trigger close() on a
    // reader that was never opened.
    reader.open(inputFile);
    try {
        SparseTextFileLine line = new SparseTextFileLine();
        int count = 0;           // number of data (non-meta) lines seen so far
        int maxFeatureIndex = 0; // largest feature index first seen during this call
        while (reader.loadNextLine(line)) {
            if (line.meta) {
                MetaData metaData = MetaLineParser.parse(line.content);
                if (metaData instanceof FeatureMetaData) {
                    FeatureMetaData featureMeta = (FeatureMetaData) metaData;
                    fid2name.put(featureMeta.id, featureMeta.name);
                }
                continue;
            }

            int prevIdx = 0;
            for (int i = 0; i < line.numPairs; i++) {
                FeatureValuePair pair = line.pairs[i];

                // Indices strictly between the previous pair and this one are
                // absent from the line; make their ranges include 0.
                for (int f = prevIdx + 1; f < pair.featureIndex; f++) {
                    includeImplicitZero(f);
                }

                FeatureStatistics stat = fid2statistics.get(pair.featureIndex);
                if (stat == null) {
                    stat = new FeatureStatistics();
                    fid2statistics.put(pair.featureIndex, stat);
                    if (count > 0) {
                        // First sighting after earlier data lines: those lines
                        // did not list this feature, so start its range at 0.
                        stat.minValue = 0;
                        stat.maxValue = 0;
                    }
                    if (pair.featureIndex > maxFeatureIndex) {
                        maxFeatureIndex = pair.featureIndex;
                    }
                }

                if (Double.isInfinite(pair.featureValue)) {
                    // Report the data-line number carrying an infinite value.
                    System.out.println(count + "\t" + pair.featureValue);
                }
                if (pair.featureValue > stat.maxValue) {
                    stat.maxValue = pair.featureValue;
                }
                if (pair.featureValue < stat.minValue) {
                    stat.minValue = pair.featureValue;
                }
                prevIdx = pair.featureIndex;
            }

            // Indices after the last pair, up to the largest one seen so far,
            // are absent from this line as well.
            if (prevIdx < maxFeatureIndex) {
                for (int f = prevIdx + 1; f <= maxFeatureIndex; f++) {
                    includeImplicitZero(f);
                }
            }

            count++;
            if (count % 100000 == 0) {
                System.out.println("\t Processed: " + count);
                dumpStatistics(System.out);
            }
        }
    } finally {
        // Release the reader even when loading or parsing a line throws.
        reader.close();
    }
    loadStatistics();
}

/**
 * Widens the recorded range of one feature so that it contains 0.
 * Does nothing when no statistics exist yet for that index.
 *
 * @param featureIndex index of the feature that was absent from a data line
 */
private void includeImplicitZero(int featureIndex) {
    FeatureStatistics stat = fid2statistics.get(featureIndex);
    if (stat == null) {
        return;
    }
    if (stat.maxValue < 0) {
        stat.maxValue = 0;
    }
    if (stat.minValue > 0) {
        stat.minValue = 0;
    }
}