Package edu.uci.jforestsx.input.sparse

Examples of edu.uci.jforestsx.input.sparse.SparseTextFileReader


    valueHashMaps = new ArrayList<HashMap<Integer, Integer>>(featureCount);
    for (int f = 0; f < featureCount; f++) {
      valueHashMaps.add(new HashMap<Integer, Integer>());
    }

    SparseTextFileReader reader = new SparseTextFileReader();
    reader.open(textFile);
    SparseTextFileLine line = new SparseTextFileLine();
    HashMap<Integer, Integer> curMap;
    int key;
    instanceCount = 0;
    while (reader.loadNextLine(line)) {
      if (line.meta) {
        continue;
      }
      for (int i = 0; i < line.numPairs; i++) {
        FeatureValuePair pair = line.pairs[i];
        curMap = valueHashMaps.get(pair.featureIndex - 1);
        key = (int) pair.featureValue;
        Integer count = curMap.get(key);
        if (count == null) {
          count = 0;
        }
        count++;
        curMap.put(key, count);
      }
      handle(line);
      instanceCount++;
    }
    reader.close();
    System.out.println("  [Done in: " + timer.getElapsedSeconds() + " seconds.]");
  }
View Full Code Here


    }

    targets = new double[instanceCount];
    int[] zeroCount = new int[featureCount];

    SparseTextFileReader reader = new SparseTextFileReader();
    reader.open(textFile);
    SparseTextFileLine line = new SparseTextFileLine();
    int instanceIdx = 0;
    while (reader.loadNextLine(line)) {
      if (line.meta) {
        continue;
      }
      targets[instanceIdx] = line.target;
      for (int i = 0; i < line.numPairs; i++) {
        FeatureValuePair pair = line.pairs[i];
        int fidx = pair.featureIndex - 1;
        int index = Arrays.binarySearch(valueDistributions[fidx], (int) pair.featureValue);
        bins[fidx].set(instanceIdx, index);
        if (index == 0) {
          zeroCount[fidx]++;
        }
      }
      instanceIdx++;
    }
    reader.close();
    System.out.println("  [Done in: " + timer.getElapsedSeconds() + " seconds.]");
  }
View Full Code Here

*/

public class Sparse2SvmConvertor {

  public static void convert(String inputFile, String outputFile) throws Exception {
    SparseTextFileReader reader = new SparseTextFileReader();
    reader.open(inputFile);
    SparseTextFileLine line = new SparseTextFileLine();
    int count = 0;
    PrintStream output = new PrintStream(new File(outputFile));
    StringBuilder sb = new StringBuilder();
    int featureCount = 0;
    while (reader.loadNextLine(line)) {
      if (line.meta) {
        MetaData metaData = MetaLineParser.parse(line.content);
        if (metaData instanceof FeatureMetaData) {
          featureCount++;
        }
      } else {
        sb.setLength(0);
        sb.append(line.target);
        for (int i = 0; i < line.numPairs; i++) {
          FeatureValuePair pair = line.pairs[i];
          sb.append(" " + pair.featureIndex + ":" + pair.featureValue);
        }       
        sb.append("\n");
        output.print(sb.toString());
        count++;
        if (count % 10000 == 0) {
          System.out.println("\t Processed: " + count);
        }
      }
    }
    reader.close();
  }
View Full Code Here

  private int featureCount;
  private StringBuilder sb;
  private int targetColIdx;
 
  public void init(String inputFile, String outputFile, boolean dumpAsSparse) throws Exception {
    reader = new SparseTextFileReader();
    reader.open(inputFile);
    this.dumpAsSparse = dumpAsSparse;
   
    output = new PrintStream(new File(outputFile));
    output.println("@RELATION myrel");
View Full Code Here

    FeatureAnalyzer featureAnalyzer = new FeatureAnalyzer();
    featureAnalyzer.loadFeaturesFromFile(featuresStatFile);

    StringBuilder sb = new StringBuilder();
    double value;
    SparseTextFileReader reader = new SparseTextFileReader();
    SparseTextFileLine line = new SparseTextFileLine();
    reader.open(inputFilename);
    int intValue;
    try {
      PrintStream output = new PrintStream(new File(outputFilename));
      int count = 0;
      while (reader.loadNextLine(line)) {
        if (line.meta) {
          output.println(line.content);
          continue;
        }
        sb.setLength(0);
        sb.append(line.target);
        if (line.qid != null) {
          sb.append(" qid:" + line.qid);
        }


        // System.out.println("Converting line with qid:" + line.qid); // SISTA
        for (int i = 0; i < line.numPairs; i++) {
          FeatureValuePair pair = line.pairs[i];
          // System.out.println("\t" + pair.featureIndex + ":" + pair.featureValue); // SISTA
          value = pair.featureValue;
          int idx = pair.featureIndex - 1;
          if (featureAnalyzer.onLogScale[idx]) {
            value = (Math.log(value - featureAnalyzer.min[idx] + 1) * featureAnalyzer.factor[idx]);
          } else {
            value = (value - featureAnalyzer.min[idx]) * featureAnalyzer.factor[idx];
          }
          intValue = (int) Math.round(value);
          // System.out.println("\t\tdiscrete value: " + intValue); // SISTA
          if (intValue != 0) {
            sb.append(" " + pair.featureIndex + ":" + intValue);
          }
        }
        output.println(sb.toString());
        count++;
        if (count % 10000 == 0) {
          System.out.println(count);
        }
      }
      output.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
    reader.close();
  }
View Full Code Here

    }
  }

  public void processFile(String inputFile) {
    System.out.println("Processing: " + inputFile);
    SparseTextFileReader reader = new SparseTextFileReader();
    reader.open(inputFile);
    SparseTextFileLine line = new SparseTextFileLine();
    FeatureStatistics stat;
    int count = 0;
    int maxFeatureIndex = 0;
    while (reader.loadNextLine(line)) {
      if (line.meta) {
        MetaData metaData = MetaLineParser.parse(line.content);
        if (metaData instanceof FeatureMetaData) {
          fid2name.put(((FeatureMetaData) metaData).id, ((FeatureMetaData) metaData).name);
        }
      } else {
        int prevIdx = 0;
        for (int i = 0; i < line.numPairs; i++) {
          FeatureValuePair pair = line.pairs[i];
          if (pair.featureIndex != (prevIdx + 1)) {
            for (int f = prevIdx + 1; f < pair.featureIndex; f++) {
              stat = fid2statistics.get(f);
              if (stat != null) {
                if (stat.maxValue < 0) {
                  stat.maxValue = 0;
                }
                if (stat.minValue > 0) {
                  stat.minValue = 0;
                }
              }
            }
          }
          stat = fid2statistics.get(pair.featureIndex);
          if (stat == null) {
            stat = new FeatureStatistics();
            fid2statistics.put(pair.featureIndex, stat);
            if (count > 0) {
              stat.minValue = 0;
              stat.maxValue = 0;
            }
            if (pair.featureIndex > maxFeatureIndex) {
              maxFeatureIndex = pair.featureIndex;
            }
          }
          if (Double.isInfinite(pair.featureValue)) {
            System.out.println(count + "\t" + pair.featureValue);
          }
          if (pair.featureValue > stat.maxValue) {
            stat.maxValue = pair.featureValue;
          }
          if (pair.featureValue < stat.minValue) {
            stat.minValue = pair.featureValue;
          }         
          prevIdx = pair.featureIndex;
        }
        if (prevIdx < maxFeatureIndex) {
          for (int f = prevIdx + 1; f <= maxFeatureIndex; f++) {
            stat = fid2statistics.get(f);
            if (stat != null) {
              if (stat.maxValue < 0) {
                stat.maxValue = 0;
              }
              if (stat.minValue > 0) {
                stat.minValue = 0;
              }
            }
          }
        }
        count++;
        if (count % 100000 == 0) {
          System.out.println("\t Processed: " + count);
          dumpStatistics(System.out);
        }
      }
    }
    reader.close();
    loadStatistics();   
  }
View Full Code Here

TOP

Related Classes of edu.uci.jforestsx.input.sparse.SparseTextFileReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.