Package org.encog.app.analyst.csv.basic

Source Code of org.encog.app.analyst.csv.basic.BasicFile

/*
* Encog(tm) Core v3.0 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2011 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*  
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.app.analyst.csv.basic;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

import org.encog.Encog;
import org.encog.NullStatusReportable;
import org.encog.StatusReportable;
import org.encog.app.analyst.script.AnalystScript;
import org.encog.app.analyst.script.DataField;
import org.encog.app.quant.QuantError;
import org.encog.app.quant.QuantTask;
import org.encog.util.csv.CSVFormat;
import org.encog.util.csv.ReadCSV;

/**
* Many of the Encog quant CSV processors are based upon this class. This class
* is not useful on its own. However, it does form the foundation for most Encog
* CSV file processing.
*/
public class BasicFile implements QuantTask {

  /**
   * The default report interval.
   */
  public static final int REPORT_INTERVAL = 10000;

  /**
   * Append a separator. The separator will only be appended if the line is
   * not empty.  This is used to build comma(or other) separated lists.
   *
   * @param line
   *            The line to append to.
   * @param format
   *       The format to use.
   */
  public static void appendSeparator(final StringBuilder line,
      final CSVFormat format) {
    if ((line.length() > 0)
        && !line.toString().endsWith(format.getSeparator() + "")) {
      line.append(format.getSeparator());
    }
  }

  /**
   * The column headings from the input file.
   */
  private String[] inputHeadings;

  /**
   * The desired precision when numbers must be written. Defaults to 10
   * decimal places.
   */
  private int precision;

  /**
   * Most Encog CSV classes must analyze a CSV file before actually processing
   * it. This property specifies if the file has been analyzed yet.
   */
  private boolean analyzed;

  /**
   * The input filename. This is the file being analyzed/processed.
   */
  private File inputFilename;

  /**
   * True, if input headers should be expected.
   */
  private boolean expectInputHeaders;

  /**
   * The format of the input file.
   */
  private CSVFormat inputFormat;

  /**
   * The number of columns in the input file.
   */
  private int columnCount;

  /**
   * Allows status to be reported. Defaults to no status reported.
   */
  private StatusReportable report;

  /**
   * The number of records to process before status is updated. Defaults to
   * 10k.
   */
  private int reportInterval;

  /**
   * The number of records to process. This is determined when the file is
   * analyzed.
   */
  private int recordCount;

  /**
   * The last time status was updated.
   */
  private int lastUpdate;

  /**
   * The current record.
   */
  private int currentRecord;

  /**
   * Should output headers be produced?
   */
  private boolean produceOutputHeaders;

  /**
   * True, if the process should stop.
   */
  private boolean cancel;

  /**
   * The output format, usually, the same as the input format.
   */
  private CSVFormat outputFormat;

  /**
   * The Encog script to use.
   */
  private AnalystScript script;

  /**
   * Construct the object, and set the defaults.
   */
  public BasicFile() {
    this.precision = Encog.DEFAULT_PRECISION;
    this.report = new NullStatusReportable();
    this.reportInterval = REPORT_INTERVAL;
    this.produceOutputHeaders = true;
    resetStatus();
  }

  /**
   * @return The column count.
   */
  public final int getColumnCount() {
    return this.columnCount;
  }

  /**
   * @return The input filename.
   */
  public final File getInputFilename() {
    return this.inputFilename;
  }

  /**
   * @return THe input format.
   */
  public final CSVFormat getInputFormat() {
    return this.inputFormat;
  }

  /**
   * @return The input headings.
   */
  public final String[] getInputHeadings() {
    return this.inputHeadings;
  }

  /**
   * @return the outputFormat
   */
  public final CSVFormat getOutputFormat() {
    return this.outputFormat;
  }

  /**
   * @return The precision to use.
   */
  public final int getPrecision() {
    return this.precision;
  }

  /**
   * @return Get the record count. File must have been analyzed first to read
   *         the record count.
   */
  public final int getRecordCount() {
    if (!this.analyzed) {
      throw new QuantError("Must analyze file first.");
    }
    return this.recordCount;

  }

  /**
   * @return The status reporting object.
   */
  public final StatusReportable getReport() {
    return this.report;
  }

  /**
   * @return The reporting interval, an update will be sent for every block of
   *         rows that matches the size of this property.
   */
  public final int getReportInterval() {
    return this.reportInterval;
  }

  /**
   * @return Has the file been analyzed.
   */
  public final boolean isAnalyzed() {
    return this.analyzed;
  }

  /**
   * @return True if we are expecting input headers.
   */
  public final boolean isExpectInputHeaders() {
    return this.expectInputHeaders;
  }

  /**
   * @return the produceOutputHeaders
   */
  public final boolean isProduceOutputHeaders() {
    return this.produceOutputHeaders;
  }

  /**
   * Perform a basic analyze of the file. This method is used mostly
   * internally.
   */
  public final void performBasicCounts() {
    if (this.outputFormat == null) {
      this.outputFormat = this.inputFormat;
    }

    resetStatus();
    int rc = 0;
    final ReadCSV csv = new ReadCSV(this.inputFilename.toString(),
        this.expectInputHeaders, this.inputFormat);
    while (csv.next() && !this.cancel) {
      updateStatus(true);
      rc++;
    }
    this.recordCount = rc;
    this.columnCount = csv.getColumnCount();

    readHeaders(csv);
    csv.close();
    reportDone(true);
  }

  /**
   * Prepare the output file, write headers if needed.
   *
   * @param outputFile
   *            The name of the output file.
   * @return The output stream for the text file.
   */
  public final PrintWriter prepareOutputFile(final File outputFile) {
    try {
      final PrintWriter tw = new PrintWriter(new FileWriter(outputFile));
      if (this.outputFormat == null) {
        this.outputFormat = this.inputFormat;
      }

      // write headers, if needed
      if (this.produceOutputHeaders) {
        int index = 0;
        final StringBuilder line = new StringBuilder();

        if (this.inputHeadings != null) {
          for (final String str : this.inputHeadings) {
            if (line.length() > 0) {
              line.append(this.outputFormat.getSeparator());
            }
            line.append("\"");
            line.append(str);
            line.append("\"");
            index++;
          }
        } else {
          for (int i = 0; i < this.columnCount; i++) {
            line.append("\"field:");
            line.append(i + 1);
            line.append("\"");
          }
        }
        tw.println(line.toString());
      }

      return tw;

    } catch (final IOException e) {
      throw new QuantError(e);
    }
  }

  /**
   * Read the headers from a CSV file. Used mostly internally.
   *
   * @param csv
   *            The CSV file to read from.
   */
  public final void readHeaders(final ReadCSV csv) {
    if (this.expectInputHeaders) {
      this.inputHeadings = new String[csv.getColumnNames().size()];
      for (int i = 0; i < csv.getColumnNames().size(); i++) {
        this.inputHeadings[i] = csv.getColumnNames().get(i);
      }
    } else {
      this.inputHeadings = new String[csv.getColumnCount()];
     
      int i = 0;
      if (this.getScript() != null) {
        for (DataField field : this.getScript().getFields()) {
          this.inputHeadings[i++] = field.getName();
        }
      }
     
      while (i < csv.getColumnCount()) {
        this.inputHeadings[i] = "field:" + i;
        i++;
      }
    }
  }

  /**
   * Report that we are done. Used internally.
   *
   * @param isAnalyzing
   *            True if we are analyzing.
   */
  public final void reportDone(final boolean isAnalyzing) {
    if (isAnalyzing) {
      this.report.report(this.recordCount, this.recordCount,
          "Done analyzing");
    } else {
      this.report.report(this.recordCount, this.recordCount,
          "Done processing");
    }
  }

  /**
   * Report that we are done. Used internally.
   *
   * @param task
   *            The message.
   */
  public final void reportDone(final String task) {
    this.report.report(this.recordCount, this.recordCount, task);
  }

  /**
   * Request a stop.
   */
  @Override
  public final void requestStop() {
    this.cancel = true;
  }

  /**
   * Reset the reporting stats. Used internally.
   */
  public final void resetStatus() {
    this.lastUpdate = 0;
    this.currentRecord = 0;
  }

  /**
   * Set to true, if the file has been analyzed.
   *
   * @param theAnalyzed
   *            True, if the file has been analyzed.
   */
  public final void setAnalyzed(final boolean theAnalyzed) {
    this.analyzed = theAnalyzed;
  }

  /**
   * Set the column count.
   *
   * @param theColumnCount
   *            The new column count.
   */
  public final void setColumnCount(final int theColumnCount) {
    this.columnCount = theColumnCount;
  }

  /**
   * Set the flag to determine if we are expecting input headers.
   *
   * @param theExpectInputHeaders Are input headers expected?
   */
  public final void setExpectInputHeaders(
      final boolean theExpectInputHeaders) {
    this.expectInputHeaders = theExpectInputHeaders;
  }

  /**
   * Set the input filename.
   *
   * @param theInputFilename
   *            The input filename.
   */
  public final void setInputFilename(final File theInputFilename) {
    this.inputFilename = theInputFilename;
  }

  /**
   * Set the input format.
   *
   * @param theInputFormat
   *            The new inputFormat format.
   */
  public final void setInputFormat(final CSVFormat theInputFormat) {
    this.inputFormat = theInputFormat;
  }

  /**
   * Set the input headings.
   *
   * @param theInputHeadings
   *            The new input headings.
   */
  public final void setInputHeadings(final String[] theInputHeadings) {
    this.inputHeadings = theInputHeadings;
  }

  /**
   * @param theOutputFormat
   *            the outputFormat to set
   */
  public final void setOutputFormat(final CSVFormat theOutputFormat) {
    this.outputFormat = theOutputFormat;
  }

  /**
   * Set the precision to use.
   *
   * @param thePrecision
   *            The precision to use.
   */
  public final void setPrecision(final int thePrecision) {
    this.precision = thePrecision;
  }

  /**
   * @param theProduceOutputHeaders
   *            the produceOutputHeaders to set
   */
  public final void setProduceOutputHeaders(
      final boolean theProduceOutputHeaders) {
    this.produceOutputHeaders = theProduceOutputHeaders;
  }

  /**
   * Set the record count.
   *
   * @param v
   *            The record count.
   */
  public final void setRecordCount(final int v) {
    this.recordCount = v;
  }

  /**
   * Set the status reporting object.
   *
   * @param theReport
   *            The status reporting object.
   */
  public final void setReport(final StatusReportable theReport) {
    this.report = theReport;
  }

  /**
   * Set the reporting interval.
   *
   * @param theReportInterval
   *            The new reporting interval.
   */
  public final void setReportInterval(final int theReportInterval) {
    this.reportInterval = theReportInterval;
  }

  /**
   * @return Should we stop?
   */
  @Override
  public final boolean shouldStop() {
    return this.cancel;
  }

  /** {@inheritDoc} */
  @Override
  public final String toString() {
    final StringBuilder result = new StringBuilder("[");
    result.append(getClass().getSimpleName());
    result.append(" inputFilename=");
    result.append(this.inputFilename);
    result.append(", recordCount=");
    result.append(this.recordCount);
    result.append("]");
    return result.toString();
  }

  /**
   * Update the status. Used internally.
   *
   * @param isAnalyzing
   *            True if we are in the process of analyzing.
   */
  public final void updateStatus(final boolean isAnalyzing) {
    if (isAnalyzing) {
      updateStatus("Analyzing");
    } else {
      updateStatus("Processing");
    }
  }

  /**
   * Report the current status.
   *
   * @param task
   *            The string to report.
   */
  public final void updateStatus(final String task) {
    boolean shouldDisplay = false;

    if (this.currentRecord == 0) {
      shouldDisplay = true;
    }

    this.currentRecord++;
    this.lastUpdate++;

    if (this.lastUpdate > this.reportInterval) {
      this.lastUpdate = 0;
      shouldDisplay = true;
    }

    if (shouldDisplay) {
      this.report.report(this.recordCount, this.currentRecord, task);
    }
  }

  /**
   * Validate that the file has been analyzed. Throw an error, if it has not.
   */
  public final void validateAnalyzed() {
    if (!this.analyzed) {
      throw new QuantError("File must be analyzed first.");
    }
  }

  /**
   * Write a row to the output file.
   *
   * @param tw
   *            The output stream.
   * @param row
   *            The row to write out.
   */
  public final void writeRow(final PrintWriter tw, final LoadedRow row) {
    final StringBuilder line = new StringBuilder();

    for (int i = 0; i < row.getData().length; i++) {
      BasicFile.appendSeparator(line, this.outputFormat);
      line.append(row.getData()[i]);
    }

    tw.println(line.toString());
  }

  /**
   * @return the script
   */
  public final AnalystScript getScript() {
    return script;
  }

  /**
   * @param theScript the script to set
   */
  public final void setScript(final AnalystScript theScript) {
    this.script = theScript;
  }

}
TOP

Related Classes of org.encog.app.analyst.csv.basic.BasicFile

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle. Contact coftware#gmail.com.