Package org.integratedmodelling.riskwiz.learning.data.loader

Source Code of org.integratedmodelling.riskwiz.learning.data.loader.XRFFLoader

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* XRFFLoader.java
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
*
*/

package org.integratedmodelling.riskwiz.learning.data.loader;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.zip.GZIPInputStream;

import org.integratedmodelling.riskwiz.learning.data.Instance;
import org.integratedmodelling.riskwiz.learning.data.Instances;
import org.integratedmodelling.riskwiz.learning.data.xml.XMLInstances;


/**
<!-- globalinfo-start -->
* Reads a source that is in the XML version of the ARFF format. It automatically decompresses the data if the extension is '.xrff.gz'.
* <p/>
<!-- globalinfo-end -->
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 1.3 $
* @see Loader
*/
public class XRFFLoader extends AbstractFileLoader
        implements BatchConverter, URLSourcedLoader {

    /** for serialization */
    private static final long serialVersionUID = 3764533621135196582L;

    /** the file extension */
    public static String FILE_EXTENSION = XMLInstances.FILE_EXTENSION;

    /** the extension for compressed files */
    public static String FILE_EXTENSION_COMPRESSED = FILE_EXTENSION + ".gz";

    /** the url */
    protected String m_URL = "http://";

    /** The reader for the source file. */
    protected transient Reader m_sourceReader = null;

    /** the loaded XML document */
    protected XMLInstances m_XMLInstances;
 
    /**
     * Returns a string describing this Loader
     *
     * @return     a description of the Loader suitable for
     *       displaying in the explorer/experimenter gui
     */
    public String globalInfo() {
        return
                "Reads a source that is in the XML version of the ARFF format. "
                + "It automatically decompresses the data if the extension is '"
                + FILE_EXTENSION_COMPRESSED + "'.";
    }

    /**
     * Get the file extension used for libsvm files
     *
     * @return     the file extension
     */
    @Override
  public String getFileExtension() {
        return FILE_EXTENSION;
    }

    /**
     * Gets all the file extensions used for this type of file
     *
     * @return the file extensions
     */
    @Override
  public String[] getFileExtensions() {
        return new String[] { FILE_EXTENSION, FILE_EXTENSION_COMPRESSED};
    }

    /**
     * Returns a description of the file type.
     *
     * @return     a short file description
     */
    @Override
  public String getFileDescription() {
        return "XRFF data files";
    }

    /**
     * Resets the Loader ready to read a new data set
     *
     * @throws IOException   if something goes wrong
     */
    @Override
  public void reset() throws IOException {
        m_structure = null;
        m_XMLInstances = null;

        setRetrieval(NONE);
   
        if ((m_File != null) && (new File(m_File)).isFile()) {
            setFile(new File(m_File));
        } else if ((m_URL != null) && !m_URL.equals("http://")) {
            setURL(m_URL);
        }
    }

    /**
     * Resets the Loader object and sets the source of the data set to be
     * the supplied File object.
     *
     * @param file     the source file.
     * @throws IOException   if an error occurs
     */
    @Override
  public void setSource(File file) throws IOException {
        m_structure = null;
        m_XMLInstances = null;
   
        setRetrieval(NONE);

        if (file == null) {
            throw new IOException("Source file object is null!");
        }

        try {
            if (file.getName().endsWith(FILE_EXTENSION_COMPRESSED)) {
                setSource(new GZIPInputStream(new FileInputStream(file)));
            } else {
                setSource(new FileInputStream(file));
            }
        } catch (FileNotFoundException ex) {
            throw new IOException("File not found");
        }
   
        m_sourceFile = file;
        m_File = file.getAbsolutePath();
    }

    /**
     * Resets the Loader object and sets the source of the data set to be
     * the supplied url.
     *
     * @param url   the source url.
     * @throws IOException   if an error occurs
     */
    public void setSource(URL url) throws IOException {
        m_structure = null;
        m_XMLInstances = null;
   
        setRetrieval(NONE);
   
        setSource(url.openStream());

        m_URL = url.toString();
    }
 
    /**
     * Set the url to load from
     *
     * @param url     the url to load from
     * @throws IOException   if the url can't be set.
     */
    @Override
  public void setURL(String url) throws IOException {
        m_URL = url;
        setSource(new URL(url));
    }

    /**
     * Return the current url
     *
     * @return the current url
     */
    @Override
  public String retrieveURL() {
        return m_URL;
    }

    /**
     * Resets the Loader object and sets the source of the data set to be
     * the supplied InputStream.
     *
     * @param in       the source InputStream.
     * @throws IOException   if initialization of reader fails.
     */
    @Override
  public void setSource(InputStream in) throws IOException {
        m_File = (new File(System.getProperty("user.dir"))).getAbsolutePath();
        m_URL = "http://";

        m_sourceReader = new BufferedReader(new InputStreamReader(in));
    }
 
    /**
     * Determines and returns (if possible) the structure (internally the
     * header) of the data set as an empty set of instances.
     *
     * @return       the structure of the data set as an empty set
     *         of Instances
     * @throws IOException   if an error occurs
     */
    @Override
  public Instances getStructure() throws IOException {
        if (m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }

        if (m_structure == null) {
            try {
                m_XMLInstances = new XMLInstances(m_sourceReader);
                m_structure = new Instances(m_XMLInstances.getInstances(), 0);
            } catch (IOException ioe) {
                // just re-throw it
                throw ioe;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        return new Instances(m_structure, 0);
    }
 
    /**
     * Return the full data set. If the structure hasn't yet been determined
     * by a call to getStructure then method should do so before processing
     * the rest of the data set.
     *
     * @return       the structure of the data set as an empty
     *         set of Instances
     * @throws IOException   if there is no source or parsing fails
     */
    @Override
  public Instances getDataSet() throws IOException {
        if (m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }
   
        if (getRetrieval() == INCREMENTAL) {
            throw new IOException(
                    "Cannot mix getting Instances in both incremental and batch modes");
        }

        setRetrieval(BATCH);
        if (m_structure == null) {
            getStructure();
        }

        return m_XMLInstances.getInstances();
    }

    /**
     * XRFFLoader is unable to process a data set incrementally.
     *
     * @param structure    ignored
     * @return       never returns without throwing an exception
     * @throws IOException   always. XRFFLoader is unable to process a
     *         data set incrementally.
     */
    @Override
  public Instance getNextInstance(Instances structure) throws IOException {
        throw new IOException("XRFFLoader can't read data sets incrementally.");
    }

    /**
     * Main method.
     *
     * @param args   should contain the name of an input file.
     */
    public static void main(String[] args) {
        runFileLoader(new XRFFLoader(), args);
    }
}
TOP

Related Classes of org.integratedmodelling.riskwiz.learning.data.loader.XRFFLoader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.