Package edu.ucla.sspace.temporal

Source Code of edu.ucla.sspace.temporal.TemporalSemanticSpaceUtils

/*
* Copyright 2009 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.temporal;

import edu.ucla.sspace.vector.Vector;
import edu.ucla.sspace.vector.VectorIO;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import java.util.Set;
import java.util.SortedSet;

import java.util.logging.Level;
import java.util.logging.Logger;

/**
* A collection of utility methods for interacting with {@link
* TemporalSemanticSpace} instances.
*
* <p> <a name="format"></a>
*
* A temporal semantic space can be written in one of two formats:
* <ul>
*
* <li> text
*
* </li>
*
* <li> binary
*
* </li>
*
* </ul>
*
* @see TemporalSemanticSpace
*/
public class TemporalSemanticSpaceUtils {

    private static final Logger LOGGER =
    Logger.getLogger(TemporalSemanticSpaceUtils.class.getName());
   
    /**
     * The type of formatting to use when writing a semantic space to a file.
     * See <a href="SemantSpaceUtils.html#format">here</a> for file format
     * specifications.
     */
    public enum TSSpaceFormat { TEXT, BINARY, SPARSE_TEXT, SPARSE_BINARY }

    /**
     * Uninstantiable
     */
    private TemporalSemanticSpaceUtils() { }

    /**
     * Loads and returns the {@link TemporalSemanticSpace} stored at the file
     * name in {@link TSSpaceFormat#TEXT text} format.
     *
     * @param sspaceFileName the name of a file containing a {@link
     *        TemporalSemanticSpace} that has been written to disk
     */
    public static TemporalSemanticSpace loadTemporalSemanticSpace(
            String sspaceFileName) {
        return loadTemporalSemanticSpace(new File(sspaceFileName),
                                         TSSpaceFormat.TEXT);
    }

    /**
     * Loads and returns the {@link TemporalSemanticSpace} stored at the file in
     * {@link TSSpaceFormat#TEXT text} format.
     *
     * @param sspaceFile a file containing a {@link TemporalSemanticSpace} that
     *                   has been written to disk
     */
    public static TemporalSemanticSpace loadTemporalSemanticSpace(
            File sspaceFile) {
        return loadTemporalSemanticSpace(sspaceFile, TSSpaceFormat.TEXT);
    }
   
    /**
     * Loads and returns the {@link TemporalSemanticSpace} stored at the file in
     * the specified format.
     *
     * @param sspaceFile a file containing a {@link TemporalSemanticSpace} that
     *        has been written to disk
     * @param format the format of the {@link TemporalSemanticSpace} in the file
     */
    public static TemporalSemanticSpace loadTemporalSemanticSpace(
            File sspaceFile, TSSpaceFormat format) {
        return new FileBasedTemporalSemanticSpace(sspaceFile, format);
    }

    /**
     * Writes the data contained in the {@link TemporalSemanticSpace} to the
     * file with the provided name using the {@link TSSpaceFormat#TEXT} format.
     * See <a href="#format">here</a> for file format specifications.
     */
    public static void printTemporalSemanticSpace(TemporalSemanticSpace sspace,
                                                  String outputFileName)
        throws IOException {
        printTemporalSemanticSpace(sspace, new File(outputFileName), TSSpaceFormat.TEXT);
    }

    /**
     * Writes the data contained in the {@link TemporalSemanticSpace} to the
     * provided file using the {@link TSSpaceFormat#TEXT} format.  See <a
     * href="#format">here</a> for file format specifications.
     */
    public static void printTemporalSemanticSpace(TemporalSemanticSpace sspace,
                                                  File output)
        throws IOException {
        printTemporalSemanticSpace(sspace, output, TSSpaceFormat.TEXT);
    }

    /**
     * Writes the data contained in the {@link TemporalSemanticSpace} to the
     * provided file and format.  See <a href="#format">here</a> for file format
     * specifications.
     */
    public static void printTemporalSemanticSpace(TemporalSemanticSpace sspace,
                                                  File output,
                                                  TSSpaceFormat format)
        throws IOException {

        switch (format) {
        case TEXT:
            printText(sspace, output);
            break;
        case SPARSE_TEXT:
            printSparseText(sspace, output);
            break;
        case BINARY:
            printBinary(sspace, output);
            break;
        case SPARSE_BINARY:
            printSparseBinary(sspace, output);
            break;
        default:
            throw new IllegalArgumentException("Unknown format type: "+ format);
        }
    }

    private static void printText(TemporalSemanticSpace sspace, File output)
        throws IOException {

        PrintWriter pw = new PrintWriter(output);

        Set<String> words = sspace.getWords();
        // determine how many dimensions are used by the vectors
        int dimensions = 0;
        if (words.size() > 0) {
            dimensions = sspace.getVectorLength();
        }

        int size = words.size();
        // print out how many vectors there are and the number of dimensions
        pw.println(size + " " + dimensions);

        int wordCount = 0;
        for (String word : words) {
            pw.print(word + "|");
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info(String.format("serializing text %d/%d: %s",
                              wordCount++, size, word));
            }
            for (long timestep : sspace.getTimeSteps(word)) {
                Vector timeSlice =
                    sspace.getVectorBetween(word, timestep, timestep + 1);
                if (timeSlice != null) {
                    pw.print(timestep + " " +
                             VectorIO.toString(timeSlice) + "|");
                }
            }
            pw.println("");
        }
        pw.close();
    }

    /**
     *
     */
    private static void printSparseText(TemporalSemanticSpace sspace,
                    File output)
        throws IOException {

        PrintWriter pw = new PrintWriter(output);

        Set<String> words = sspace.getWords();
        // determine how many dimensions are used by the vectors
        int dimensions = 0;
        if (words.size() > 0) {
            dimensions = sspace.getVectorLength();
        }

        int size = words.size();
        // print out how many vectors there are and the number of dimensions
        pw.println(size + " " + dimensions);

        int wordCount = 0;
        for (String word : words) {
            pw.print(word + "|");
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info(String.format("serializing sparse text %d/%d: %s",
                              wordCount++, size, word));
            }

            SortedSet<Long> timeSteps = sspace.getTimeSteps(word);

            for (long timestep : timeSteps) {
                Vector timeSlice =
                    sspace.getVectorBetween(word, timestep, timestep + 1);
                if (timeSlice != null) {
                    // count the non-zero
                    int nonZero = 0;
                    for (int i = 0; i < timeSlice.length(); ++i) {
                        if (timeSlice.getValue(i).doubleValue() != 0d) {
                            nonZero++;
                        }
                    }
                   
                    pw.print(timestep + " " + nonZero + "%");
                    StringBuilder sb = new StringBuilder(nonZero * 4);
                    for (int i = 0; i < timeSlice.length(); ++i) {
                        double d = timeSlice.getValue(i).doubleValue();
                        if (d != 0d) {
                            sb.append(i).append(",").append(d);
                        }
                        if (i + 1 < timeSlice.length()) {
                            sb.append(",");
                        }
                    }
                    pw.print(sb.toString() + "|");
                }
            }
            pw.println("");
        }
        pw.close();
    }

    /**
     *
     */
    private static void printBinary(TemporalSemanticSpace sspace, File output)
        throws IOException {

        DataOutputStream dos =
            new DataOutputStream(new FileOutputStream(output));

        Set<String> words = sspace.getWords();
        // determine how many dimensions are used by the vectors
        int dimensions = 0;
        if (words.size() > 0) {
            dimensions = sspace.getVectorLength();
        }

        int size = words.size();
        // print out how many vectors there are and the number of dimensions
        dos.writeInt(size);
        dos.writeInt(dimensions);

        int wordCount = 0;
        for (String word : words) {
            dos.writeUTF(word);
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info(String.format("serializing binary %d/%d: %s",
                              wordCount++, size, word));
            }

            for (long timestep : sspace.getTimeSteps(word)) {
                Vector timeSlice =
                    sspace.getVectorBetween(word, timestep, timestep + 1);
                if (timeSlice != null) {
                    dos.writeLong(timestep);
                    for (int i = 0; i < timeSlice.length(); ++i) {
                        dos.writeDouble(timeSlice.getValue(i).doubleValue());
                    }
                }
            }
        }
        dos.close();
    }

    private static void printSparseBinary(TemporalSemanticSpace sspace,
                                          File output)
        throws IOException {

        DataOutputStream dos =
            new DataOutputStream(new FileOutputStream(output));

        Set<String> words = sspace.getWords();
        // determine how many dimensions are used by the vectors
        int dimensions = 0;
        if (words.size() > 0) {
            dimensions = sspace.getVectorLength();
        }

        int size = words.size();
        // print out how many vectors there are and the number of dimensions
        dos.writeInt(size);
        dos.writeInt(dimensions);

        int wordCount = 0;
        for (String word : words) {
            dos.writeUTF(word);
            if (LOGGER.isLoggable(Level.INFO)) {
                LOGGER.info(String.format("serializing sparse binary %d/%d: %s",
                              wordCount++, size, word));
            }

            SortedSet<Long> timeSteps = sspace.getTimeSteps(word);

            // write out the number of time steps seen for this word
            dos.writeInt(timeSteps.size());

            for (long timestep : timeSteps) {
                Vector timeSlice =
                    sspace.getVectorBetween(word, timestep, timestep + 1);
                if (timeSlice != null) {
                    // count the non-zero
                    int nonZero = 0;
                    for (int i = 0; i < timeSlice.length(); ++i) {
                        if (timeSlice.getValue(i).doubleValue()  != 0d) {
                            nonZero++;
                        }
                    }

                    dos.writeLong(timestep);
                    dos.writeInt(nonZero);
                    for (int i = 0; i < timeSlice.length(); ++i) {
                        double d = timeSlice.getValue(i).doubleValue();
                        if (d != 0d) {
                            dos.writeInt(i);
                            dos.writeDouble(d);
                        }
                    }
                }
            }
        }
        dos.close();
    }
}
TOP

Related Classes of edu.ucla.sspace.temporal.TemporalSemanticSpaceUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.