Package edu.cmu.sphinx.frontend.transform

Source Code of edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform

/*
* Copyright 1999-2004 Carnegie Mellon University. 
* Portions Copyright 2002-2004 Sun Microsystems, Inc. 
* Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/

package edu.cmu.sphinx.frontend.transform;

import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.Complex;
import edu.cmu.sphinx.util.props.*;


/**
* Computes the Discrete Fourier Transform (FT) of an input sequence, using Fast Fourier Transform (FFT). Fourier
* Transform is the process of analyzing a signal into its frequency components. In speech, rather than analyzing the
* signal over its entire duration, we analyze one <b>window</b> of audio data. This window is the product of applying a
* sliding Hamming window to the signal. Moreover, since the amplitude is a lot more important than the phase for speech
* recognition, this class returns the power spectrum of that window of data instead of the complex spectrum. Each value
* in the returned spectrum represents the strength of that particular frequency for that window of data.
* <p/>
* By default, the number of FFT points is the closest power of 2 that is equal to or larger than the number of samples
* in the incoming window of data. The FFT points can also be set by the user with the property defined by {@link
* #PROP_NUMBER_FFT_POINTS}. The length of the returned power spectrum is the number of FFT points, divided by 2, plus
* 1. Since the input signal is real, the FFT is symmetric, and the information contained in the whole vector is already
* present in its first half.
* <p/>
* Note that each call to {@link #getData() getData} only returns the spectrum of one window of data. To display the
* spectrogram of the entire original audio, one has to collect all the spectra from all the windows generated from the
* original data. A spectrogram is a two dimensional representation of three dimensional information. The horizontal
* axis represents time. The vertical axis represents the frequency. If we slice the spectrogram at a given time, we get
* the spectrum computed as the short term Fourier transform of the signal windowed around that time stamp. The
* intensity of the spectrum for each time frame is given by the color in the graph, or by the darkness in a gray scale
* plot. The spectrogram can be thought of as a view from the top of a surface generated by concatenating the spectral
* vectors obtained from the windowed signal.
* <p/>
* For example, Figure 1 below shows the audio signal of the utterance "one three nine oh", and Figure 2 shows its
* spectrogram, produced by putting together all the spectra returned by this FFT. Frequency is on the vertical axis,
* and time is on the horizontal axis. The darkness of the shade represents the strength of that frequency at that point
* in time: <p> <br><img src="doc-files/139o.jpg"> <br><b>Figure 1: The audio signal of the utterance "one three nine
* oh".</b> <p> <br><img src="doc-files/139ospectrum.jpg"> <br><b>Figure 2: The spectrogram of the utterance "one three
* nine oh" in Figure 1.</b>
*/
public class DiscreteFourierTransform extends BaseDataProcessor {

    /** The property for the number of points in the Fourier Transform. */
    @S4Integer(defaultValue = -1)
    public static final String PROP_NUMBER_FFT_POINTS = "numberFftPoints";

    /** The property for the invert transform. */
    @S4Boolean(defaultValue = false)
    public static final String PROP_INVERT = "invert";

    private boolean isNumberFftPointsSet;
    private int numberFftPoints;
    private int logBase2NumberFftPoints;
    private int numberDataPoints;
    private boolean invert;

    private Complex[] weightFft;
    private Complex[] inputFrame;
    private Complex[] from;
    private Complex[] to;

    private Complex weightFftTimesFrom2;
    private Complex tempComplex;

    public DiscreteFourierTransform( int numberFftPoints, boolean invert ) {
        initLogger();
        this.numberFftPoints = numberFftPoints;
        this.isNumberFftPointsSet = (numberFftPoints != -1);
        this.invert = invert;
    }

    public DiscreteFourierTransform() {
    }

   /*
    * (non-Javadoc)
    *
    * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
    */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        logger = ps.getLogger();
        numberFftPoints = ps.getInt(PROP_NUMBER_FFT_POINTS);
        isNumberFftPointsSet = (numberFftPoints != -1);
        invert = ps.getBoolean(PROP_INVERT);
    }


    /* (non-Javadoc)
    * @see edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend.CommonConfig)
    */
    @Override
    public void initialize() {
        super.initialize();
        if (isNumberFftPointsSet) {
            initializeFFT();
        }
    }


    /** Initialize all the data structures necessary for computing FFT. */
    private void initializeFFT() {
        /**
         * Number of points in the FFT. By default, the value is 512,
         * which means that we compute 512 values around a circle in the
         * complex plane. Complex conjugate pairs will yield the same
         * power, therefore the power produced by indices 256 through
         * 511 are symmetrical with the ones between 1 and 254. Therefore,
         * we need only return values between 0 and 255.
         */
        computeLogBase2(numberFftPoints);
        createWeightFft(numberFftPoints, invert);
        initComplexArrays();
        weightFftTimesFrom2 = new Complex();
        tempComplex = new Complex();
    }


    /** Initialize all the Complex arrays that will be necessary for FFT. */
    private void initComplexArrays() {

        inputFrame = new Complex[numberFftPoints];
        from = new Complex[numberFftPoints];
        to = new Complex[numberFftPoints];

        for (int i = 0; i < numberFftPoints; i++) {
            inputFrame[i] = new Complex();
            from[i] = new Complex();
            to[i] = new Complex();
        }
    }


    /**
     * Process data, creating the power spectrum from an input frame.
     *
     * @param input the input frame
     * @return a DoubleData that is the power spectrum of the input frame
     * @throws java.lang.IllegalArgumentException
     *
     */
    private DoubleData process(DoubleData input)
            throws IllegalArgumentException {

        /**
         * Create complex input sequence equivalent to the real
         * input sequence.
         * If the number of points is less than the window size,
         * we incur in aliasing. If it's greater, we pad the input
         * sequence with zeros.
         */
        double[] in = input.getValues();

        if (numberFftPoints < in.length) {
            int i = 0;
            for (; i < numberFftPoints; i++) {
                inputFrame[i].set(in[i], 0.0f);
            }
            for (; i < in.length; i++) {
                tempComplex.set(in[i], 0.0f);
                inputFrame[i % numberFftPoints].addComplex
                        (inputFrame[i % numberFftPoints], tempComplex);
            }
        } else {
            int i = 0;
            for (; i < in.length; i++) {
                inputFrame[i].set(in[i], 0.0f);
            }
            for (; i < numberFftPoints; i++) {
                inputFrame[i].reset();
            }
        }

        /**
         * Create output sequence.
         */
        double[] outputSpectrum = new double[(numberFftPoints >> 1) + 1];

        /**
         * Start Fast Fourier Transform recursion
         */
        recurseFft(inputFrame, outputSpectrum, numberFftPoints, invert);

        /**
         * Return the power spectrum
         */
        DoubleData output = new DoubleData
                (outputSpectrum, input.getSampleRate(),
                        input.getFirstSampleNumber());

        return output;
    }


    /**
     * Make sure the number of points in the FFT is a power of 2 by computing its log base 2 and checking for
     * remainders.
     *
     * @param numberFftPoints number of points in the FFT
     * @throws java.lang.IllegalArgumentException
     *
     */
    private void computeLogBase2(int numberFftPoints)
            throws IllegalArgumentException {
        this.logBase2NumberFftPoints = 0;
        for (int k = numberFftPoints; k > 1;
             k >>= 1, this.logBase2NumberFftPoints++) {
            if (((k % 2) != 0) || (numberFftPoints < 0)) {
                throw new IllegalArgumentException("Not a power of 2: "
                        + numberFftPoints);
            }
        }
    }


    /**
     * Initializes the <b>weightFft[]</b> vector. <p><b>weightFft[k] = w ^ k</b></p> where: <p><b>w = exp(-2 * PI * i /
     * N)</b></p> <p><b>i</b> is a complex number such that <b>i * i = -1</b> and <b>N</b> is the number of points in
     * the FFT. Since <b>w</b> is complex, this is the same as</p> <p><b>Re(weightFft[k]) = cos ( -2 * PI * k /
     * N)</b></p> <p><b>Im(weightFft[k]) = sin ( -2 * PI * k / N)</b></p>
     *
     * @param numberFftPoints number of points in the FFT
     * @param invert          whether it's direct (false) or inverse (true) FFT
     */
    private void createWeightFft(int numberFftPoints, boolean invert) {
        /**
         * weightFFT will have numberFftPoints/2 complex elements.
         */
        weightFft = new Complex[numberFftPoints >> 1];

        /**
         * For the inverse FFT,
         * w = 2 * PI / numberFftPoints;
         */
        double w = -2 * Math.PI / numberFftPoints;
        if (invert) {
            w = -w;
        }

        for (int k = 0; k < (numberFftPoints >> 1); k++) {
            weightFft[k] = new Complex(Math.cos(w * k), Math.sin(w * k));
        }
    }


    /**
     * Reads the next DoubleData object, which is a data frame from which we'll compute the power spectrum. Signal
     * objects just pass through unmodified.
     *
     * @return the next available power spectrum DoubleData object, or null if no Spectrum object is available
     * @throws DataProcessingException if there is a processing error
     */
    @Override
    public Data getData() throws DataProcessingException {

        Data input = getPredecessor().getData();

        getTimer().start();

        if ((input != null) && (input instanceof DoubleData)) {
            DoubleData data = (DoubleData) input;
            if (!isNumberFftPointsSet) {
                /*
                 * If numberFftPoints is not set by the user,
                 * figure out the numberFftPoints and initialize the
                 * data structures appropriately.
                 */
                if (numberDataPoints != data.getValues().length) {
                    numberDataPoints = data.getValues().length;
                    numberFftPoints = getNumberFftPoints(numberDataPoints);
                    initializeFFT();
                }
            } else {
                /*
                 * Warn if the user-set numberFftPoints is not ideal.
                 */
                if (numberDataPoints != data.getValues().length) {
                    numberDataPoints = data.getValues().length;
                    int idealFftPoints = getNumberFftPoints(numberDataPoints);
                    if (idealFftPoints != numberFftPoints) {
                        logger.warning("User set numberFftPoints (" +
                                numberFftPoints + ") is not ideal (" +
                                idealFftPoints + ')');
                    }
                }
            }
            input = process(data);
        }

        // At this point - or in the call immediatelly preceding
        // this -, we should have created a cepstrum frame with
        // whatever data came last, even if we had less than
        // window size of data.

        getTimer().stop();

        return input;
    }


    /**
     * Returns the ideal number of FFT points given the number of samples. The ideal number of FFT points is the closest
     * power of 2 that is equal to or larger than the number of samples in the incoming window.
     *
     * @param numberSamples the number of samples in the incoming window
     * @return the closest power of 2 that is equal to or larger than the number of samples in the incoming window
     */
    private  static int getNumberFftPoints(int numberSamples) {
        int fftPoints = 1;

        while (fftPoints < numberSamples) {
            fftPoints <<= 1;
            if (fftPoints < 1) {
                throw new Error("Invalid # of FFT points: " + fftPoints);
            }
        }
        return fftPoints;
    }


    /**
     * Establish the recursion. The FFT computation will be computed by as a recursion. Each stage in the butterfly will
     * be fully computed during recursion. In fact, we use the mechanism of recursion only because it's the simplest way
     * of switching the "input" and "output" vectors. The output of a stage is the input to the next stage. The
     * butterfly computes elements in place, but we still need to switch the vectors. We could copy it (not very
     * efficient...) or, in C, switch the pointers. We can avoid the pointers by using recursion.
     *
     * @param input           input sequence
     * @param output          output sequence
     * @param numberFftPoints number of points in the FFT
     * @param invert          whether it's direct (false) or inverse (true) FFT
     */
    private void recurseFft(Complex[] input,
                            double[] output,
                            int numberFftPoints,
                            boolean invert) {

        double divisor;

        /**
         * The direct and inverse FFT are essentially the same
         * algorithm, except for two difference: a scaling factor of
         * "numberFftPoints" and the signal of the exponent in the
         * weightFft vectors, defined in the method
         * <code>createWeightFft</code>.
         */

        if (!invert) {
            divisor = 1.0;
        } else {
            divisor = numberFftPoints;
        }

        /**
         * Initialize the "from" and "to" variables.
         */
        for (int i = 0; i < numberFftPoints; i++) {
            to[i].reset();
            from[i].scaleComplex(input[i], divisor);
        }

        /**
         * Repeat the recursion log2(numberFftPoints) times,
         * i.e., we have log2(numberFftPoints) butterfly stages.
         */
        butterflyStage(from, to, numberFftPoints, numberFftPoints >> 1);

        /**
         * Compute energy ("float") for each frequency point
         * from the fft ("complex")
         */
        if ((this.logBase2NumberFftPoints & 1) == 0) {
            for (int i = 0; i <= (numberFftPoints >> 1); i++) {
                output[i] = from[i].squaredMagnitudeComplex();
            }
        } else {
            for (int i = 0; i <= (numberFftPoints >> 1); i++) {
                output[i] = to[i].squaredMagnitudeComplex();
            }
        }
        }


    /**
     * Compute one stage in the FFT butterfly. The name "butterfly" appears because this method computes elements in
     * pairs, and a flowgraph of the computation (output "0" comes from input "0" and "1" and output "1" comes from
     * input "0" and "1") resembles a butterfly.
     * <p/>
     * We repeat <code>butterflyStage</code> for <b>log_2(numberFftPoints)</b> stages, by calling the recursion with the
     * argument <code>currentDistance</code> divided by 2 at each call, and checking if it's still > 0.
     *
     * @param from            the input sequence at each stage
     * @param to              the output sequence
     * @param numberFftPoints the total number of points
     * @param currentDistance the "distance" between elements in the butterfly
     */
    private void butterflyStage(Complex[] from,
                                Complex[] to,
                                int numberFftPoints,
                                int currentDistance) {
        int ndx1From;
        int ndx2From;
        int ndx1To;
        int ndx2To;
        int ndxWeightFft;

        if (currentDistance > 0) {

            int twiceCurrentDistance = 2 * currentDistance;

            for (int s = 0; s < currentDistance; s++) {
                ndx1From = s;
                ndx2From = s + currentDistance;
                ndx1To = s;
                ndx2To = s + (numberFftPoints >> 1);
                ndxWeightFft = 0;
                while (ndxWeightFft < (numberFftPoints >> 1)) {
                    /**
                     * <b>weightFftTimesFrom2 = weightFft[k]   </b>
                     * <b>                      *from[ndx2From]</b>
                     */
                    weightFftTimesFrom2.multiplyComplex
                            (weightFft[ndxWeightFft], from[ndx2From]);
                    /**
                     * <b>to[ndx1To] = from[ndx1From]       </b>
                     * <b>             + weightFftTimesFrom2</b>
                     */
                    to[ndx1To].addComplex
                            (from[ndx1From], weightFftTimesFrom2);
                    /**
                     * <b>to[ndx2To] = from[ndx1From]       </b>
                     * <b>             - weightFftTimesFrom2</b>
                     */
                    to[ndx2To].subtractComplex
                            (from[ndx1From], weightFftTimesFrom2);
                    ndx1From += twiceCurrentDistance;
                    ndx2From += twiceCurrentDistance;
                    ndx1To += currentDistance;
                    ndx2To += currentDistance;
                    ndxWeightFft += currentDistance;
                }
            }

            /**
             * This call'd better be the last call in this block, so when
             * it returns we go straight into the return line below.
             *
             * We switch the <i>to</i> and <i>from</i> variables,
             * the total number of points remains the same, and
             * the <i>currentDistance</i> is divided by 2.
             */
            butterflyStage(to, from, numberFftPoints, (currentDistance >> 1));
        }
        }
}
TOP

Related Classes of edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.