/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2002-2004 Sun Microsystems, Inc.
* Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.transform;
import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.Complex;
import edu.cmu.sphinx.util.props.*;
/**
* Computes the Discrete Fourier Transform (FT) of an input sequence, using Fast Fourier Transform (FFT). Fourier
* Transform is the process of analyzing a signal into its frequency components. In speech, rather than analyzing the
* signal over its entire duration, we analyze one <b>window</b> of audio data. This window is the product of applying a
* sliding Hamming window to the signal. Moreover, since the amplitude is a lot more important than the phase for speech
* recognition, this class returns the power spectrum of that window of data instead of the complex spectrum. Each value
* in the returned spectrum represents the strength of that particular frequency for that window of data.
* <p/>
* By default, the number of FFT points is the closest power of 2 that is equal to or larger than the number of samples
* in the incoming window of data. The FFT points can also be set by the user with the property defined by {@link
* #PROP_NUMBER_FFT_POINTS}. The length of the returned power spectrum is the number of FFT points, divided by 2, plus
* 1. Since the input signal is real, the FFT is symmetric, and the information contained in the whole vector is already
* present in its first half.
* <p/>
* Note that each call to {@link #getData() getData} only returns the spectrum of one window of data. To display the
* spectrogram of the entire original audio, one has to collect all the spectra from all the windows generated from the
* original data. A spectrogram is a two dimensional representation of three dimensional information. The horizontal
* axis represents time. The vertical axis represents the frequency. If we slice the spectrogram at a given time, we get
* the spectrum computed as the short term Fourier transform of the signal windowed around that time stamp. The
* intensity of the spectrum for each time frame is given by the color in the graph, or by the darkness in a gray scale
* plot. The spectrogram can be thought of as a view from the top of a surface generated by concatenating the spectral
* vectors obtained from the windowed signal.
* <p/>
* For example, Figure 1 below shows the audio signal of the utterance "one three nine oh", and Figure 2 shows its
* spectrogram, produced by putting together all the spectra returned by this FFT. Frequency is on the vertical axis,
* and time is on the horizontal axis. The darkness of the shade represents the strength of that frequency at that point
* in time: <p> <br><img src="doc-files/139o.jpg"> <br><b>Figure 1: The audio signal of the utterance "one three nine
* oh".</b> <p> <br><img src="doc-files/139ospectrum.jpg"> <br><b>Figure 2: The spectrogram of the utterance "one three
* nine oh" in Figure 1.</b>
*/
public class DiscreteFourierTransform extends BaseDataProcessor {
/** The property for the number of points in the Fourier Transform. */
@S4Integer(defaultValue = -1)
public static final String PROP_NUMBER_FFT_POINTS = "numberFftPoints";
/** The property for the invert transform. */
@S4Boolean(defaultValue = false)
public static final String PROP_INVERT = "invert";
private boolean isNumberFftPointsSet;
private int numberFftPoints;
private int logBase2NumberFftPoints;
private int numberDataPoints;
private boolean invert;
private Complex[] weightFft;
private Complex[] inputFrame;
private Complex[] from;
private Complex[] to;
private Complex weightFftTimesFrom2;
private Complex tempComplex;
public DiscreteFourierTransform( int numberFftPoints, boolean invert ) {
initLogger();
this.numberFftPoints = numberFftPoints;
this.isNumberFftPointsSet = (numberFftPoints != -1);
this.invert = invert;
}
public DiscreteFourierTransform() {
}
/*
* (non-Javadoc)
*
* @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
*/
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
logger = ps.getLogger();
numberFftPoints = ps.getInt(PROP_NUMBER_FFT_POINTS);
isNumberFftPointsSet = (numberFftPoints != -1);
invert = ps.getBoolean(PROP_INVERT);
}
/* (non-Javadoc)
* @see edu.cmu.sphinx.frontend.DataProcessor#initialize(edu.cmu.sphinx.frontend.CommonConfig)
*/
@Override
public void initialize() {
super.initialize();
if (isNumberFftPointsSet) {
initializeFFT();
}
}
/** Initialize all the data structures necessary for computing FFT. */
private void initializeFFT() {
/**
* Number of points in the FFT. By default, the value is 512,
* which means that we compute 512 values around a circle in the
* complex plane. Complex conjugate pairs will yield the same
* power, therefore the power produced by indices 256 through
* 511 are symmetrical with the ones between 1 and 254. Therefore,
* we need only return values between 0 and 255.
*/
computeLogBase2(numberFftPoints);
createWeightFft(numberFftPoints, invert);
initComplexArrays();
weightFftTimesFrom2 = new Complex();
tempComplex = new Complex();
}
/** Initialize all the Complex arrays that will be necessary for FFT. */
private void initComplexArrays() {
inputFrame = new Complex[numberFftPoints];
from = new Complex[numberFftPoints];
to = new Complex[numberFftPoints];
for (int i = 0; i < numberFftPoints; i++) {
inputFrame[i] = new Complex();
from[i] = new Complex();
to[i] = new Complex();
}
}
/**
* Process data, creating the power spectrum from an input frame.
*
* @param input the input frame
* @return a DoubleData that is the power spectrum of the input frame
* @throws java.lang.IllegalArgumentException
*
*/
private DoubleData process(DoubleData input)
throws IllegalArgumentException {
/**
* Create complex input sequence equivalent to the real
* input sequence.
* If the number of points is less than the window size,
* we incur in aliasing. If it's greater, we pad the input
* sequence with zeros.
*/
double[] in = input.getValues();
if (numberFftPoints < in.length) {
int i = 0;
for (; i < numberFftPoints; i++) {
inputFrame[i].set(in[i], 0.0f);
}
for (; i < in.length; i++) {
tempComplex.set(in[i], 0.0f);
inputFrame[i % numberFftPoints].addComplex
(inputFrame[i % numberFftPoints], tempComplex);
}
} else {
int i = 0;
for (; i < in.length; i++) {
inputFrame[i].set(in[i], 0.0f);
}
for (; i < numberFftPoints; i++) {
inputFrame[i].reset();
}
}
/**
* Create output sequence.
*/
double[] outputSpectrum = new double[(numberFftPoints >> 1) + 1];
/**
* Start Fast Fourier Transform recursion
*/
recurseFft(inputFrame, outputSpectrum, numberFftPoints, invert);
/**
* Return the power spectrum
*/
DoubleData output = new DoubleData
(outputSpectrum, input.getSampleRate(),
input.getFirstSampleNumber());
return output;
}
/**
* Make sure the number of points in the FFT is a power of 2 by computing its log base 2 and checking for
* remainders.
*
* @param numberFftPoints number of points in the FFT
* @throws java.lang.IllegalArgumentException
*
*/
private void computeLogBase2(int numberFftPoints)
throws IllegalArgumentException {
this.logBase2NumberFftPoints = 0;
for (int k = numberFftPoints; k > 1;
k >>= 1, this.logBase2NumberFftPoints++) {
if (((k % 2) != 0) || (numberFftPoints < 0)) {
throw new IllegalArgumentException("Not a power of 2: "
+ numberFftPoints);
}
}
}
/**
* Initializes the <b>weightFft[]</b> vector. <p><b>weightFft[k] = w ^ k</b></p> where: <p><b>w = exp(-2 * PI * i /
* N)</b></p> <p><b>i</b> is a complex number such that <b>i * i = -1</b> and <b>N</b> is the number of points in
* the FFT. Since <b>w</b> is complex, this is the same as</p> <p><b>Re(weightFft[k]) = cos ( -2 * PI * k /
* N)</b></p> <p><b>Im(weightFft[k]) = sin ( -2 * PI * k / N)</b></p>
*
* @param numberFftPoints number of points in the FFT
* @param invert whether it's direct (false) or inverse (true) FFT
*/
private void createWeightFft(int numberFftPoints, boolean invert) {
/**
* weightFFT will have numberFftPoints/2 complex elements.
*/
weightFft = new Complex[numberFftPoints >> 1];
/**
* For the inverse FFT,
* w = 2 * PI / numberFftPoints;
*/
double w = -2 * Math.PI / numberFftPoints;
if (invert) {
w = -w;
}
for (int k = 0; k < (numberFftPoints >> 1); k++) {
weightFft[k] = new Complex(Math.cos(w * k), Math.sin(w * k));
}
}
/**
* Reads the next DoubleData object, which is a data frame from which we'll compute the power spectrum. Signal
* objects just pass through unmodified.
*
* @return the next available power spectrum DoubleData object, or null if no Spectrum object is available
* @throws DataProcessingException if there is a processing error
*/
@Override
public Data getData() throws DataProcessingException {
Data input = getPredecessor().getData();
getTimer().start();
if ((input != null) && (input instanceof DoubleData)) {
DoubleData data = (DoubleData) input;
if (!isNumberFftPointsSet) {
/*
* If numberFftPoints is not set by the user,
* figure out the numberFftPoints and initialize the
* data structures appropriately.
*/
if (numberDataPoints != data.getValues().length) {
numberDataPoints = data.getValues().length;
numberFftPoints = getNumberFftPoints(numberDataPoints);
initializeFFT();
}
} else {
/*
* Warn if the user-set numberFftPoints is not ideal.
*/
if (numberDataPoints != data.getValues().length) {
numberDataPoints = data.getValues().length;
int idealFftPoints = getNumberFftPoints(numberDataPoints);
if (idealFftPoints != numberFftPoints) {
logger.warning("User set numberFftPoints (" +
numberFftPoints + ") is not ideal (" +
idealFftPoints + ')');
}
}
}
input = process(data);
}
// At this point - or in the call immediatelly preceding
// this -, we should have created a cepstrum frame with
// whatever data came last, even if we had less than
// window size of data.
getTimer().stop();
return input;
}
/**
* Returns the ideal number of FFT points given the number of samples. The ideal number of FFT points is the closest
* power of 2 that is equal to or larger than the number of samples in the incoming window.
*
* @param numberSamples the number of samples in the incoming window
* @return the closest power of 2 that is equal to or larger than the number of samples in the incoming window
*/
private static int getNumberFftPoints(int numberSamples) {
int fftPoints = 1;
while (fftPoints < numberSamples) {
fftPoints <<= 1;
if (fftPoints < 1) {
throw new Error("Invalid # of FFT points: " + fftPoints);
}
}
return fftPoints;
}
/**
* Establish the recursion. The FFT computation will be computed by as a recursion. Each stage in the butterfly will
* be fully computed during recursion. In fact, we use the mechanism of recursion only because it's the simplest way
* of switching the "input" and "output" vectors. The output of a stage is the input to the next stage. The
* butterfly computes elements in place, but we still need to switch the vectors. We could copy it (not very
* efficient...) or, in C, switch the pointers. We can avoid the pointers by using recursion.
*
* @param input input sequence
* @param output output sequence
* @param numberFftPoints number of points in the FFT
* @param invert whether it's direct (false) or inverse (true) FFT
*/
private void recurseFft(Complex[] input,
double[] output,
int numberFftPoints,
boolean invert) {
double divisor;
/**
* The direct and inverse FFT are essentially the same
* algorithm, except for two difference: a scaling factor of
* "numberFftPoints" and the signal of the exponent in the
* weightFft vectors, defined in the method
* <code>createWeightFft</code>.
*/
if (!invert) {
divisor = 1.0;
} else {
divisor = numberFftPoints;
}
/**
* Initialize the "from" and "to" variables.
*/
for (int i = 0; i < numberFftPoints; i++) {
to[i].reset();
from[i].scaleComplex(input[i], divisor);
}
/**
* Repeat the recursion log2(numberFftPoints) times,
* i.e., we have log2(numberFftPoints) butterfly stages.
*/
butterflyStage(from, to, numberFftPoints, numberFftPoints >> 1);
/**
* Compute energy ("float") for each frequency point
* from the fft ("complex")
*/
if ((this.logBase2NumberFftPoints & 1) == 0) {
for (int i = 0; i <= (numberFftPoints >> 1); i++) {
output[i] = from[i].squaredMagnitudeComplex();
}
} else {
for (int i = 0; i <= (numberFftPoints >> 1); i++) {
output[i] = to[i].squaredMagnitudeComplex();
}
}
}
/**
* Compute one stage in the FFT butterfly. The name "butterfly" appears because this method computes elements in
* pairs, and a flowgraph of the computation (output "0" comes from input "0" and "1" and output "1" comes from
* input "0" and "1") resembles a butterfly.
* <p/>
* We repeat <code>butterflyStage</code> for <b>log_2(numberFftPoints)</b> stages, by calling the recursion with the
* argument <code>currentDistance</code> divided by 2 at each call, and checking if it's still > 0.
*
* @param from the input sequence at each stage
* @param to the output sequence
* @param numberFftPoints the total number of points
* @param currentDistance the "distance" between elements in the butterfly
*/
private void butterflyStage(Complex[] from,
Complex[] to,
int numberFftPoints,
int currentDistance) {
int ndx1From;
int ndx2From;
int ndx1To;
int ndx2To;
int ndxWeightFft;
if (currentDistance > 0) {
int twiceCurrentDistance = 2 * currentDistance;
for (int s = 0; s < currentDistance; s++) {
ndx1From = s;
ndx2From = s + currentDistance;
ndx1To = s;
ndx2To = s + (numberFftPoints >> 1);
ndxWeightFft = 0;
while (ndxWeightFft < (numberFftPoints >> 1)) {
/**
* <b>weightFftTimesFrom2 = weightFft[k] </b>
* <b> *from[ndx2From]</b>
*/
weightFftTimesFrom2.multiplyComplex
(weightFft[ndxWeightFft], from[ndx2From]);
/**
* <b>to[ndx1To] = from[ndx1From] </b>
* <b> + weightFftTimesFrom2</b>
*/
to[ndx1To].addComplex
(from[ndx1From], weightFftTimesFrom2);
/**
* <b>to[ndx2To] = from[ndx1From] </b>
* <b> - weightFftTimesFrom2</b>
*/
to[ndx2To].subtractComplex
(from[ndx1From], weightFftTimesFrom2);
ndx1From += twiceCurrentDistance;
ndx2From += twiceCurrentDistance;
ndx1To += currentDistance;
ndx2To += currentDistance;
ndxWeightFft += currentDistance;
}
}
/**
* This call'd better be the last call in this block, so when
* it returns we go straight into the return line below.
*
* We switch the <i>to</i> and <i>from</i> variables,
* the total number of points remains the same, and
* the <i>currentDistance</i> is divided by 2.
*/
butterflyStage(to, from, numberFftPoints, (currentDistance >> 1));
}
}
}