/*
* Copyright 2006-2007 Columbia University.
*
* This file is part of MEAPsoft.
*
* MEAPsoft is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* MEAPsoft is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MEAPsoft; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* See the file "COPYING" for the text of the license.
*/
package com.meapsoft.featextractors;
import java.util.Arrays;
import com.meapsoft.FeatExtractor;
import com.meapsoft.STFT;
import com.meapsoft.RingMatrix;
/**
* Averages all spectral frames together into a single feature vector
* and then converts the vector to the mel frequency scale.
*
* @author Mike Mandel (mim@ee.columbia.edu)
*/
public class AvgMelSpec extends FeatureExtractor {
    /** Natural log of 10, hoisted so the dB conversion does not recompute it per bin. */
    private static final double LN_10 = Math.log(10.0);

    /** Hard upper frequency limit (Hz) of the mel filterbank (dpwe 2006-12-11). */
    private static final double MAX_FREQ_HZ = 8000.0;

    // for each mel bin...
    /** Target mel value at the center of each bin (outDim+2 entries). */
    protected double[] melCenter;
    /** Mel width divisor for each bin (constant, except it broadens in low bins). */
    protected double[] melWidth;

    // for each fft bin...
    /** Mel-scale value of the frequency assigned to each FFT bin. */
    protected double[] melOfLin;
    /** Scratch buffer for the averaged spectrum; overwritten by every call to features(). */
    protected double[] linSpec;

    /** Number of FFT bins (rows) expected in the input STFT, i.e. nfft/2+1. */
    protected int N;
    /** Number of mel bins in the returned feature vector. */
    protected int outDim;

    /**
     * Converts a frequency in Hz to the mel scale.
     *
     * @param fq frequency in Hz
     * @return the corresponding mel value, 1127 * ln(1 + fq/700)
     */
    public double lin2mel(double fq) {
        return 1127.0 * Math.log(1.0 + fq / 700.0);
    }

    /**
     * Converts a mel value back to a frequency in Hz (inverse of lin2mel).
     *
     * @param mel value on the mel scale
     * @return the corresponding frequency in Hz
     */
    public double mel2lin(double mel) {
        return 700.0 * (Math.exp(mel / 1127.0) - 1.0);
    }

    /** Default constructor - uses 40 mel spaced bins and the global FFT size and sampling rate. */
    public AvgMelSpec() {
        this(FeatExtractor.nfft / 2 + 1, FeatExtractor.feSamplingRate, 40);
    }

    /**
     * Precomputes the mel bin centers and widths and the mel value of every FFT bin.
     *
     * @param N          number of FFT bins per frame (nfft/2+1)
     * @param sampleRate sampling rate of the analysed audio in Hz
     * @param outDim     number of mel bins to produce
     */
    public AvgMelSpec(int N, float sampleRate, int outDim) {
        this.N = N;
        this.outDim = outDim;
        linSpec = new double[N];

        double nyquist = sampleRate / 2.0;
        // features() treats N as nfft/2+1 (it rebuilds the STFT with nfft = (N-1)*2),
        // so bin i lies at i * sampleRate / (2*(N-1)) and bin N-1 is the Nyquist bin.
        // Guard N <= 1 to avoid dividing by zero.
        double hzPerBin = (N > 1) ? nyquist / (N - 1) : nyquist;

        // Calculate the locations of the bin centers on the mel scale,
        // equally spaced between 0 Hz and min(8 kHz, Nyquist).
        melCenter = new double[outDim + 2];
        melWidth = new double[outDim + 2];
        double melMin = lin2mel(0);
        double melMax = lin2mel(Math.min(MAX_FREQ_HZ, nyquist));
        for (int i = 0; i < outDim + 2; i++) {
            melCenter[i] = melMin + i * (melMax - melMin) / (outDim + 1);
        }

        for (int i = 0; i < outDim + 1; i++) {
            melWidth[i] = melCenter[i + 1] - melCenter[i];
            double linBinWidth = (mel2lin(melCenter[i + 1]) - mel2lin(melCenter[i])) / hzPerBin;
            if (linBinWidth < 1) {
                // The mel bin is narrower than one FFT bin: broaden it so that it
                // spans one FFT bin spacing above its center.
                melWidth[i] = lin2mel(mel2lin(melCenter[i]) + hzPerBin) - melCenter[i];
            }
        }

        // precalculate mel translations of fft bin frequencies
        melOfLin = new double[N];
        for (int i = 0; i < N; i++) {
            melOfLin[i] = lin2mel(i * hzPerBin);
        }
    }

    /**
     * Averages the spectral frames of a chunk and maps the average onto mel bins.
     *
     * The frames are averaged as they are, the average is converted from decibels
     * (10*log10) to linear scale, weighted into each mel bin with a triangular
     * window, and converted back to decibels.
     *
     * @param stft       spectrogram to read from; if its row count differs from N
     *                   the STFT is recomputed from its samples with nfft = (N-1)*2
     * @param startFrame index of the first frame of the chunk
     * @param length     number of frames in the chunk
     * @return a vector of outDim mel bin levels in dB
     */
    public double[] features(STFT stft, long startFrame, int length) {
        double[] melSpec = new double[outDim];
        double[] curFrame;

        // we're expecting a certain frequency resolution...
        boolean recalculateSTFT = stft.getRows() != N;
        RingMatrix newstft = null;
        if (recalculateSTFT) {
            // recompute over the same span of samples at the expected resolution
            newstft = STFT.getSTFT(stft.getSamples(startFrame, startFrame + length), (N - 1) * 2, stft.nhop);
            length = newstft.getColumns();
        }

        // initialize average to 0
        Arrays.fill(linSpec, 0);

        // collect average linear spectrum
        for (int frame = 0; frame < length; frame++) {
            if (!recalculateSTFT) {
                curFrame = stft.getFrame(startFrame + frame);
            } else {
                curFrame = newstft.getColumn(frame);
            }
            for (int band = 0; band < linSpec.length; band++) {
                linSpec[band] += curFrame[band] / length;
            }
        }

        // convert log magnitude to linear magnitude for binning
        for (int band = 0; band < linSpec.length; band++) {
            linSpec[band] = Math.pow(10, linSpec[band] / 10);
        }

        // convert to mel scale
        // NOTE(review): filter `bin` is centered on melCenter[bin], so the first filter
        // is centered at 0 Hz and the top two centers only feed the width computation
        // - confirm this layout is intended.
        for (int bin = 0; bin < outDim; bin++) {
            double acc = 0;
            for (int i = 0; i < linSpec.length; ++i) {
                // triangular weight: 1 at the bin center, falling to 0 one width away
                double weight = 1.0 - (Math.abs(melOfLin[i] - melCenter[bin]) / melWidth[bin]);
                if (weight > 0) {
                    acc += weight * linSpec[i];
                }
            }
            // Take log (back to dB)
            melSpec[bin] = 10 * Math.log(acc) / LN_10;
        }

        // Audio scrubber takes care of normalization, level is a good cue,
        // so the vector is deliberately not normalized here.
        return melSpec;
    }

    /**
     * @return a one-sentence, human-readable description of this feature
     */
    public String description()
    {
        return "Computes the mean spectrum of a chunk and converts it to the perceptually weighted Mel frequency scale.";
    }
}