Package edu.cmu.sphinx.frontend.denoise

Source Code of edu.cmu.sphinx.frontend.denoise.Denoise

/*
* Copyright 2013 Carnegie Mellon University.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.denoise;

import java.util.Arrays;

import edu.cmu.sphinx.frontend.BaseDataProcessor;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.frontend.DataProcessingException;
import edu.cmu.sphinx.frontend.DataStartSignal;
import edu.cmu.sphinx.frontend.DoubleData;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;

/**
* The noise filter, same as implemented in sphinxbase/sphinxtrain/pocketsphinx.
*
* The noise removal algorithm is inspired by the following papers:
* "Computationally Efficient Speech Enhancement by Spectral Minima Tracking"
* by G. Doblinger, and
*
* "Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition"
* by C. Kim.
*
* For recent research and the state of the art, see papers about IMCRA and
* "A Minimum-Mean-Square-Error Noise Reduction Algorithm On Mel-Frequency
* Cepstra For Robust Speech Recognition" by Dong Yu and others.
*
*/
public class Denoise extends BaseDataProcessor {

    double[] power;
    double[] noise;
    double[] floor;
    double[] peak;

    @S4Double(defaultValue = 0.7)
    public final static String LAMBDA_POWER = "lambdaPower";
    double lambdaPower;

    @S4Double(defaultValue = 0.999)
    public final static String LAMBDA_A = "lambdaA";
    double lambdaA;

    @S4Double(defaultValue = 0.5)
    public final static String LAMBDA_B = "lambdaB";
    double lambdaB;

    @S4Double(defaultValue = 0.85)
    public final static String LAMBDA_T = "lambdaT";
    double lambdaT;

    @S4Double(defaultValue = 0.2)
    public final static String MU_T = "muT";
    double muT;

    @S4Double(defaultValue = 2.0)
    public final static String EXCITATION_THRESHOLD = "excitationThreshold";
    double excitationThreshold;

    @S4Double(defaultValue = 20.0)
    public final static String MAX_GAIN = "maxGain";
    double maxGain;

    @S4Integer(defaultValue = 4)
    public final static String SMOOTH_WINDOW = "smoothWindow";
    int smoothWindow;

    final static double EPS = 1e-10;

    public Denoise(double lambdaPower, double lambdaA, double lambdaB,
            double lambdaT, double muT, double excitationThreshold,
            double maxGain, int smoothWindow) {
        this.lambdaPower = lambdaPower;
        this.lambdaA = lambdaA;
        this.lambdaB = lambdaB;
        this.lambdaT = lambdaT;
        this.muT = muT;
        this.excitationThreshold = excitationThreshold;
        this.maxGain = maxGain;
        this.smoothWindow = smoothWindow;
    }

    public Denoise() {
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util
     * .props.PropertySheet)
     */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);
        lambdaPower = ps.getDouble(LAMBDA_POWER);
        lambdaA = ps.getDouble(LAMBDA_A);
        lambdaB = ps.getDouble(LAMBDA_B);
        lambdaT = ps.getDouble(LAMBDA_T);
        muT = ps.getDouble(MU_T);
        excitationThreshold = ps.getDouble(EXCITATION_THRESHOLD);
        maxGain = ps.getDouble(MAX_GAIN);
        smoothWindow = ps.getInt(SMOOTH_WINDOW);
    }

    @Override
    public Data getData() throws DataProcessingException {
        Data inputData = getPredecessor().getData();
        int i;

        if (inputData instanceof DataStartSignal) {
            power = null;
            noise = null;
            floor = null;
            peak = null;
            return inputData;
        }
        if (!(inputData instanceof DoubleData)) {
            return inputData;
        }

        DoubleData inputDoubleData = (DoubleData) inputData;
        double[] input = inputDoubleData.getValues();
        int length = input.length;

        if (power == null)
            initStatistics(input, length);

        updatePower(input);

        estimateEnvelope(power, noise);

        double[] signal = new double[length];
        for (i = 0; i < length; i++) {
            signal[i] = Math.max(power[i] - noise[i], 0.0);
        }

        estimateEnvelope(signal, floor);

        tempMasking(signal);

        powerBoosting(signal);

        double[] gain = new double[length];
        for (i = 0; i < length; i++) {
            gain[i] = signal[i] / (power[i] + EPS);
            gain[i] = Math.min(Math.max(gain[i], 1.0 / maxGain), maxGain);
        }
        double[] smoothGain = smooth(gain);

        for (i = 0; i < length; i++) {
            input[i] *= smoothGain[i];
        }

        return inputData;
    }

    private double[] smooth(double[] gain) {
        double[] result = new double[gain.length];
        for (int i = 0; i < gain.length; i++) {
            int start = Math.max(i - smoothWindow, 0);
            int end = Math.min(i + smoothWindow + 1, gain.length);
            double sum = 0.0;
            for (int j = start; j < end; j++) {
                sum += gain[j];
            }
            result[i] = sum / (end - start);
        }
        return result;
    }

    private void powerBoosting(double[] signal) {
        for (int i = 0; i < signal.length; i++) {
            if (signal[i] < floor[i])
                signal[i] = floor[i];
            if (signal[i] < excitationThreshold * noise[i])
                signal[i] = floor[i];
        }
    }

    private void tempMasking(double[] signal) {
        for (int i = 0; i < signal.length; i++) {
            double in = signal[i];

            peak[i] *= lambdaT;
            if (signal[i] < lambdaT * peak[i])
                signal[i] = peak[i] * muT;

            if (in > peak[i])
                peak[i] = in;
        }
    }

    private void updatePower(double[] input) {
        for (int i = 0; i < input.length; i++) {
            power[i] = lambdaPower * power[i] + (1 - lambdaPower) * input[i];
        }
    }

    private void estimateEnvelope(double[] signal, double[] envelope) {
        for (int i = 0; i < signal.length; i++) {
            if (signal[i] > envelope[i])
                envelope[i] = lambdaA * envelope[i] + (1 - lambdaA) * signal[i];
            else
                envelope[i] = lambdaB * envelope[i] + (1 - lambdaB) * signal[i];
        }
    }

    private void initStatistics(double[] input, int length) {
        /* no previous data, initialize the statistics */
        power = Arrays.copyOf(input, length);
        noise = Arrays.copyOf(input, length);
        floor = new double[length];
        peak = new double[length];
        for (int i = 0; i < length; i++) {
            floor[i] = input[i] / maxGain;
        }
    }
}
TOP

Related Classes of edu.cmu.sphinx.frontend.denoise.Denoise

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.