// Source code of org.vocvark.jAudioTools.FeaturesExtractionProcessor

package org.vocvark.jAudioTools;




import org.vocvark.Aggregators.AggregatorContainer;
import org.vocvark.AudioFeatures.FeatureExtractor;
import org.vocvark.AudioFeatures.FeatureExtractorBaseImpl;
import org.vocvark.Cancel;
import org.vocvark.DataTypes.ExtractionResult;
import org.vocvark.DataTypes.FeatureDefinition;
import org.vocvark.DataTypes.OverallResult;
import org.vocvark.DataTypes.PerWindowResult;
import org.vocvark.DataTypes.RecordingInfo;
import org.vocvark.DataTypes.Settings;
import org.vocvark.Updater;


import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;




/**
 * This class is used to pre-process and extract features from audio recordings.
 * An object of this class should be instantiated with parameters indicating the
 * details of how features are to be extracted.
 * <p/>
 * The extractFeatures method should be called whenever recordings are available
 * to be analyzed. This method should be called once for each recording. Each
 * call returns the extracted feature values for that recording as an
 * ExtractionResult.
 * <p/>
 * No separate finalisation call is defined by this class; every call to
 * extractFeatures returns a complete result on its own.
 * <p/>
 * Features are extracted for each window and, when the settings request overall
 * recording features, the per-window values are handed to the configured
 * aggregators (e.g. to produce an average and standard deviation, depending on
 * which aggregators are configured) and the aggregated values are returned.
 *
 * @author Cory McKay; Vladimir Kravtsov
 */
public class FeaturesExtractionProcessor {
    // Control parameters for the extraction (window size and overlap, sampling
    // rate, normalisation, whether overall recording features are produced).
    private Settings settings;
    // The features that are to be extracted, in the order in which they are
    // extracted (dependencies come before the features that need them).
    private List<FeatureExtractor> featureExtractors;


    // The dependencies of the features in the featureExtractors field.
    // The first index corresponds to the featureExtractors index
    // and the second identifies the number of the dependency.
    // Each entry is the index, within featureExtractors, of the feature that
    // is depended upon. featureExtractorDependencies[i] is null if feature i
    // reports no dependencies.
    private int[][] featureExtractorDependencies;


    // The largest number of previous windows that must already have been
    // processed before each feature can be extracted. The index corresponds
    // to that of featureExtractors.
    private int[] maxFeatureOffsets;


    // Which features are to be saved after processing. Entries correspond to
    // the featureExtractors field. Features that are extracted only because
    // another feature depends on them are marked false.
    private Boolean[] featuresToSave;


    // Hook for allowing visual updates of how far along the extraction is.
    // Replaced on every call to extractFeatures; may be null.
    private Updater updater;


    // Allows an external entity to halt execution.
    private Cancel cancel;


    // Produces the overall (per-recording) results from the per-window values
    // when the settings ask for overall recording features.
    private AggregatorContainer aggregatorContainer;


    /* CONSTRUCTOR ************************************************************ */


    /**
     * Validates and stores the configuration to use for extracting features
     * from audio recordings. Prepares the feature_vector_file and
     * feature_key_file XML files for saving.
     *
     *
     * @param allFeatureExtractors
     * @param featuresToSaveAmongAll Which features are to be saved. Entries correspond to the
     *                                   all_feature_extractors parameter.
     * @throws Exception Throws an informative exception if the input parameters are
     *                   invalid.
     */
    public FeaturesExtractionProcessor(
            Settings settings,
            List<FeatureExtractor> allFeatureExtractors,
            Boolean[] featuresToSaveAmongAll,
            Cancel cancel,
            AggregatorContainer container
    ) throws Exception {
        validateSettings(settings, featuresToSaveAmongAll);
        this.settings = settings;
        this.cancel = cancel;
        this.aggregatorContainer = container;


        // Find which features need to be extracted and in what order. Also find
        // the indices of dependencies and the maximum offsets for each feature.
        findAndOrderFeaturesToExtract(allFeatureExtractors, featuresToSaveAmongAll);
    }


    private void validateSettings(Settings settings, Boolean[] featuresToSaveAmongAll) throws Exception {
        // Throw an exception if the control parameters are invalid
        if (settings.getWindowOverlap() < 0.0 || settings.getWindowOverlap() >= 1.0)
            throw new Exception("Window overlap fraction is " + settings.getWindowOverlap()
                    + ".\n" + "This value must be 0.0 or above and less than 1.0.");
        if (settings.getWindowSize() < 3)
            throw new Exception("Window size is " + settings.getWindowSize() + ".\n" + "This value must be above 2.");
        Boolean isOneOrMoreSelected = false;
        for (Boolean aFeaturesToSaveAmongAll : featuresToSaveAmongAll) {
            if (aFeaturesToSaveAmongAll) {
                isOneOrMoreSelected = true;
                break;
            }
        }
        if (!isOneOrMoreSelected) {
            throw new Exception("No features have been set to be saved.");
        }
    }


    /**
     * Extract the features from the provided audio file. This includes
     * pre-processing involving sample rate conversion, windowing and, possibly,
     * normalisation. The feature values are automatically saved to the
     * feature_vector_file XML file referred to by the values_writer field. The
     * definitions of the features that are saved are also saved to the
     * feature_key_file XML file referred to by the definitions_writer field.
     *
     * @param recordingInfo The audio file to extract features from.
     */
    public ExtractionResult extractFeatures(RecordingInfo recordingInfo, Updater updater) throws Exception {
        File recording = new File(recordingInfo.file_path);
        // Pre-process the recording and extract the samples from the audio
        this.updater = updater;
        double[] samples = preProcessRecording(recording);
        if (cancel.isCancel()) {
            throw new RuntimeException("Killed after loading data");
        }
        // Calculate the window start indices
        LinkedList<Integer> window_start_indices_list = new LinkedList<Integer>();
        int this_start = 0;
        while (this_start < samples.length) {
            window_start_indices_list.add(this_start);
            this_start += settings.getWindowSize() - settings.getWindowOverlapOffset();
        }
        Integer[] window_start_indices_I = window_start_indices_list.toArray(new Integer[1]);
        int[] window_start_indices = new int[window_start_indices_I.length];


        // if were using a progress bar, set its max update
        if (updater != null) {
            updater.setFileLength(window_start_indices.length);
        }


        for (int i = 0; i < window_start_indices.length; i++)
            window_start_indices[i] = window_start_indices_I[i];


        // Extract the feature values from the samples
        double[][][] window_feature_values = getFeatures(samples, window_start_indices);




        // Find the feature averages and standard deviations if appropriate


        if (settings.isSaveOverallRecordingFeatures()) {
            aggregatorContainer.addFeaturesExtractors(featureExtractors, featuresToSave);
            aggregatorContainer.aggregate(window_feature_values);
            return new OverallResult(recordingInfo, aggregatorContainer.getResults());
        } else {
            return new PerWindowResult(recordingInfo, perWindowFeaturesValuesToMap(window_feature_values));
        }
    }


    private Map<FeatureDefinition, double[][]> perWindowFeaturesValuesToMap(double[][][] windowFeatureValues) {
        Map<FeatureDefinition, double[][]> result = new HashMap<FeatureDefinition, double[][]>();
        for (int feature = 0; feature < featureExtractors.size(); feature++) {
            if (featuresToSave[feature]) {
                double[][] featureValueOnWindow = new double[windowFeatureValues.length][];
                for (int window = 0; window < windowFeatureValues.length; window++) {


                    featureValueOnWindow[window] = windowFeatureValues[window][feature];


                }
                result.put(featureExtractors.get(feature).getFeatureDefinition(), featureValueOnWindow);
            }
        }
        return result;
    }


    /* PRIVATE METHODS ******************************************************** */


    /**
     * Fills the featureExtractors, featureExtractorDependencies,
     * maxFeatureOffsets and featuresToSave fields. This involves finding
     * which features need to be extracted and in what order and finding the
     * indices of dependencies and the maximum offsets for each feature.
     * <p/>
     * Daniel McEnnis 05-07-05 added feature offset of dependancies to
     * max_offset
     *
     * @param allFeatureExtractors     All features that can be extracted.
     * @param featuresToSaveAmongAll Which features are to be saved. Entries correspond to the
     */
    private void findAndOrderFeaturesToExtract(
            List<FeatureExtractor> allFeatureExtractors, Boolean[] featuresToSaveAmongAll
    ) {
        // Find the names of all features
        // Find dependencies of all features marked to be extracted.
        // Mark as null if features are not to be extracted. Note that will also
        // be null if there are no dependencies.
        String[] allFeatureNames = new String[allFeatureExtractors.size()];
        String[][] dependencies = new String[allFeatureExtractors.size()][];
        for (int i = 0; i < allFeatureExtractors.size(); i++) {
            allFeatureNames[i] = allFeatureExtractors.get(i).getFeatureDefinition().name;
            if (featuresToSaveAmongAll[i]) {
                dependencies[i] = allFeatureExtractors.get(i).getDependencies();
            } else {
                dependencies[i] = null;
            }
        }
        // Add dependencies to dependencies and if any features are not marked
        // for
        // saving but are marked as a dependency of a feature that is marked to
        // be
        // saved. Also fill featuresToExtract in order to know what features
        // to
        // extract(but not necessarily save).
        boolean done = false;
        Boolean[] featuresToExtract = new Boolean[dependencies.length];
        System.arraycopy(featuresToSaveAmongAll, 0, featuresToExtract, 0, featuresToExtract.length);
        while (!done) {
            done = true;
            for (int feat = 0; feat < dependencies.length; feat++)
                if (dependencies[feat] != null)
                    for (int i = 0; i < dependencies[feat].length; i++) {
                        String name = dependencies[feat][i];
                        for (int j = 0; j < allFeatureNames.length; j++) {
                            if (name.equals(allFeatureNames[j])) {
                                if (!featuresToExtract[j]) {
                                    featuresToExtract[j] = true;
                                    dependencies[j] = allFeatureExtractors.get(j).getDependencies();
                                    if (dependencies[j] != null)
                                        done = false;
                                }
                                j = allFeatureNames.length;
                            }
                        }
                    }
        }


        // Find the correct order to extract features in by filling the
        // featureExtractors field
        int numberFeaturesToExtract = 0;
        for (boolean aFeaturesToExtract : featuresToExtract)
            if (aFeaturesToExtract)
                numberFeaturesToExtract++;
        FeatureExtractor[] featureExtractors = new FeatureExtractorBaseImpl[numberFeaturesToExtract];
        featuresToSave = new Boolean[numberFeaturesToExtract];
        Arrays.fill(featuresToSave, false);
        Boolean[] featureAdded = new Boolean[dependencies.length];
        Arrays.fill(featureAdded, false);
        int currentPosition = 0;
        done = false;
        while (!done) {
            done = true;


            // Add all features that have no remaining dependencies and remove
            // their dependencies from all not added features
            for (int feat = 0; feat < dependencies.length; feat++) {
                if (featuresToExtract[feat] && !featureAdded[feat])
                    if (dependencies[feat] == null) { // addAggregatorList feature if it has no dependencies
                        featureAdded[feat] = true;
                        featureExtractors[currentPosition] = allFeatureExtractors.get(feat);
                        featuresToSave[currentPosition] = featuresToSaveAmongAll[feat];
                        currentPosition++;
                        done = false;


                        // Remove this dependency from all features that have
                        // it as a dependency and are marked to be extracted
                        for (int i = 0; i < dependencies.length; i++)
                            if (featuresToExtract[i] && dependencies[i] != null) {
                                int num_defs = dependencies[i].length;
                                for (int j = 0; j < num_defs; j++) {
                                    if (dependencies[i][j].equals(allFeatureNames[feat])) {
                                        if (dependencies[i].length == 1) {
                                            dependencies[i] = null;
                                            j = num_defs;
                                        } else {
                                            String[] temp = new String[dependencies[i].length - 1];
                                            int m = 0;
                                            for (int k = 0; k < dependencies[i].length; k++) {
                                                if (k != j) {
                                                    temp[m] = dependencies[i][k];
                                                    m++;
                                                }
                                            }
                                            dependencies[i] = temp;
                                            j--;
                                            num_defs--;
                                        }
                                    }
                                }
                            }
                    }
            }
        }


        // Find the indices of the feature extractor dependencies for each
        // feature
        // extractor
        featureExtractorDependencies = new int[featureExtractors.length][];
        String[] feature_names = new String[featureExtractors.length];
        for (int feat = 0; feat < feature_names.length; feat++) {
            feature_names[feat] = featureExtractors[feat]
                    .getFeatureDefinition().name;
        }
        String[][] feature_dependencies_str = new String[featureExtractors.length][];
        for (int feat = 0; feat < feature_dependencies_str.length; feat++)
            feature_dependencies_str[feat] = featureExtractors[feat]
                    .getDependencies();
        for (int i = 0; i < feature_dependencies_str.length; i++)
            if (feature_dependencies_str[i] != null) {
                featureExtractorDependencies[i] = new int[feature_dependencies_str[i].length];
                for (int j = 0; j < feature_dependencies_str[i].length; j++)
                    for (int k = 0; k < feature_names.length; k++)
                        if (feature_dependencies_str[i][j]
                                .equals(feature_names[k]))
                            featureExtractorDependencies[i][j] = k;
            }


        // Find the maximum offset for each feature
        // Daniel McEnnis 5-07-05 added feature offset of dependancies to
        // max_offset
        maxFeatureOffsets = new int[featureExtractors.length];
        for (int i = 0; i < maxFeatureOffsets.length; i++) {
            if (featureExtractors[i].getDependencyOffsets() == null)
                maxFeatureOffsets[i] = 0;
            else {
                int[] theseOffsets = featureExtractors[i].getDependencyOffsets();
                maxFeatureOffsets[i] = Math.abs(theseOffsets[0] + maxFeatureOffsets[featureExtractorDependencies[i][0]]);
                for (int k = 0; k < theseOffsets.length; k++) {
                    int val = Math.abs(theseOffsets[k])
                            + maxFeatureOffsets[featureExtractorDependencies[i][k]];
                    if (val > maxFeatureOffsets[i]) {
                        maxFeatureOffsets[i] = val;
                    }
                }
            }
        }


       this.featureExtractors = Arrays.asList(featureExtractors);
    }


    /**
     * Returns the samples stored in the given audio file.
     * <p/>
     * The samples are re-encoded using the sampling rate in the settings.samplingRate
     * field. All channels are projected into one channel. Samples are
     * normalised if the normalise field is true.
     *
     * @param recordingFile The audio file to extract samples from.
     * @return The processed audio samples. Values will fall between a minimum
     *         of -1 and +1. The indice identifies the sample number.
     * @throws Exception An exception is thrown if a problem occurs during file
     *                   reading or pre- processing.
     */
    private double[] preProcessRecording(File recordingFile) throws Exception {
        // Get the original audio and its format
        AudioInputStream originalStream = AudioSystem.getAudioInputStream(recordingFile);
        AudioFormat originalFormat = originalStream.getFormat();


        // Set the bit depth
        int bitDepth = originalFormat.getSampleSizeInBits();
        if (bitDepth != 8 && bitDepth != 16) {
            bitDepth = 16;
        }


        // If the audio is not PCM signed big endian, then convert it to PCM
        // signed
        // This is particularly necessary when dealing with MP3s
        AudioInputStream secondStream = originalStream;
        if (originalFormat.getEncoding() != AudioFormat.Encoding.PCM_SIGNED || !originalFormat.isBigEndian()) {
            AudioFormat newFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    originalFormat.getSampleRate(),
                    bitDepth,
                    originalFormat.getChannels(),
                    originalFormat.getChannels() * (bitDepth / 8),
                    originalFormat.getSampleRate(),
                    true);
            secondStream = AudioSystem.getAudioInputStream(newFormat, originalStream);
        }


        // Convert to the set sampling rate, if it is not already at this
        // sampling rate.
        // Also, convert to an appropriate bit depth if necessary.
        AudioInputStream newStream = secondStream;
        if (originalFormat.getSampleRate() !=  settings.getSamplingRate().floatValue()
                || bitDepth != originalFormat.getSampleSizeInBits() ) {
            AudioFormat newFormat = new AudioFormat(
                    AudioFormat.Encoding.PCM_SIGNED,
                    settings.getSamplingRate().floatValue(),
                    bitDepth,
                    originalFormat.getChannels(),
                    originalFormat.getChannels() * (bitDepth / 8),
                    originalFormat.getSampleRate(),
                    true);
            newStream = AudioSystem.getAudioInputStream(newFormat, secondStream);
        }


        // Extract data from the AudioInputStream
        AudioSamples audioData = new AudioSamples(newStream, recordingFile.getPath(), false);


        // Normalise samples if this option has been requested
        if (settings.isNormalize()) {
            audioData.normalizeMixedDownSamples();
        }


        // Return all channels compressed into one
        return audioData.getSamplesMixedDown();
    }


    /**
     * Breaks the given samples into the appropriate windows and extracts
     * features from each window.
     *
     * @param samples              The samples to extract features from. Sample values should
     *                             generally be between -1 and +1.
     * @param window_start_indices The indices of samples that correspond to where each window
     *                             should start.
     * @return The extracted feature values for this recording. The first indice
     *         identifies the window, the second identifies the feature and the
     *         third identifies the feature value. The third dimension will be
     *         null if the given feature could not be extracted for the given
     *         window.
     * @throws Exception Throws an exception if a problem occurs.
     */
    private double[][][] getFeatures(double[] samples, int[] window_start_indices) throws Exception {
        // The extracted feature values for this recording. The first indice identifies the window, the second
        // identifies the feature and the third identifies the feature value.
        double[][][] results = new double[window_start_indices.length][featureExtractors.size()][];


        // Calculate how frequently to make updates to the updater;
        int updateThreshold = 1;
        if (window_start_indices.length > 100) {
            updateThreshold = window_start_indices.length / 100;
        }


        // Extract features from each window one by one and addAggregatorList save the results.
        // The last window is zero-padded at the end if it falls off the edge of the provided samples.
        for (int win = 0; win < window_start_indices.length; win++) {
            // Do we need to update the progress bar or not
            if ((updater != null) && (win % updateThreshold == 0)) {
                updater.announceUpdate(win);
                if (cancel.isCancel()) {
                    throw new RuntimeException("Killed while processing features");
                }
            }


            // Find the samples in this window and zero-pad if necessary
            double[] window = new double[settings.getWindowSize()];
            int start_sample = window_start_indices[win];
            int end_sample = start_sample + settings.getWindowSize() - 1;
            if (end_sample < samples.length)
                for (int samp = start_sample; samp <= end_sample; samp++)
                    window[samp - start_sample] = samples[samp];
            else
                for (int samp = start_sample; samp <= end_sample; samp++) {
                    if (samp < samples.length)
                        window[samp - start_sample] = samples[samp];
                    else
                        window[samp - start_sample] = 0.0;
                }


            // Extract the features one by one
            for (int feat = 0; feat < featureExtractors.size(); feat++) {
                // Only extract this feature if enough previous information is available to extract this feature
                if (win >= maxFeatureOffsets[feat]) {
                    // Find the correct feature
                    FeatureExtractor feature = featureExtractors.get(feat);


                    // Find previously extracted feature values that this feature needs
                    double[][] otherFeatureValues = null;
                    if (featureExtractorDependencies[feat] != null) {
                        otherFeatureValues = new double[featureExtractorDependencies[feat].length][];
                        for (int i = 0; i < featureExtractorDependencies[feat].length; i++) {
                            int feature_indice = featureExtractorDependencies[feat][i];
                            int offset = feature.getDependencyOffsets()[i];
                            otherFeatureValues[i] = results[win + offset][feature_indice];
                        }
                    }
                    // Store the extracted feature values
                    results[win][feat] = feature.extractFeature(window, settings.getSamplingRate(), otherFeatureValues);
                } else
                    results[win][feat] = null;
            }
        }


        return results;
    }


    /**
     * Returns the settings this processor was constructed with.
     *
     * @return The settings object passed to the constructor (the same instance,
     *         not a copy).
     */
    public Settings getSettings() {
        return settings;
    }
}
// Source code of org.vocvark.jAudioTools.FeaturesExtractionProcessor

// Related classes of org.vocvark.jAudioTools.FeaturesExtractionProcessor