package org.vocvark.jAudioTools;
import org.vocvark.Aggregators.AggregatorContainer;
import org.vocvark.AudioFeatures.FeatureExtractor;
import org.vocvark.AudioFeatures.FeatureExtractorBaseImpl;
import org.vocvark.Cancel;
import org.vocvark.DataTypes.ExtractionResult;
import org.vocvark.DataTypes.FeatureDefinition;
import org.vocvark.DataTypes.OverallResult;
import org.vocvark.DataTypes.PerWindowResult;
import org.vocvark.DataTypes.RecordingInfo;
import org.vocvark.DataTypes.Settings;
import org.vocvark.Updater;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Pre-processes audio recordings and extracts features from them.
 * <p/>
 * An instance is configured once, through the constructor, with the settings,
 * the candidate feature extractors and the flags saying which of them are to be
 * saved. The constructor works out which extra features have to be extracted
 * because saved features depend on them, and in which order.
 * <p/>
 * The {@link #extractFeatures(RecordingInfo, Updater)} method should be called
 * once for each recording. It returns the extracted values as an
 * {@link ExtractionResult}: either aggregated over the whole recording or as
 * per-window values, depending on the settings.
 *
 * @author Cory McKay; Vladimir Kravtsov
 */
public class FeaturesExtractionProcessor {

    private Settings settings;

    // The features that are to be extracted, in extraction order.
    private List<FeatureExtractor> featureExtractors;

    // The dependencies of the features in the featureExtractors field.
    // The first index corresponds to the featureExtractors index and the
    // second identifies the number of the dependency. Each entry is the index
    // in featureExtractors of the feature that is depended upon. The first
    // dimension is null for a feature that reports no dependencies.
    private int[][] featureExtractorDependencies;

    // The largest number of windows of previous features that each feature
    // must have before it can be extracted. The index corresponds to that of
    // featureExtractors.
    private int[] maxFeatureOffsets;

    // Which features are to be saved after processing. Entries correspond to
    // the featureExtractors field.
    private Boolean[] featuresToSave;

    // Hook for allowing visual updates of how far along the extraction is.
    // Replaced on every call to extractFeatures; may be null.
    private Updater updater;

    // Allows an external entity to halt execution.
    private Cancel cancel;

    private AggregatorContainer aggregatorContainer;

    /* CONSTRUCTOR ************************************************************ */

    /**
     * Validates and stores the configuration to use for extracting features
     * from audio recordings, then determines which features must be extracted
     * and in what order.
     *
     * @param settings               The extraction settings (window size, window overlap,
     *                               sampling rate, normalisation, aggregation flag).
     * @param allFeatureExtractors   All features that can be extracted.
     * @param featuresToSaveAmongAll Which features are to be saved. Entries correspond to the
     *                               allFeatureExtractors parameter.
     * @param cancel                 Flag polled during extraction to abort it.
     * @param container              Aggregators applied when overall recording features are
     *                               requested by the settings.
     * @throws Exception Throws an informative exception if the input parameters are
     *                   invalid.
     */
    public FeaturesExtractionProcessor(
            Settings settings,
            List<FeatureExtractor> allFeatureExtractors,
            Boolean[] featuresToSaveAmongAll,
            Cancel cancel,
            AggregatorContainer container
    ) throws Exception {
        validateSettings(settings, featuresToSaveAmongAll);
        // Every extractor is looked up in the flag array by its index, so a
        // shorter flag array would otherwise fail later with an
        // ArrayIndexOutOfBoundsException.
        if (featuresToSaveAmongAll.length < allFeatureExtractors.size()) {
            throw new Exception("Only " + featuresToSaveAmongAll.length
                    + " save flags were given for " + allFeatureExtractors.size()
                    + " feature extractors.");
        }
        this.settings = settings;
        this.cancel = cancel;
        this.aggregatorContainer = container;
        // Find which features need to be extracted and in what order. Also find
        // the indices of dependencies and the maximum offsets for each feature.
        findAndOrderFeaturesToExtract(allFeatureExtractors, featuresToSaveAmongAll);
    }

    /**
     * Checks the control parameters.
     *
     * @param settings               The settings whose window overlap and window size are checked.
     * @param featuresToSaveAmongAll The save flags; at least one must be true.
     * @throws Exception If the window overlap is outside [0.0, 1.0), the window size is
     *                   below 3, or no feature is marked to be saved.
     */
    private void validateSettings(Settings settings, Boolean[] featuresToSaveAmongAll) throws Exception {
        if (settings.getWindowOverlap() < 0.0 || settings.getWindowOverlap() >= 1.0) {
            throw new Exception("Window overlap fraction is " + settings.getWindowOverlap()
                    + ".\n" + "This value must be 0.0 or above and less than 1.0.");
        }
        if (settings.getWindowSize() < 3) {
            throw new Exception("Window size is " + settings.getWindowSize() + ".\n" + "This value must be above 2.");
        }
        boolean isOneOrMoreSelected = false;
        for (Boolean selected : featuresToSaveAmongAll) {
            if (selected) {
                isOneOrMoreSelected = true;
                break;
            }
        }
        if (!isOneOrMoreSelected) {
            throw new Exception("No features have been set to be saved.");
        }
    }

    /* PUBLIC METHODS ********************************************************* */

    /**
     * Extracts the features from the provided audio file. This includes
     * pre-processing (format and sample rate conversion, optional
     * normalisation), windowing and the extraction itself.
     *
     * @param recordingInfo The audio file to extract features from.
     * @param updater       Receives progress notifications; may be null.
     * @return An {@link OverallResult} with the aggregated values when the settings ask
     *         for overall recording features, otherwise a {@link PerWindowResult}
     *         with the per-window values of the saved features.
     * @throws Exception        If the recording cannot be read or converted, contains no
     *                          samples, or the window step is not positive.
     * @throws RuntimeException If the extraction was cancelled.
     */
    public ExtractionResult extractFeatures(RecordingInfo recordingInfo, Updater updater) throws Exception {
        File recording = new File(recordingInfo.file_path);
        this.updater = updater;

        // Pre-process the recording and extract the samples from the audio
        double[] samples = preProcessRecording(recording);
        if (isCancelled()) {
            throw new RuntimeException("Killed after loading data");
        }
        if (samples.length == 0) {
            throw new Exception("The recording " + recording.getPath() + " contains no audio samples.");
        }

        int[] windowStartIndices = computeWindowStartIndices(samples.length);

        // If we are using a progress bar, set its maximum
        if (updater != null) {
            updater.setFileLength(windowStartIndices.length);
        }

        // Extract the feature values from the samples
        double[][][] windowFeatureValues = getFeatures(samples, windowStartIndices);

        // Aggregate over the recording if requested, otherwise report per window
        if (settings.isSaveOverallRecordingFeatures()) {
            aggregatorContainer.addFeaturesExtractors(featureExtractors, featuresToSave);
            aggregatorContainer.aggregate(windowFeatureValues);
            return new OverallResult(recordingInfo, aggregatorContainer.getResults());
        }
        return new PerWindowResult(recordingInfo, perWindowFeaturesValuesToMap(windowFeatureValues));
    }

    public Settings getSettings() {
        return settings;
    }

    /* PRIVATE METHODS ******************************************************** */

    /**
     * Tells whether the external cancel flag has been raised. A missing cancel
     * object is treated as "never cancelled".
     */
    private boolean isCancelled() {
        return cancel != null && cancel.isCancel();
    }

    /**
     * Computes the sample index at which each analysis window starts: 0, step,
     * 2 * step, ... for as long as the index is below sampleCount, where step
     * is the window size minus the window overlap offset.
     *
     * @param sampleCount The number of samples in the recording.
     * @return The start index of every window.
     * @throws Exception If the step is not positive, which would never advance.
     */
    private int[] computeWindowStartIndices(int sampleCount) throws Exception {
        int step = (int) (settings.getWindowSize() - settings.getWindowOverlapOffset());
        if (step <= 0) {
            throw new Exception("Window size (" + settings.getWindowSize()
                    + ") must be larger than the window overlap offset ("
                    + settings.getWindowOverlapOffset() + ").");
        }
        // Ceiling division, done in long so that sampleCount + step cannot overflow.
        int windowCount = (int) (((long) sampleCount + step - 1) / step);
        int[] starts = new int[windowCount];
        for (int i = 0; i < windowCount; i++) {
            starts[i] = i * step;
        }
        return starts;
    }

    /**
     * Regroups the per-window values by feature, keeping only the features that
     * are marked to be saved.
     *
     * @param windowFeatureValues Values indexed by window, then feature, then value.
     * @return For each saved feature's definition, its values indexed by window and
     *         then value. An entry for a window is null if the feature was not
     *         extracted for that window.
     */
    private Map<FeatureDefinition, double[][]> perWindowFeaturesValuesToMap(double[][][] windowFeatureValues) {
        Map<FeatureDefinition, double[][]> valuesByFeature = new HashMap<FeatureDefinition, double[][]>();
        int windowCount = windowFeatureValues.length;
        for (int feature = 0; feature < featureExtractors.size(); feature++) {
            if (!featuresToSave[feature]) {
                continue;
            }
            double[][] valuesPerWindow = new double[windowCount][];
            for (int window = 0; window < windowCount; window++) {
                valuesPerWindow[window] = windowFeatureValues[window][feature];
            }
            valuesByFeature.put(featureExtractors.get(feature).getFeatureDefinition(), valuesPerWindow);
        }
        return valuesByFeature;
    }

    /**
     * Fills the featureExtractors, featureExtractorDependencies,
     * maxFeatureOffsets and featuresToSave fields. This involves finding
     * which features need to be extracted and in what order and finding the
     * indices of dependencies and the maximum offsets for each feature.
     * <p/>
     * Daniel McEnnis 05-07-05 added feature offset of dependencies to
     * max_offset
     *
     * @param allFeatureExtractors   All features that can be extracted.
     * @param featuresToSaveAmongAll Which features are to be saved. Entries correspond to the
     *                               allFeatureExtractors parameter.
     * @throws IllegalArgumentException If a feature depends on a feature name that is not among
     *                                  allFeatureExtractors.
     * @throws IllegalStateException    If the dependencies cannot be ordered (circular dependency).
     */
    private void findAndOrderFeaturesToExtract(
            List<FeatureExtractor> allFeatureExtractors, Boolean[] featuresToSaveAmongAll
    ) {
        int totalFeatures = allFeatureExtractors.size();

        // Names of all features, and the still-unsatisfied dependencies of each
        // feature that is to be extracted. An entry is null when the feature is
        // not to be extracted or has no (remaining) dependencies.
        String[] allFeatureNames = new String[totalFeatures];
        String[][] dependencies = new String[totalFeatures][];
        boolean[] featuresToExtract = new boolean[totalFeatures];
        for (int i = 0; i < totalFeatures; i++) {
            FeatureExtractor extractor = allFeatureExtractors.get(i);
            allFeatureNames[i] = extractor.getFeatureDefinition().name;
            featuresToExtract[i] = featuresToSaveAmongAll[i];
            dependencies[i] = featuresToExtract[i] ? emptyToNull(extractor.getDependencies()) : null;
        }

        // Also extract (but do not necessarily save) every feature that a
        // feature to be extracted depends on, transitively.
        boolean changed = true;
        while (changed) {
            changed = false;
            for (int feat = 0; feat < totalFeatures; feat++) {
                if (dependencies[feat] == null) {
                    continue;
                }
                for (String dependencyName : dependencies[feat]) {
                    int dependency = indexOfName(allFeatureNames, dependencyName);
                    if (dependency < 0) {
                        throw new IllegalArgumentException("Feature \"" + allFeatureNames[feat]
                                + "\" depends on unknown feature \"" + dependencyName + "\".");
                    }
                    if (!featuresToExtract[dependency]) {
                        featuresToExtract[dependency] = true;
                        dependencies[dependency] =
                                emptyToNull(allFeatureExtractors.get(dependency).getDependencies());
                        if (dependencies[dependency] != null) {
                            changed = true;
                        }
                    }
                }
            }
        }

        // Find the correct order to extract features in
        int numberFeaturesToExtract = 0;
        for (boolean toExtract : featuresToExtract) {
            if (toExtract) {
                numberFeaturesToExtract++;
            }
        }
        // Typed by the interface so that any FeatureExtractor implementation can be stored.
        FeatureExtractor[] ordered = new FeatureExtractor[numberFeaturesToExtract];
        featuresToSave = new Boolean[numberFeaturesToExtract];
        Arrays.fill(featuresToSave, false);
        boolean[] featureAdded = new boolean[totalFeatures];
        int currentPosition = 0;
        boolean progress = true;
        while (progress) {
            progress = false;
            // Add all features that have no remaining dependencies and remove
            // them from the dependencies of all features not added yet
            for (int feat = 0; feat < totalFeatures; feat++) {
                if (!featuresToExtract[feat] || featureAdded[feat] || dependencies[feat] != null) {
                    continue;
                }
                featureAdded[feat] = true;
                ordered[currentPosition] = allFeatureExtractors.get(feat);
                featuresToSave[currentPosition] = featuresToSaveAmongAll[feat];
                currentPosition++;
                progress = true;
                for (int i = 0; i < totalFeatures; i++) {
                    if (featuresToExtract[i] && dependencies[i] != null) {
                        dependencies[i] = withoutDependency(dependencies[i], allFeatureNames[feat]);
                    }
                }
            }
        }
        if (currentPosition != numberFeaturesToExtract) {
            // Some feature never ran out of unsatisfied dependencies.
            throw new IllegalStateException("Could not order the features to extract: "
                    + (numberFeaturesToExtract - currentPosition)
                    + " feature(s) have circular dependencies.");
        }

        // Find the indices of the feature extractor dependencies for each
        // feature extractor
        String[] orderedNames = new String[ordered.length];
        for (int feat = 0; feat < ordered.length; feat++) {
            orderedNames[feat] = ordered[feat].getFeatureDefinition().name;
        }
        featureExtractorDependencies = new int[ordered.length][];
        for (int i = 0; i < ordered.length; i++) {
            String[] dependencyNames = ordered[i].getDependencies();
            if (dependencyNames == null) {
                continue;
            }
            featureExtractorDependencies[i] = new int[dependencyNames.length];
            for (int j = 0; j < dependencyNames.length; j++) {
                for (int k = 0; k < orderedNames.length; k++) {
                    if (dependencyNames[j].equals(orderedNames[k])) {
                        featureExtractorDependencies[i][j] = k;
                    }
                }
            }
        }

        // Find the maximum offset for each feature: the largest, over its
        // dependencies, of the dependency's own maximum offset plus the absolute
        // window offset at which the dependency is read. Dependencies come
        // earlier in the order, so their maximum is already known.
        maxFeatureOffsets = new int[ordered.length];
        for (int i = 0; i < ordered.length; i++) {
            int[] theseOffsets = ordered[i].getDependencyOffsets();
            int largest = 0;
            if (theseOffsets != null) {
                for (int k = 0; k < theseOffsets.length; k++) {
                    int val = Math.abs(theseOffsets[k])
                            + maxFeatureOffsets[featureExtractorDependencies[i][k]];
                    if (val > largest) {
                        largest = val;
                    }
                }
            }
            maxFeatureOffsets[i] = largest;
        }

        this.featureExtractors = Arrays.asList(ordered);
    }

    /**
     * Treats an empty dependency list like a missing one, so that a feature
     * reporting an empty array is scheduled as dependency-free.
     */
    private static String[] emptyToNull(String[] dependencyNames) {
        return (dependencyNames == null || dependencyNames.length == 0) ? null : dependencyNames;
    }

    /**
     * Returns the index of the first entry of names equal to name, or -1 if
     * there is none.
     */
    private static int indexOfName(String[] names, String name) {
        for (int j = 0; j < names.length; j++) {
            if (name.equals(names[j])) {
                return j;
            }
        }
        return -1;
    }

    /**
     * Returns the given dependency names with every occurrence of satisfiedName
     * removed. The input array is never modified: it is returned as is when
     * nothing matches, and null is returned when nothing remains.
     */
    private static String[] withoutDependency(String[] dependencyNames, String satisfiedName) {
        int remaining = 0;
        for (String dependencyName : dependencyNames) {
            if (!dependencyName.equals(satisfiedName)) {
                remaining++;
            }
        }
        if (remaining == dependencyNames.length) {
            return dependencyNames;
        }
        if (remaining == 0) {
            return null;
        }
        String[] kept = new String[remaining];
        int next = 0;
        for (String dependencyName : dependencyNames) {
            if (!dependencyName.equals(satisfiedName)) {
                kept[next] = dependencyName;
                next++;
            }
        }
        return kept;
    }

    /**
     * Returns the samples stored in the given audio file.
     * <p/>
     * The audio is converted to big-endian signed PCM at 8 or 16 bits and to
     * the sampling rate given by the settings, then mixed down into a single
     * channel. Samples are normalised if the settings request it. All audio
     * streams opened here are closed before returning.
     *
     * @param recordingFile The audio file to extract samples from.
     * @return The processed, mixed-down audio samples. The index identifies the
     *         sample number.
     * @throws Exception An exception is thrown if a problem occurs during file
     *                   reading or pre-processing.
     */
    private double[] preProcessRecording(File recordingFile) throws Exception {
        // Get the original audio and its format
        AudioInputStream originalStream = AudioSystem.getAudioInputStream(recordingFile);
        AudioInputStream pcmStream = originalStream;
        AudioInputStream resampledStream = originalStream;
        try {
            AudioFormat originalFormat = originalStream.getFormat();

            // Set the bit depth; anything other than 8 or 16 bits is converted to 16
            int bitDepth = originalFormat.getSampleSizeInBits();
            if (bitDepth != 8 && bitDepth != 16) {
                bitDepth = 16;
            }
            int channels = originalFormat.getChannels();
            int frameSize = channels * (bitDepth / 8);

            // If the audio is not PCM signed big endian, then convert it.
            // This is particularly necessary when dealing with MP3s
            if (!AudioFormat.Encoding.PCM_SIGNED.equals(originalFormat.getEncoding())
                    || !originalFormat.isBigEndian()) {
                AudioFormat pcmFormat = new AudioFormat(
                        AudioFormat.Encoding.PCM_SIGNED,
                        originalFormat.getSampleRate(),
                        bitDepth,
                        channels,
                        frameSize,
                        originalFormat.getSampleRate(),
                        true);
                pcmStream = AudioSystem.getAudioInputStream(pcmFormat, originalStream);
            }
            resampledStream = pcmStream;

            // Convert to the set sampling rate, if it is not already at this
            // sampling rate. Also, convert to an appropriate bit depth if necessary.
            float targetRate = settings.getSamplingRate().floatValue();
            if (originalFormat.getSampleRate() != targetRate
                    || bitDepth != originalFormat.getSampleSizeInBits()) {
                // For PCM one frame holds one sample per channel, so the frame
                // rate must equal the (new) sample rate.
                AudioFormat targetFormat = new AudioFormat(
                        AudioFormat.Encoding.PCM_SIGNED,
                        targetRate,
                        bitDepth,
                        channels,
                        frameSize,
                        targetRate,
                        true);
                resampledStream = AudioSystem.getAudioInputStream(targetFormat, pcmStream);
            }

            // Extract data from the AudioInputStream
            AudioSamples audioData = new AudioSamples(resampledStream, recordingFile.getPath(), false);

            // Normalise samples if this option has been requested
            if (settings.isNormalize()) {
                audioData.normalizeMixedDownSamples();
            }

            // Return all channels compressed into one
            return audioData.getSamplesMixedDown();
        } finally {
            // Close the outermost stream first, then whatever it wrapped.
            if (resampledStream != pcmStream) {
                closeQuietly(resampledStream);
            }
            if (pcmStream != originalStream) {
                closeQuietly(pcmStream);
            }
            closeQuietly(originalStream);
        }
    }

    /**
     * Closes the stream, ignoring a failure to do so: by the time this is
     * called the samples have either been read already or another exception is
     * propagating, and a close failure must not mask either outcome.
     */
    private static void closeQuietly(AudioInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done about a failed close here.
        }
    }

    /**
     * Breaks the given samples into the appropriate windows and extracts
     * features from each window.
     *
     * @param samples              The samples to extract features from.
     * @param window_start_indices The indices of samples that correspond to where each window
     *                             should start.
     * @return The extracted feature values for this recording. The first index
     *         identifies the window, the second identifies the feature and the
     *         third identifies the feature value. The third dimension will be
     *         null if the given feature could not be extracted for the given
     *         window.
     * @throws Exception        Throws an exception if a problem occurs.
     * @throws RuntimeException If the extraction was cancelled.
     */
    private double[][][] getFeatures(double[] samples, int[] window_start_indices) throws Exception {
        int windowCount = window_start_indices.length;
        int featureCount = featureExtractors.size();
        int windowSize = settings.getWindowSize();

        // The third dimension starts out null and stays null for every
        // feature that cannot be extracted for a window.
        double[][][] results = new double[windowCount][featureCount][];

        // Report progress and poll for cancellation roughly 100 times per recording
        int updateThreshold = 1;
        if (windowCount > 100) {
            updateThreshold = windowCount / 100;
        }

        for (int win = 0; win < windowCount; win++) {
            if (win % updateThreshold == 0) {
                if (updater != null) {
                    updater.announceUpdate(win);
                }
                // Polled whether or not a progress updater was supplied, so
                // that headless runs can be cancelled as well.
                if (isCancelled()) {
                    throw new RuntimeException("Killed while processing features");
                }
            }

            // Find the samples in this window. A new array is all zeros, so a
            // window that runs off the end of the samples is zero-padded.
            double[] window = new double[windowSize];
            int startSample = window_start_indices[win];
            int available = Math.min(windowSize, samples.length - startSample);
            if (available > 0) {
                System.arraycopy(samples, startSample, window, 0, available);
            }

            // Extract the features one by one
            for (int feat = 0; feat < featureCount; feat++) {
                // Only extract this feature if enough previous windows are available
                if (win < maxFeatureOffsets[feat]) {
                    results[win][feat] = null;
                    continue;
                }
                FeatureExtractor feature = featureExtractors.get(feat);

                // Find previously extracted feature values that this feature needs
                double[][] otherFeatureValues = null;
                int[] dependencyIndices = featureExtractorDependencies[feat];
                if (dependencyIndices != null) {
                    otherFeatureValues = new double[dependencyIndices.length][];
                    for (int i = 0; i < dependencyIndices.length; i++) {
                        int offset = feature.getDependencyOffsets()[i];
                        otherFeatureValues[i] = results[win + offset][dependencyIndices[i]];
                    }
                }

                // Store the extracted feature values
                results[win][feat] = feature.extractFeature(window, settings.getSamplingRate(), otherFeatureValues);
            }
        }
        return results;
    }
}