StereoDelayAnalyzer.java
package org.hammer.audio.localization;
import org.hammer.audio.analysis.AnalysisModule;
import org.hammer.audio.core.AudioBlock;
/** Estimates inter-channel delay in stereo audio using normalized cross-correlation. */
public final class StereoDelayAnalyzer implements AnalysisModule<StereoDelaySnapshot> {
public static final double DEFAULT_MICROPHONE_SPACING_METERS = 0.20;
public static final double DEFAULT_SPEED_OF_SOUND_METERS_PER_SECOND = 343.0;
private static final double DEFAULT_MIN_CONFIDENCE = 0.35;
private static final double SILENCE_RMS_THRESHOLD = 1.0e-4;
private final double microphoneSpacingMeters;
private final double speedOfSoundMetersPerSecond;
private final double minimumConfidence;
public StereoDelayAnalyzer() {
this(
DEFAULT_MICROPHONE_SPACING_METERS,
DEFAULT_SPEED_OF_SOUND_METERS_PER_SECOND,
DEFAULT_MIN_CONFIDENCE);
}
public StereoDelayAnalyzer(
double microphoneSpacingMeters,
double speedOfSoundMetersPerSecond,
double minimumConfidence) {
if (!(microphoneSpacingMeters > 0.0) || !Double.isFinite(microphoneSpacingMeters)) {
throw new IllegalArgumentException(
"microphoneSpacingMeters must be finite and > 0, was " + microphoneSpacingMeters);
}
if (!(speedOfSoundMetersPerSecond > 0.0) || !Double.isFinite(speedOfSoundMetersPerSecond)) {
throw new IllegalArgumentException(
"speedOfSoundMetersPerSecond must be finite and > 0, was " + speedOfSoundMetersPerSecond);
}
if (minimumConfidence < 0.0 || minimumConfidence > 1.0 || !Double.isFinite(minimumConfidence)) {
throw new IllegalArgumentException("minimumConfidence must be finite and in [0,1]");
}
this.microphoneSpacingMeters = microphoneSpacingMeters;
this.speedOfSoundMetersPerSecond = speedOfSoundMetersPerSecond;
this.minimumConfidence = minimumConfidence;
}
@Override
public StereoDelaySnapshot analyze(AudioBlock block) {
if (block.channels() < 2) {
return invalid(block, StereoDelayStatus.MONO_INPUT, 0, 0.0, new float[0], 0);
}
float[] left = block.channelView(0);
float[] right = block.channelView(1);
int frames = Math.min(left.length, right.length);
if (frames == 0
|| rms(left, frames) < SILENCE_RMS_THRESHOLD
|| rms(right, frames) < SILENCE_RMS_THRESHOLD) {
return invalid(block, StereoDelayStatus.SILENCE, 0, 0.0, new float[0], 0);
}
int maxLag = physicallyPossibleLagSamples(block.format().sampleRate(), frames);
int minLag = -maxLag;
float[] correlations = new float[maxLag - minLag + 1];
int bestLag = 0;
double bestCorrelation = 0.0;
for (int lag = minLag; lag <= maxLag; lag++) {
double correlation = normalizedCorrelation(left, right, frames, lag);
correlations[lag - minLag] = (float) correlation;
if (Math.abs(correlation) > Math.abs(bestCorrelation)) {
bestCorrelation = correlation;
bestLag = lag;
}
}
double confidence = Math.abs(bestCorrelation);
double delayMillis = samplesToMillis(bestLag, block.format().sampleRate());
double pathDifference = samplesToPathDifference(bestLag, block.format().sampleRate());
if (confidence < minimumConfidence) {
return invalid(
block, StereoDelayStatus.LOW_CORRELATION, bestLag, confidence, correlations, minLag);
}
if (Math.abs(pathDifference) > microphoneSpacingMeters) {
return invalid(
block,
StereoDelayStatus.DELAY_OUTSIDE_PHYSICAL_RANGE,
bestLag,
confidence,
correlations,
minLag);
}
double ratio = clamp(pathDifference / microphoneSpacingMeters, -1.0, 1.0);
double angleDegrees = Math.toDegrees(Math.asin(ratio));
return new StereoDelaySnapshot(
block.frameIndex(),
block.timestampNanos(),
StereoDelayStatus.VALID,
bestLag,
delayMillis,
pathDifference,
angleDegrees,
confidence,
microphoneSpacingMeters,
speedOfSoundMetersPerSecond,
minLag,
correlations);
}
public double microphoneSpacingMeters() {
return microphoneSpacingMeters;
}
public double speedOfSoundMetersPerSecond() {
return speedOfSoundMetersPerSecond;
}
private StereoDelaySnapshot invalid(
AudioBlock block,
StereoDelayStatus status,
int delaySamples,
double confidence,
float[] correlations,
int minLag) {
return new StereoDelaySnapshot(
block.frameIndex(),
block.timestampNanos(),
status,
delaySamples,
samplesToMillis(delaySamples, block.format().sampleRate()),
samplesToPathDifference(delaySamples, block.format().sampleRate()),
Double.NaN,
confidence,
microphoneSpacingMeters,
speedOfSoundMetersPerSecond,
minLag,
correlations);
}
private static double rms(float[] samples, int frames) {
double sumSquares = 0.0;
for (int i = 0; i < frames; i++) {
sumSquares += samples[i] * samples[i];
}
return Math.sqrt(sumSquares / Math.max(1, frames));
}
private int physicallyPossibleLagSamples(float sampleRate, int frames) {
double maxDelaySeconds = microphoneSpacingMeters / speedOfSoundMetersPerSecond;
int physicalLag = (int) Math.ceil(maxDelaySeconds * sampleRate);
return Math.min(physicalLag, frames - 1);
}
private static double normalizedCorrelation(float[] left, float[] right, int frames, int lag) {
int leftStart = Math.max(0, -lag);
int rightStart = Math.max(0, lag);
int overlap = frames - Math.abs(lag);
if (overlap <= 1) {
return 0.0;
}
double sum = 0.0;
double leftEnergy = 0.0;
double rightEnergy = 0.0;
for (int i = 0; i < overlap; i++) {
double leftSample = left[leftStart + i];
double rightSample = right[rightStart + i];
sum += leftSample * rightSample;
leftEnergy += leftSample * leftSample;
rightEnergy += rightSample * rightSample;
}
double denominator = Math.sqrt(leftEnergy * rightEnergy);
return denominator > 0.0 ? sum / denominator : 0.0;
}
private double samplesToMillis(int samples, float sampleRate) {
return 1000.0 * samples / sampleRate;
}
private double samplesToPathDifference(int samples, float sampleRate) {
return speedOfSoundMetersPerSecond * samples / sampleRate;
}
private static double clamp(double value, double min, double max) {
return Math.max(min, Math.min(max, value));
}
}