"""
Tests for the audio classifier.

Tests spectral analysis, DTMF detection, and classification logic.
"""

from typing import Optional

import numpy as np
import pytest

from config import ClassifierSettings
from models.call import AudioClassification
from services.audio_classifier import AudioClassifier, SAMPLE_RATE

# Representable range of signed 16-bit PCM samples; used to saturate
# synthetic signals instead of letting the int16 cast wrap around.
_INT16_MIN = np.iinfo(np.int16).min
_INT16_MAX = np.iinfo(np.int16).max

# Standard DTMF keypad: digit -> (low-group Hz, high-group Hz).
_DTMF_FREQS = {
    "1": (697, 1209),
    "2": (697, 1336),
    "3": (697, 1477),
    "4": (770, 1209),
    "5": (770, 1336),
    "6": (770, 1477),
    "7": (852, 1209),
    "8": (852, 1336),
    "9": (852, 1477),
    "*": (941, 1209),
    "0": (941, 1336),
    "#": (941, 1477),
}


@pytest.fixture
def classifier():
    """Create a classifier with default settings."""
    settings = ClassifierSettings()
    return AudioClassifier(settings)


def generate_silence(duration_seconds: float = 1.0) -> bytes:
    """Generate silent audio: all-zero int16 samples as raw bytes."""
    samples = int(SAMPLE_RATE * duration_seconds)
    data = np.zeros(samples, dtype=np.int16)
    return data.tobytes()


def generate_tone(
    frequency: float,
    duration_seconds: float = 1.0,
    amplitude: float = 0.5,
) -> bytes:
    """Generate a pure sine tone as raw int16 bytes.

    ``amplitude`` is a fraction of int16 full scale (32767).
    """
    samples = int(SAMPLE_RATE * duration_seconds)
    t = np.linspace(0, duration_seconds, samples, endpoint=False)
    signal = (amplitude * 32767 * np.sin(2 * np.pi * frequency * t)).astype(np.int16)
    return signal.tobytes()


def generate_dtmf(digit: str, duration_seconds: float = 0.5) -> bytes:
    """Generate a DTMF tone for a digit as raw int16 bytes.

    The two keypad frequencies are summed at equal weight; the peak of the
    sum is scaled to 16383 (about half of int16 full scale).

    Raises:
        KeyError: if ``digit`` is not one of ``0-9``, ``*`` or ``#``.
    """
    low_freq, high_freq = _DTMF_FREQS[digit]
    samples = int(SAMPLE_RATE * duration_seconds)
    t = np.linspace(0, duration_seconds, samples, endpoint=False)
    signal = 0.5 * (np.sin(2 * np.pi * low_freq * t) + np.sin(2 * np.pi * high_freq * t))
    signal = (signal * 16383).astype(np.int16)
    return signal.tobytes()


def generate_noise(
    duration_seconds: float = 1.0,
    amplitude: float = 0.3,
    *,
    seed: Optional[int] = None,
) -> bytes:
    """Generate white (Gaussian) noise as raw int16 bytes.

    Args:
        duration_seconds: Length of the clip in seconds.
        amplitude: Standard deviation of the noise as a fraction of int16
            full scale (32767).
        seed: Optional seed for reproducible output. When omitted, samples
            are drawn from NumPy's global random state, as before.
    """
    samples = int(SAMPLE_RATE * duration_seconds)
    rng = np.random if seed is None else np.random.default_rng(seed)
    noise = rng.normal(0, amplitude * 32767, samples)
    # Gaussian tails can exceed the int16 range; saturate them so the cast
    # below cannot wrap large positive samples into negative ones.
    noise = np.clip(noise, _INT16_MIN, _INT16_MAX)
    return noise.astype(np.int16).tobytes()


def generate_speech_like(duration_seconds: float = 1.0) -> bytes:
    """
    Generate a rough approximation of speech.

    Mix of formant-like frequencies with amplitude modulation.
    """
    samples = int(SAMPLE_RATE * duration_seconds)
    t = np.linspace(0, duration_seconds, samples, endpoint=False)

    # Fundamental frequency (pitch) with vibrato.
    # NOTE(review): the phase is f0(t) * t rather than the integral of f0, so
    # the effective frequency swing grows with t instead of staying at
    # +/-10 Hz. Left unchanged to keep the fixture's signal stable.
    f0 = 150 + 10 * np.sin(2 * np.pi * 5 * t)
    fundamental = np.sin(2 * np.pi * f0 * t)

    # Formants (vowel-like)
    f1 = np.sin(2 * np.pi * 730 * t) * 0.5
    f2 = np.sin(2 * np.pi * 1090 * t) * 0.3
    f3 = np.sin(2 * np.pi * 2440 * t) * 0.1

    # Amplitude modulation (syllable-like rhythm)
    envelope = 0.5 + 0.5 * np.sin(2 * np.pi * 3 * t)

    signal = envelope * (fundamental + f1 + f2 + f3)
    signal = (signal * 8000).astype(np.int16)
    return signal.tobytes()


class TestSilenceDetection:
    """Test silence classification."""

    def test_pure_silence(self, classifier):
        result = classifier.classify_chunk(generate_silence())
        assert result.audio_type == AudioClassification.SILENCE
        assert result.confidence > 0.5

    def test_very_quiet(self, classifier):
        # Near-silent audio
        quiet = generate_tone(440, amplitude=0.001)
        result = classifier.classify_chunk(quiet)
        assert result.audio_type == AudioClassification.SILENCE

    def test_empty_audio(self, classifier):
        result = classifier.classify_chunk(b"")
        assert result.audio_type == AudioClassification.SILENCE


class TestToneDetection:
    """Test tonal audio classification."""

    def test_440hz_ringback(self, classifier):
        """440Hz is North American ring-back tone frequency."""
        tone = generate_tone(440, amplitude=0.3)
        result = classifier.classify_chunk(tone)
        # Should be detected as ringing (440Hz is in the ring-back range)
        assert result.audio_type in (
            AudioClassification.RINGING,
            AudioClassification.MUSIC,
        )
        assert result.confidence > 0.5

    def test_1000hz_tone(self, classifier):
        """1000Hz tone — not ring-back, should be music or unknown."""
        tone = generate_tone(1000, amplitude=0.3)
        result = classifier.classify_chunk(tone)
        assert result.audio_type != AudioClassification.SILENCE


class TestDTMFDetection:
    """Test DTMF tone detection.

    NOTE(review): both tests only assert when the chunk is classified as
    DTMF, so they pass vacuously if the classifier reports anything else.
    Tighten to an unconditional assertion once detection is known to be
    reliable for these inputs.
    """

    def test_dtmf_digit_5(self, classifier):
        dtmf = generate_dtmf("5", duration_seconds=0.5)
        result = classifier.classify_chunk(dtmf)
        # DTMF detection should catch this
        if result.audio_type == AudioClassification.DTMF:
            assert result.details.get("dtmf_digit") == "5"

    def test_dtmf_digit_0(self, classifier):
        dtmf = generate_dtmf("0", duration_seconds=0.5)
        result = classifier.classify_chunk(dtmf)
        if result.audio_type == AudioClassification.DTMF:
            assert result.details.get("dtmf_digit") == "0"


class TestMusicDetection:
    """Test hold music detection."""

    def test_complex_tone_as_music(self, classifier):
        """Multiple frequencies together = more music-like."""
        samples = int(SAMPLE_RATE * 2)
        t = np.linspace(0, 2, samples, endpoint=False)
        # Chord: C major (C4 + E4 + G4)
        signal = (
            np.sin(2 * np.pi * 261.6 * t)
            + np.sin(2 * np.pi * 329.6 * t) * 0.8
            + np.sin(2 * np.pi * 392.0 * t) * 0.6
        )
        signal = (signal * 6000).astype(np.int16)
        result = classifier.classify_chunk(signal.tobytes())
        assert result.audio_type in (
            AudioClassification.MUSIC,
            AudioClassification.RINGING,
            AudioClassification.UNKNOWN,
        )
        assert result.confidence > 0.3


class TestSpeechDetection:
    """Test speech-like audio classification."""

    def test_speech_like_audio(self, classifier):
        speech = generate_speech_like(2.0)
        result = classifier.classify_chunk(speech)
        assert result.audio_type in (
            AudioClassification.IVR_PROMPT,
            AudioClassification.LIVE_HUMAN,
            AudioClassification.MUSIC,  # Speech-like can be ambiguous
            AudioClassification.UNKNOWN,
        )


class TestClassificationHistory:
    """Test history-based transition detection."""

    def test_hold_to_human_transition(self, classifier):
        """Detect the music → speech transition."""
        # Simulate being on hold
        for _ in range(10):
            classifier.update_history(AudioClassification.MUSIC)
        # Now speech appears
        classifier.update_history(AudioClassification.LIVE_HUMAN)
        classifier.update_history(AudioClassification.LIVE_HUMAN)
        classifier.update_history(AudioClassification.LIVE_HUMAN)
        assert classifier.detect_hold_to_human_transition()

    def test_no_transition_during_ivr(self, classifier):
        """IVR prompt after silence is not a hold→human transition."""
        for _ in range(5):
            classifier.update_history(AudioClassification.SILENCE)
        classifier.update_history(AudioClassification.IVR_PROMPT)
        classifier.update_history(AudioClassification.IVR_PROMPT)
        classifier.update_history(AudioClassification.IVR_PROMPT)
        # No music in history, so no hold→human transition
        assert not classifier.detect_hold_to_human_transition()

    def test_not_enough_history(self, classifier):
        """Not enough data to detect transition."""
        classifier.update_history(AudioClassification.MUSIC)
        classifier.update_history(AudioClassification.LIVE_HUMAN)
        assert not classifier.detect_hold_to_human_transition()


class TestFeatureExtraction:
    """Test individual feature extractors."""

    def test_rms_silence(self, classifier):
        samples = np.zeros(1000, dtype=np.float32)
        rms = classifier._compute_rms(samples)
        assert rms == 0.0

    def test_rms_loud(self, classifier):
        samples = np.ones(1000, dtype=np.float32) * 0.5
        rms = classifier._compute_rms(samples)
        assert rms == pytest.approx(0.5, abs=0.01)

    def test_zcr_silence(self, classifier):
        samples = np.zeros(1000, dtype=np.float32)
        zcr = classifier._compute_zero_crossing_rate(samples)
        assert zcr == 0.0

    def test_zcr_high_freq(self, classifier):
        """High frequency signal should have high ZCR."""
        # NOTE(review): if SAMPLE_RATE is 8000, 4000 Hz sits exactly at
        # Nyquist and every sample of this sine is numerically ~0 — confirm
        # the sample rate, or pick a frequency safely below SAMPLE_RATE / 2.
        t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
        samples = np.sin(2 * np.pi * 4000 * t).astype(np.float32)
        zcr = classifier._compute_zero_crossing_rate(samples)
        assert zcr > 0.1

    def test_spectral_flatness_tone(self, classifier):
        """Pure tone should have low spectral flatness."""
        t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
        samples = np.sin(2 * np.pi * 440 * t).astype(np.float32)
        flatness = classifier._compute_spectral_flatness(samples)
        assert flatness < 0.3

    def test_dominant_frequency(self, classifier):
        """Should find the dominant frequency of a pure tone."""
        t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
        samples = np.sin(2 * np.pi * 1000 * t).astype(np.float32)
        freq = classifier._compute_dominant_frequency(samples)
        assert abs(freq - 1000) < 50  # Within 50Hz