Initialize the stentor-gateway project with a WebSocket-based voice pipeline that orchestrates STT → Agent → TTS via OpenAI-compatible APIs.

- Add FastAPI app with WebSocket endpoint for audio streaming
- Add pipeline orchestration (stt_client, tts_client, agent_client)
- Add Pydantic Settings configuration and message models
- Add audio utilities for PCM/WAV conversion and resampling
- Add health check endpoints
- Add Dockerfile and pyproject.toml with dependencies
- Add initial test suite (pipeline, STT, TTS, WebSocket)
- Add comprehensive README covering gateway and ESP32 ear design
- Clean up .gitignore for Python/uv project
121 lines
4.3 KiB
Python
121 lines
4.3 KiB
Python
"""Tests for the voice pipeline orchestrator."""
|
|
|
|
import struct
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
|
|
from stentor.agent_client import AgentClient
|
|
from stentor.pipeline import Pipeline, PipelineState
|
|
from stentor.stt_client import STTClient
|
|
from stentor.tts_client import TTSClient
|
|
|
|
|
|
class TestPipeline:
    """Exercise the Pipeline orchestrator against mocked STT, agent and TTS clients.

    The ``settings`` and ``sample_pcm`` fixtures are not defined in this class;
    they are expected to come from elsewhere in the test suite (presumably
    ``conftest.py`` -- confirm). The test methods are coroutines, so an asyncio
    pytest plugin is presumably configured for the project -- confirm.
    """

    @pytest.fixture
    def mock_stt(self):
        """Provide an STT client mock that always transcribes a fixed question."""
        client = AsyncMock(spec=STTClient)
        client.transcribe.return_value = "What is the weather?"
        return client

    @pytest.fixture
    def mock_tts(self):
        """Provide a TTS client mock that synthesizes a short silent clip."""
        client = AsyncMock(spec=TTSClient)
        # 100 zero-valued little-endian int16 samples: silence as PCM
        # (at 24kHz, will be resampled).
        silence = [0] * 100
        client.synthesize.return_value = struct.pack("<100h", *silence)
        return client

    @pytest.fixture
    def mock_agent(self):
        """Provide an agent client mock that always gives the same reply."""
        client = AsyncMock(spec=AgentClient)
        client.send_message.return_value = "I don't have weather tools yet."
        return client

    @pytest.fixture
    def pipeline(self, settings, mock_stt, mock_tts, mock_agent):
        """Build a Pipeline wired to the three mock clients and a fresh state."""
        return Pipeline(settings, mock_stt, mock_tts, mock_agent, PipelineState())

    async def test_full_pipeline(self, pipeline, sample_pcm, mock_stt, mock_tts, mock_agent):
        """Run one utterance end to end and check emitted events and client calls."""
        emitted = [event async for event in pipeline.process(sample_pcm)]
        seen_types = [event.type for event in emitted]

        # Checked in pipeline order; "status" covers the transcribing notice.
        for required in ("status", "transcript.done", "response.text.done"):
            assert required in seen_types
        # Either audio event type satisfies the check.
        assert any(
            kind in seen_types
            for kind in ("response.audio.delta", "response.audio.done")
        )
        assert "response.done" in seen_types

        # Each client is used exactly once, fed by the previous stage's output.
        mock_stt.transcribe.assert_called_once()
        mock_agent.send_message.assert_called_once_with("What is the weather?")
        mock_tts.synthesize.assert_called_once_with("I don't have weather tools yet.")

    async def test_pipeline_empty_transcript(self, settings, mock_tts, mock_agent):
        """Check that an empty transcript yields an error and skips agent and TTS."""
        silent_stt = AsyncMock(spec=STTClient)
        silent_stt.transcribe.return_value = ""
        pipeline = Pipeline(settings, silent_stt, mock_tts, mock_agent, PipelineState())

        sample_pcm = struct.pack("<100h", *([0] * 100))
        emitted = [event async for event in pipeline.process(sample_pcm)]

        assert "error" in [event.type for event in emitted]

        # Nothing downstream of STT may run when there is no transcript.
        mock_agent.send_message.assert_not_called()
        mock_tts.synthesize.assert_not_called()

    async def test_pipeline_empty_agent_response(self, settings, mock_stt, mock_tts):
        """Check that an empty agent reply yields an error and skips TTS."""
        mute_agent = AsyncMock(spec=AgentClient)
        mute_agent.send_message.return_value = ""
        pipeline = Pipeline(settings, mock_stt, mock_tts, mute_agent, PipelineState())

        sample_pcm = struct.pack("<100h", *([0] * 100))
        emitted = [event async for event in pipeline.process(sample_pcm)]

        assert "error" in [event.type for event in emitted]
        mock_tts.synthesize.assert_not_called()

    async def test_pipeline_metrics_recorded(self, pipeline, sample_pcm):
        """Check that one run updates the state counters and stores a metrics entry."""
        state = pipeline._state
        assert state.total_transcriptions == 0

        # Only the side effects on the state matter here, so the events
        # are consumed without being inspected.
        async for _event in pipeline.process(sample_pcm):
            pass

        assert state.total_transcriptions == 1
        assert state.total_agent_requests == 1
        assert state.total_tts_requests == 1
        assert len(state.recent_metrics) == 1

        latest = state.recent_metrics[-1]
        assert latest.total_duration > 0
        assert latest.transcript == "What is the weather?"
        assert latest.response_text == "I don't have weather tools yet."
|