feat: scaffold stentor-gateway with FastAPI voice pipeline

Initialize the stentor-gateway project with a WebSocket-based voice
pipeline that orchestrates STT → Agent → TTS via OpenAI-compatible APIs.

- Add FastAPI app with WebSocket endpoint for audio streaming
- Add pipeline orchestration (stt_client, tts_client, agent_client)
- Add Pydantic Settings configuration and message models
- Add audio utilities for PCM/WAV conversion and resampling
- Add health check endpoints
- Add Dockerfile and pyproject.toml with dependencies
- Add initial test suite (pipeline, STT, TTS, WebSocket)
- Add comprehensive README covering gateway and ESP32 ear design
- Clean up .gitignore for Python/uv project
This commit is contained in:
2026-03-21 19:11:48 +00:00
parent 9ba9435883
commit 912593b796
27 changed files with 3985 additions and 138 deletions

View File

@@ -0,0 +1,120 @@
"""Tests for the voice pipeline orchestrator."""
import struct
from unittest.mock import AsyncMock
import pytest
from stentor.agent_client import AgentClient
from stentor.pipeline import Pipeline, PipelineState
from stentor.stt_client import STTClient
from stentor.tts_client import TTSClient
class TestPipeline:
    """Exercise the Pipeline orchestrator against mocked STT, agent and TTS clients."""

    @pytest.fixture
    def mock_stt(self):
        """Provide an STT client mock whose transcription is a fixed question."""
        stt_double = AsyncMock(spec=STTClient)
        stt_double.transcribe.return_value = "What is the weather?"
        return stt_double

    @pytest.fixture
    def mock_tts(self):
        """Provide a TTS client mock that synthesizes a short silent clip."""
        tts_double = AsyncMock(spec=TTSClient)
        # 100 zero-valued little-endian int16 samples of PCM silence
        # (at 24kHz, will be resampled).
        silent_samples = [0] * 100
        tts_double.synthesize.return_value = struct.pack("<100h", *silent_samples)
        return tts_double

    @pytest.fixture
    def mock_agent(self):
        """Provide an agent client mock that answers with a fixed reply."""
        agent_double = AsyncMock(spec=AgentClient)
        agent_double.send_message.return_value = "I don't have weather tools yet."
        return agent_double

    @pytest.fixture
    def pipeline(self, settings, mock_stt, mock_tts, mock_agent):
        """Build a Pipeline wired to the three mock clients and a fresh state."""
        return Pipeline(settings, mock_stt, mock_tts, mock_agent, PipelineState())

    async def test_full_pipeline(self, pipeline, sample_pcm, mock_stt, mock_tts, mock_agent):
        """A successful run emits every expected event type and calls each client once."""
        emitted = [event async for event in pipeline.process(sample_pcm)]

        # Kept as a list so membership is decided by equality, exactly as before.
        event_types = [event.type for event in emitted]

        # "status" is the transcribing status notification.
        for expected in ("status", "transcript.done", "response.text.done"):
            assert expected in event_types
        # Either audio event type satisfies the audio check.
        assert any(
            audio_type in event_types
            for audio_type in ("response.audio.delta", "response.audio.done")
        )
        assert "response.done" in event_types

        # Each stage must have been invoked exactly once, fed by the previous one.
        mock_stt.transcribe.assert_called_once()
        mock_agent.send_message.assert_called_once_with("What is the weather?")
        mock_tts.synthesize.assert_called_once_with("I don't have weather tools yet.")

    async def test_pipeline_empty_transcript(self, settings, mock_tts, mock_agent):
        """An empty transcript yields an error event and skips agent and TTS."""
        silent_stt = AsyncMock(spec=STTClient)
        silent_stt.transcribe.return_value = ""
        sample_pcm = struct.pack("<100h", *([0] * 100))
        pipeline = Pipeline(settings, silent_stt, mock_tts, mock_agent, PipelineState())

        emitted = [event async for event in pipeline.process(sample_pcm)]
        event_types = [event.type for event in emitted]

        assert "error" in event_types
        # Nothing downstream of STT may run when there is no transcript.
        mock_agent.send_message.assert_not_called()
        mock_tts.synthesize.assert_not_called()

    async def test_pipeline_empty_agent_response(self, settings, mock_stt, mock_tts):
        """An empty agent reply yields an error event and skips TTS."""
        mute_agent = AsyncMock(spec=AgentClient)
        mute_agent.send_message.return_value = ""
        sample_pcm = struct.pack("<100h", *([0] * 100))
        pipeline = Pipeline(settings, mock_stt, mock_tts, mute_agent, PipelineState())

        emitted = [event async for event in pipeline.process(sample_pcm)]
        event_types = [event.type for event in emitted]

        assert "error" in event_types
        mock_tts.synthesize.assert_not_called()

    async def test_pipeline_metrics_recorded(self, pipeline, sample_pcm):
        """One pipeline run bumps each counter to 1 and stores a single metrics entry."""
        state = pipeline._state
        assert state.total_transcriptions == 0

        # Only the side effects on the state matter here; the events are discarded.
        async for _event in pipeline.process(sample_pcm):
            pass

        assert state.total_transcriptions == 1
        assert state.total_agent_requests == 1
        assert state.total_tts_requests == 1
        assert len(state.recent_metrics) == 1

        latest = state.recent_metrics[-1]
        assert latest.total_duration > 0
        assert latest.transcript == "What is the weather?"
        assert latest.response_text == "I don't have weather tools yet."