feat: scaffold stentor-gateway with FastAPI voice pipeline
Initialize the stentor-gateway project with WebSocket-based voice pipeline orchestrating STT → Agent → TTS via OpenAI-compatible APIs. - Add FastAPI app with WebSocket endpoint for audio streaming - Add pipeline orchestration (stt_client, tts_client, agent_client) - Add Pydantic Settings configuration and message models - Add audio utilities for PCM/WAV conversion and resampling - Add health check endpoints - Add Dockerfile and pyproject.toml with dependencies - Add initial test suite (pipeline, STT, TTS, WebSocket) - Add comprehensive README covering gateway and ESP32 ear design - Clean up .gitignore for Python/uv project
This commit is contained in:
178
.gitignore
vendored
178
.gitignore
vendored
@@ -1,5 +1,4 @@
|
|||||||
# ---> Python
|
# Python bytecode
|
||||||
# Byte-compiled / optimized / DLL files
|
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
@@ -8,169 +7,74 @@ __pycache__/
|
|||||||
*.so
|
*.so
|
||||||
|
|
||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
.Python
|
|
||||||
build/
|
build/
|
||||||
develop-eggs/
|
|
||||||
dist/
|
dist/
|
||||||
downloads/
|
downloads/
|
||||||
eggs/
|
eggs/
|
||||||
.eggs/
|
.eggs/
|
||||||
lib/
|
|
||||||
lib64/
|
|
||||||
parts/
|
|
||||||
sdist/
|
|
||||||
var/
|
|
||||||
wheels/
|
|
||||||
share/python-wheels/
|
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
.installed.cfg
|
pip-wheel-metadata/
|
||||||
*.egg
|
*.egg
|
||||||
MANIFEST
|
|
||||||
|
|
||||||
# PyInstaller
|
|
||||||
# Usually these files are written by a python script from a template
|
|
||||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
||||||
*.manifest
|
|
||||||
*.spec
|
|
||||||
|
|
||||||
# Installer logs
|
# Installer logs
|
||||||
pip-log.txt
|
pip-log.txt
|
||||||
pip-delete-this-directory.txt
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
# Unit test / coverage reports
|
# Unit test / coverage
|
||||||
htmlcov/
|
.pytest_cache/
|
||||||
.tox/
|
.cache
|
||||||
.nox/
|
|
||||||
.coverage
|
.coverage
|
||||||
.coverage.*
|
.coverage.*
|
||||||
.cache
|
htmlcov/
|
||||||
nosetests.xml
|
|
||||||
coverage.xml
|
coverage.xml
|
||||||
*.cover
|
nose2-junit.xml
|
||||||
*.py,cover
|
|
||||||
.hypothesis/
|
|
||||||
.pytest_cache/
|
|
||||||
cover/
|
|
||||||
|
|
||||||
# Translations
|
# Virtual environments
|
||||||
*.mo
|
.venv/
|
||||||
*.pot
|
|
||||||
|
|
||||||
# Django stuff:
|
|
||||||
*.log
|
|
||||||
local_settings.py
|
|
||||||
db.sqlite3
|
|
||||||
db.sqlite3-journal
|
|
||||||
|
|
||||||
# Flask stuff:
|
|
||||||
instance/
|
|
||||||
.webassets-cache
|
|
||||||
|
|
||||||
# Scrapy stuff:
|
|
||||||
.scrapy
|
|
||||||
|
|
||||||
# Sphinx documentation
|
|
||||||
docs/_build/
|
|
||||||
|
|
||||||
# PyBuilder
|
|
||||||
.pybuilder/
|
|
||||||
target/
|
|
||||||
|
|
||||||
# Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
# IPython
|
|
||||||
profile_default/
|
|
||||||
ipython_config.py
|
|
||||||
|
|
||||||
# pyenv
|
|
||||||
# For a library or package, you might want to ignore these files since the code is
|
|
||||||
# intended to run in multiple environments; otherwise, check them in:
|
|
||||||
# .python-version
|
|
||||||
|
|
||||||
# pipenv
|
|
||||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
||||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
||||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
||||||
# install all needed dependencies.
|
|
||||||
#Pipfile.lock
|
|
||||||
|
|
||||||
# UV
|
|
||||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
||||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
||||||
# commonly ignored for libraries.
|
|
||||||
#uv.lock
|
|
||||||
|
|
||||||
# poetry
|
|
||||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
||||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
||||||
# commonly ignored for libraries.
|
|
||||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
||||||
#poetry.lock
|
|
||||||
|
|
||||||
# pdm
|
|
||||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
||||||
#pdm.lock
|
|
||||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
||||||
# in version control.
|
|
||||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
||||||
.pdm.toml
|
|
||||||
.pdm-python
|
|
||||||
.pdm-build/
|
|
||||||
|
|
||||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
||||||
__pypackages__/
|
|
||||||
|
|
||||||
# Celery stuff
|
|
||||||
celerybeat-schedule
|
|
||||||
celerybeat.pid
|
|
||||||
|
|
||||||
# SageMath parsed files
|
|
||||||
*.sage.py
|
|
||||||
|
|
||||||
# Environments
|
|
||||||
.env
|
|
||||||
.venv
|
|
||||||
env/
|
|
||||||
venv/
|
venv/
|
||||||
ENV/
|
ENV/
|
||||||
|
env/
|
||||||
env.bak/
|
env.bak/
|
||||||
venv.bak/
|
venv.bak/
|
||||||
|
|
||||||
# Spyder project settings
|
# Environment files
|
||||||
.spyderproject
|
.env
|
||||||
.spyproject
|
.env.*
|
||||||
|
credentials.json
|
||||||
|
|
||||||
# Rope project settings
|
# IDEs and editors
|
||||||
.ropeproject
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.iml
|
||||||
|
|
||||||
# mkdocs documentation
|
# Type checkers
|
||||||
/site
|
|
||||||
|
|
||||||
# mypy
|
|
||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
.dmypy.json
|
|
||||||
dmypy.json
|
|
||||||
|
|
||||||
# Pyre type checker
|
|
||||||
.pyre/
|
.pyre/
|
||||||
|
|
||||||
# pytype static type analyzer
|
# Jupyter
|
||||||
.pytype/
|
.ipynb_checkpoints
|
||||||
|
|
||||||
# Cython debug symbols
|
# Logs
|
||||||
cython_debug/
|
*.log
|
||||||
|
|
||||||
# PyCharm
|
# System files
|
||||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
.DS_Store
|
||||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
Thumbs.db
|
||||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
||||||
#.idea/
|
|
||||||
|
|
||||||
# Ruff stuff:
|
# Node
|
||||||
.ruff_cache/
|
node_modules/
|
||||||
|
|
||||||
# PyPI configuration file
|
# Docker
|
||||||
.pypirc
|
docker-compose.override.yml
|
||||||
|
|
||||||
|
# ESP-IDF / CMake / embedded build artifacts
|
||||||
|
sdkconfig
|
||||||
|
sdkconfig.*
|
||||||
|
build/
|
||||||
|
components/**/build/
|
||||||
|
components/**/.build/
|
||||||
|
partitions.csv
|
||||||
|
|
||||||
|
# Misc
|
||||||
|
*.sqlite3
|
||||||
|
*.db
|
||||||
|
|||||||
604
README.md
604
README.md
@@ -1,2 +1,604 @@
|
|||||||
# stentor
|
|
||||||
|
|
||||||
|
# Stentor — Voice Gateway Specification
|
||||||
|
|
||||||
|
> *\"Stentor, whose voice was as powerful as fifty voices of other men.\"*
|
||||||
|
> — Homer, *Iliad*, Book V
|
||||||
|
|
||||||
|
**Stentor** is a voice gateway that connects physical audio hardware (ESP32-S3-AUDIO-Board) to AI agents (FastAgent) via speech services (Speaches), enabling real-time voice conversations with AI.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture Overview
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────┐
|
||||||
|
│ ESP32-S3-AUDIO-Board │
|
||||||
|
│ (stentor-ear) │
|
||||||
|
│ │
|
||||||
|
│ Mic Array ──▶ Wake Word ──▶ WS │
|
||||||
|
│ Speaker ◀── PCM playback ◀── │
|
||||||
|
└──────────────┬───────────────────┘
|
||||||
|
│ WebSocket (audio)
|
||||||
|
▼
|
||||||
|
┌──────────────────────────────────┐
|
||||||
|
│ Stentor Gateway │
|
||||||
|
│ (Python service) │
|
||||||
|
│ │
|
||||||
|
│ WS Server ──▶ STT ──▶ Agent │
|
||||||
|
│ WS Server ◀── TTS ◀── Agent │
|
||||||
|
└──────┬──────────┬──────────┬─────┘
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
Speaches FastAgent Speaches
|
||||||
|
STT (HTTP) TTS
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Components
|
||||||
|
|
||||||
|
### 1. Stentor Gateway (`stentor-gateway/`)
|
||||||
|
|
||||||
|
**Language:** Python 3.12+
|
||||||
|
**Framework:** FastAPI + WebSockets
|
||||||
|
**Deployment:** Agathos container (Caliban or dedicated)
|
||||||
|
|
||||||
|
The gateway is the central orchestrator. It:
|
||||||
|
- Accepts WebSocket connections from ESP32 clients
|
||||||
|
- Streams audio to Speaches STT for transcription
|
||||||
|
- Sends transcripts to FastAgent for AI response
|
||||||
|
- Sends AI response text to Speaches TTS for synthesis
|
||||||
|
- Streams synthesized audio back to the ESP32 client
|
||||||
|
|
||||||
|
#### Configuration
|
||||||
|
|
||||||
|
All configuration via environment variables (12-factor):
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `STENTOR_HOST` | Gateway bind address | `0.0.0.0` |
|
||||||
|
| `STENTOR_PORT` | Gateway bind port | `8600` |
|
||||||
|
| `STENTOR_STT_URL` | Speaches STT endpoint (OpenAI-compatible) | `http://perseus.incus:8000` |
|
||||||
|
| `STENTOR_TTS_URL` | Speaches TTS endpoint (OpenAI-compatible) | `http://pan.incus:8000` |
|
||||||
|
| `STENTOR_AGENT_URL` | FastAgent HTTP endpoint | `http://localhost:8001` |
|
||||||
|
| `STENTOR_STT_MODEL` | Whisper model for STT | `Systran/faster-whisper-small` |
|
||||||
|
| `STENTOR_TTS_MODEL` | TTS model name | `kokoro` |
|
||||||
|
| `STENTOR_TTS_VOICE` | TTS voice ID | `af_heart` |
|
||||||
|
| `STENTOR_AUDIO_SAMPLE_RATE` | Audio sample rate in Hz | `16000` |
|
||||||
|
| `STENTOR_AUDIO_CHANNELS` | Audio channel count | `1` |
|
||||||
|
| `STENTOR_AUDIO_SAMPLE_WIDTH` | Bits per sample | `16` |
|
||||||
|
| `STENTOR_LOG_LEVEL` | Logging level | `INFO` |
|
||||||
|
|
||||||
|
#### WebSocket Protocol
|
||||||
|
|
||||||
|
Endpoint: `ws://{STENTOR_HOST}:{STENTOR_PORT}/ws/audio`
|
||||||
|
|
||||||
|
##### Client → Gateway Messages
|
||||||
|
|
||||||
|
```json
|
||||||
|
// Session start (sent once on connect)
|
||||||
|
{
|
||||||
|
\"type\": \"session.start\",
|
||||||
|
\"client_id\": \"esp32-kitchen\",
|
||||||
|
\"audio_config\": {
|
||||||
|
\"sample_rate\": 16000,
|
||||||
|
\"channels\": 1,
|
||||||
|
\"sample_width\": 16,
|
||||||
|
\"encoding\": \"pcm_s16le\"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio data (sent continuously while listening)
|
||||||
|
{
|
||||||
|
\"type\": \"audio.chunk\",
|
||||||
|
\"data\": \"<base64-encoded PCM audio>\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client signals end of speech
|
||||||
|
{
|
||||||
|
\"type\": \"audio.end\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client requests session termination
|
||||||
|
{
|
||||||
|
\"type\": \"session.end\"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Gateway → Client Messages
|
||||||
|
|
||||||
|
```json
|
||||||
|
// Acknowledge session
|
||||||
|
{
|
||||||
|
\"type\": \"session.ready\",
|
||||||
|
\"session_id\": \"uuid\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gateway is processing (ESP32 can show \"thinking\" LED)
|
||||||
|
{
|
||||||
|
\"type\": \"status\",
|
||||||
|
\"state\": \"listening\" | \"transcribing\" | \"thinking\" | \"speaking\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transcript of what user said (for debugging/display)
|
||||||
|
{
|
||||||
|
\"type\": \"transcript\",
|
||||||
|
\"text\": \"What is the weather like today?\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// AI response text (for debugging/display)
|
||||||
|
{
|
||||||
|
\"type\": \"response.text\",
|
||||||
|
\"text\": \"I don\\'t have weather tools yet, but...\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio response (streamed in chunks)
|
||||||
|
{
|
||||||
|
\"type\": \"audio.chunk\",
|
||||||
|
\"data\": \"<base64-encoded PCM audio>\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio response complete
|
||||||
|
{
|
||||||
|
\"type\": \"audio.end\"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error
|
||||||
|
{
|
||||||
|
\"type\": \"error\",
|
||||||
|
\"message\": \"STT service unavailable\",
|
||||||
|
\"code\": \"stt_error\"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Processing Pipeline
|
||||||
|
|
||||||
|
```
|
||||||
|
1. ESP32 detects wake word locally (ESP-SR)
|
||||||
|
2. ESP32 opens WebSocket, sends session.start
|
||||||
|
3. Gateway sends session.ready
|
||||||
|
4. ESP32 streams audio.chunk messages
|
||||||
|
5. Gateway buffers audio (with Voice Activity Detection)
|
||||||
|
6. ESP32 sends audio.end (or VAD detects silence)
|
||||||
|
7. Gateway sends status: \"transcribing\"
|
||||||
|
8. Gateway calls Speaches STT:
|
||||||
|
POST {STENTOR_STT_URL}/v1/audio/transcriptions
|
||||||
|
- model: {STENTOR_STT_MODEL}
|
||||||
|
- file: collected audio as WAV
|
||||||
|
- response_format: json
|
||||||
|
9. Gateway sends transcript message to client
|
||||||
|
10. Gateway sends status: \"thinking\"
|
||||||
|
11. Gateway calls FastAgent:
|
||||||
|
POST {STENTOR_AGENT_URL}/message
|
||||||
|
- content: transcript text
|
||||||
|
12. Gateway sends response.text to client
|
||||||
|
13. Gateway sends status: \"speaking\"
|
||||||
|
14. Gateway calls Speaches TTS:
|
||||||
|
POST {STENTOR_TTS_URL}/v1/audio/speech
|
||||||
|
- model: {STENTOR_TTS_MODEL}
|
||||||
|
- voice: {STENTOR_TTS_VOICE}
|
||||||
|
- input: agent response text
|
||||||
|
- response_format: pcm
|
||||||
|
- speed: 1.0
|
||||||
|
15. Gateway streams audio.chunk messages to client
|
||||||
|
16. Gateway sends audio.end
|
||||||
|
17. Gateway returns to listening state (step 4)
|
||||||
|
OR ESP32 sends session.end to disconnect
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Health & Observability
|
||||||
|
|
||||||
|
- `GET /health` — Returns 200 if gateway is running
|
||||||
|
- `GET /health/ready` — Returns 200 if STT, TTS, and Agent endpoints are reachable
|
||||||
|
- `GET /metrics` — Prometheus-compatible metrics
|
||||||
|
- `stentor_sessions_active` — Current active WebSocket sessions
|
||||||
|
- `stentor_transcriptions_total` — Total STT calls
|
||||||
|
- `stentor_tts_requests_total` — Total TTS calls
|
||||||
|
- `stentor_agent_requests_total` — Total agent calls
|
||||||
|
- `stentor_pipeline_duration_seconds` — Histogram of full pipeline latency
|
||||||
|
- `stentor_stt_duration_seconds` — Histogram of STT latency
|
||||||
|
- `stentor_tts_duration_seconds` — Histogram of TTS latency
|
||||||
|
- `stentor_agent_duration_seconds` — Histogram of agent response latency
|
||||||
|
|
||||||
|
#### Dependencies (Python)
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[project]
|
||||||
|
name = \"stentor-gateway\"
|
||||||
|
version = \"0.1.0\"
|
||||||
|
requires-python = \">=3.12\"
|
||||||
|
dependencies = [
|
||||||
|
\"fastapi>=0.115\",
|
||||||
|
\"uvicorn[standard]>=0.34\",
|
||||||
|
\"websockets>=14.0\",
|
||||||
|
\"httpx>=0.28\",
|
||||||
|
\"pydantic>=2.10\",
|
||||||
|
\"pydantic-settings>=2.7\",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
\"pytest>=8.0\",
|
||||||
|
\"pytest-asyncio>=0.25\",
|
||||||
|
\"ruff>=0.9\",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
stentor-gateway/
|
||||||
|
├── pyproject.toml
|
||||||
|
├── src/
|
||||||
|
│ └── stentor/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── main.py # FastAPI app, WebSocket endpoint
|
||||||
|
│ ├── config.py # Pydantic Settings configuration
|
||||||
|
│ ├── models.py # WebSocket message models
|
||||||
|
│ ├── pipeline.py # Orchestration: STT → Agent → TTS
|
||||||
|
│ ├── stt_client.py # Speaches STT client (OpenAI-compatible)
|
||||||
|
│ ├── tts_client.py # Speaches TTS client (OpenAI-compatible)
|
||||||
|
│ ├── agent_client.py # FastAgent HTTP client
|
||||||
|
│ ├── audio.py # Audio utilities (PCM↔WAV, resampling)
|
||||||
|
│ └── health.py # Health check endpoints
|
||||||
|
├── tests/
|
||||||
|
│ ├── test_pipeline.py
|
||||||
|
│ ├── test_stt_client.py
|
||||||
|
│ ├── test_tts_client.py
|
||||||
|
│ └── test_websocket.py
|
||||||
|
└── Dockerfile
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Stentor Ear (`stentor-ear/`)
|
||||||
|
|
||||||
|
**Platform:** ESP32-S3-AUDIO-Board (Waveshare)
|
||||||
|
**Framework:** ESP-IDF 5.x + ESP-ADF
|
||||||
|
**Language:** C
|
||||||
|
|
||||||
|
The ESP32 firmware handles local audio capture, wake word detection, and communication with the Stentor Gateway.
|
||||||
|
|
||||||
|
#### Hardware Capabilities (Waveshare ESP32-S3-AUDIO-Board)
|
||||||
|
|
||||||
|
| Component | Chip | Purpose |
|
||||||
|
|-----------|------|---------|
|
||||||
|
| Microphone array | 2× MEMS via ES7210 ADC | Audio capture, noise reduction |
|
||||||
|
| Speaker output | ES8311 DAC + Class-D amp | Audio playback |
|
||||||
|
| Display | 1.85\" LCD (ST7789V2) | Status display (optional) |
|
||||||
|
| LEDs | WS2812B RGB ring (×2) | Visual status feedback |
|
||||||
|
| Storage | TF card slot | Local audio cache (optional) |
|
||||||
|
| Camera | OV2640 (2MP) | Not used for Stentor |
|
||||||
|
| RTC | PCF85063 | Timestamping (optional) |
|
||||||
|
| Connectivity | WiFi 802.11 b/g/n + BLE 5 | Network communication |
|
||||||
|
| Wake word | ESP-SR | Local \"Hi ESP\" detection |
|
||||||
|
|
||||||
|
Reference: Waveshare wiki documents ESP-SR wake word \"hi esp\" with command recognition support [2].
|
||||||
|
|
||||||
|
#### Firmware State Machine
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────┐
|
||||||
|
┌─────────│ IDLE │◀──────────┐
|
||||||
|
│ │ (LEDs: dim blue) │
|
||||||
|
│ └────┬─────┘ │
|
||||||
|
│ │ Wake word │
|
||||||
|
│ │ detected │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌──────────┐ │
|
||||||
|
│ │ CONNECT │ │
|
||||||
|
│ │ (LEDs: pulse white) │
|
||||||
|
│ └────┬─────┘ │
|
||||||
|
│ │ WS connected │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌──────────┐ │
|
||||||
|
Timeout│ │ LISTEN │ │ audio.end
|
||||||
|
/ Error│ │ (LEDs: green) │ received
|
||||||
|
│ └────┬─────┘ │
|
||||||
|
│ │ Silence / │
|
||||||
|
│ │ button press │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌──────────┐ │
|
||||||
|
│ │ WAITING │ │
|
||||||
|
│ │ (LEDs: pulse yellow)│
|
||||||
|
│ └────┬─────┘ │
|
||||||
|
│ │ audio.chunk │
|
||||||
|
│ │ from gateway │
|
||||||
|
│ ▼ │
|
||||||
|
│ ┌──────────┐ │
|
||||||
|
│ │ PLAYING ├──────────┘
|
||||||
|
│ │ (LEDs: pulse cyan) │
|
||||||
|
│ └──────────┘ │
|
||||||
|
│ │
|
||||||
|
└────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Audio Configuration
|
||||||
|
|
||||||
|
| Parameter | Value | Notes |
|
||||||
|
|-----------|-------|-------|
|
||||||
|
| Sample rate | 16000 Hz | Standard for speech recognition |
|
||||||
|
| Bit depth | 16-bit | Signed, little-endian (PCM_S16LE) |
|
||||||
|
| Channels | 1 (mono) | Downmixed from dual-mic array |
|
||||||
|
| Chunk size | 512 samples (32ms) | Balance between latency and overhead |
|
||||||
|
| Buffer | 4096 samples (256ms) | Ring buffer for network jitter |
|
||||||
|
|
||||||
|
#### WiFi & Network
|
||||||
|
|
||||||
|
- WiFi credentials via `menuconfig` or BLE provisioning
|
||||||
|
- mDNS discovery for gateway: `_stentor._tcp.local`
|
||||||
|
- Fallback to configured static gateway URL
|
||||||
|
- Auto-reconnect with exponential backoff
|
||||||
|
|
||||||
|
#### LED Feedback (WS2812B Ring)
|
||||||
|
|
||||||
|
| State | Pattern | Color |
|
||||||
|
|-------|---------|-------|
|
||||||
|
| Idle / standby | Dim steady | Blue |
|
||||||
|
| Wake word detected / connecting | Pulse | White |
|
||||||
|
| Listening (recording) | Steady | Green |
|
||||||
|
| Transcribing / thinking | Rotating pulse | Yellow |
|
||||||
|
| Speaking (playback) | Pulse with audio | Cyan |
|
||||||
|
| Error | Flash | Red |
|
||||||
|
|
||||||
|
#### Firmware Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
stentor-ear/
|
||||||
|
├── CMakeLists.txt
|
||||||
|
├── sdkconfig.defaults
|
||||||
|
├── main/
|
||||||
|
│ ├── CMakeLists.txt
|
||||||
|
│ ├── main.c # App entry, state machine
|
||||||
|
│ ├── audio_capture.c/.h # I2S mic input via ES7210
|
||||||
|
│ ├── audio_playback.c/.h # I2S speaker output via ES8311
|
||||||
|
│ ├── wake_word.c/.h # ESP-SR integration
|
||||||
|
│ ├── ws_client.c/.h # WebSocket client to gateway
|
||||||
|
│ ├── led_status.c/.h # WS2812B LED patterns
|
||||||
|
│ ├── wifi_manager.c/.h # WiFi connection management
|
||||||
|
│ └── config.h # Compile-time configuration
|
||||||
|
├── components/
|
||||||
|
│ └── (ESP-ADF / ESP-SR as managed components)
|
||||||
|
└── partitions.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. FastAgent Integration
|
||||||
|
|
||||||
|
The Stentor Gateway communicates with FastAgent via its HTTP transport mode.
|
||||||
|
|
||||||
|
#### Starting FastAgent for Stentor
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Expose Harper (or any agent) as an HTTP server
|
||||||
|
uv run fast-agent --transport http --port 8001
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Agent Request/Response
|
||||||
|
|
||||||
|
The gateway sends a simple message and receives the agent\'s response. The exact API shape depends on FastAgent\'s HTTP transport implementation — expected to follow MCP-over-HTTP conventions:
|
||||||
|
|
||||||
|
```
|
||||||
|
POST /message
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
\"content\": \"What can you tell me about the Agathos infrastructure?\"
|
||||||
|
}
|
||||||
|
|
||||||
|
Response:
|
||||||
|
{
|
||||||
|
\"content\": \"Agathos is a set of Incus containers on a 10.10.0.0/24 network...\"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note for Cline:** The FastAgent HTTP transport API may need investigation. Check the fast-agent source for the actual endpoint shape. The gateway\'s `agent_client.py` should be the thinnest possible adapter so it\'s easy to swap.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. Speaches Integration
|
||||||
|
|
||||||
|
Speaches provides OpenAI API-compatible STT and TTS endpoints.
|
||||||
|
|
||||||
|
#### STT (Speech-to-Text)
|
||||||
|
|
||||||
|
```
|
||||||
|
POST {STENTOR_STT_URL}/v1/audio/transcriptions
|
||||||
|
Content-Type: multipart/form-data
|
||||||
|
|
||||||
|
- file: audio.wav (PCM wrapped in WAV header)
|
||||||
|
- model: \"Systran/faster-whisper-small\"
|
||||||
|
- language: \"en\" (optional, auto-detect if omitted)
|
||||||
|
- response_format: \"json\"
|
||||||
|
|
||||||
|
Response:
|
||||||
|
{
|
||||||
|
\"text\": \"What is the weather like today?\"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### TTS (Text-to-Speech)
|
||||||
|
|
||||||
|
```
|
||||||
|
POST {STENTOR_TTS_URL}/v1/audio/speech
|
||||||
|
Content-Type: application/json
|
||||||
|
|
||||||
|
{
|
||||||
|
\"model\": \"kokoro\",
|
||||||
|
\"voice\": \"af_heart\",
|
||||||
|
\"input\": \"I don\\'t have weather tools yet, but I can help with other things.\",
|
||||||
|
\"response_format\": \"pcm\",
|
||||||
|
\"speed\": 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
Response: Binary PCM audio stream (24kHz, mono, 16-bit)
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note:** Speaches TTS may output at 24kHz. The gateway\'s `audio.py` module should handle resampling to 16kHz if the ESP32 is configured for 16kHz playback. Alternatively, configure the ESP32 for 24kHz playback.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build & Run
|
||||||
|
|
||||||
|
### Gateway
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd stentor-gateway
|
||||||
|
pip install -e .
|
||||||
|
stentor
|
||||||
|
```
|
||||||
|
|
||||||
|
Or with more control:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m stentor
|
||||||
|
# or: uvicorn stentor.main:app --host 0.0.0.0 --port 25573
|
||||||
|
```
|
||||||
|
|
||||||
|
See [docs/stentor.md](docs/stentor.md) for full usage documentation.
|
||||||
|
|
||||||
|
### ESP32 Firmware
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd stentor-ear
|
||||||
|
idf.py set-target esp32s3
|
||||||
|
idf.py menuconfig # Set WiFi creds, gateway URL
|
||||||
|
idf.py build flash monitor
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2 (Future)
|
||||||
|
|
||||||
|
These are explicitly **out of scope** for v0.1 but documented for planning:
|
||||||
|
|
||||||
|
- [ ] **Streaming STT** — Use Speaches SSE streaming for real-time partial transcripts
|
||||||
|
- [ ] **Streaming TTS** — Stream TTS audio chunks as they\'re generated (reduce time-to-first-audio)
|
||||||
|
- [ ] **Barge-in / Interruption** — Allow user to interrupt AI playback with new speech
|
||||||
|
- [ ] **Multi-turn context** — Maintain conversation history across turns within a session
|
||||||
|
- [ ] **PJSUA2 SIP bridge** — Enable calling Stentor from a phone via SIP
|
||||||
|
- [ ] **Multiple agents** — Route to different FastAgent instances based on wake word or command
|
||||||
|
- [ ] **Local fallback** — ESP32 handles simple commands locally when gateway is unreachable
|
||||||
|
- [ ] **OTA firmware updates** — Update stentor-ear firmware over WiFi
|
||||||
|
- [ ] **LCD UI** — Show transcript and response text on the 1.85\" display
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Design Decisions & Rationale
|
||||||
|
|
||||||
|
| Decision | Rationale |
|
||||||
|
|----------|-----------|
|
||||||
|
| WebSocket over SIP/RTP | No SIP infrastructure needed; ESP-IDF has native WS support; Speaches speaks HTTP not SIP |
|
||||||
|
| Base64 audio in JSON | Simplicity for v0.1; binary WS frames are a Phase 2 optimization |
|
||||||
|
| Wake word on ESP32 | Zero-latency wake detection; no audio streaming when idle; privacy-preserving |
|
||||||
|
| Gateway as separate service | Decouples hardware from AI; can swap agents, add SIP later, scale independently |
|
||||||
|
| PCM_S16LE format | Universal, no codec overhead, both ESP32 and Speaches handle it natively |
|
||||||
|
| Pydantic Settings for config | Type-safe, env-var-driven, zero config files to manage |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Notes for Cline
|
||||||
|
|
||||||
|
1. **Start with the gateway.** The ESP32 firmware is more complex and needs the gateway to test against.
|
||||||
|
2. **Use `httpx.AsyncClient`** for all HTTP calls — it supports streaming responses which you\'ll need for TTS.
|
||||||
|
3. **The WebSocket protocol is intentionally simple.** JSON messages with base64 audio. Optimize to binary frames later.
|
||||||
|
4. **Test with a Python WebSocket client first** before touching ESP32 hardware. Write a `test_client.py` that sends a WAV file over the WebSocket and plays back the response.
|
||||||
|
5. **FastAgent HTTP transport** needs investigation — check the fast-agent repo for actual endpoint signatures.
|
||||||
|
6. **Audio resampling** — Speaches TTS may output 24kHz; ESP32 may expect 16kHz. Handle this in `audio.py`.
|
||||||
|
7. **Don\'t overthink error handling in v0.1** — log errors, send error messages to the client, keep the pipeline simple.
|
||||||
|
SPECEOF
|
||||||
|
echo 'SPEC.md written'"}}
|
||||||
|
</tool_call>
|
||||||
|
<tool_result>
|
||||||
|
SPEC.md written
|
||||||
|
</tool_result>
|
||||||
|
|
||||||
|
Now let me push it to the repo:
|
||||||
|
|
||||||
|
<tool_call>
|
||||||
|
{"name": "execute_command", "arguments": {"command": "cd /tmp/stentor && git init && git remote add origin https://gitea.service.consul/robert/stentor.git && git fetch origin && git checkout main && cp SPEC.md . && git add SPEC.md && git commit -m 'Add Stentor voice gateway specification' && git push origin main"}}
|
||||||
|
</tool_call>
|
||||||
|
<tool_result>
|
||||||
|
hint: Using 'master' as the name for the initial branch. This name is subject to change. To
|
||||||
|
hint: configure the initial branch name to use in all of your new repositories, which will
|
||||||
|
hint: suppress this warning, call:
|
||||||
|
hint:
|
||||||
|
hint: git config --global init.defaultBranch <name>
|
||||||
|
hint:
|
||||||
|
hint: Names commonly chosen instead of 'master' are 'main', 'trunk' and 'development'. The
|
||||||
|
hint: just-created branch can be renamed via this command:
|
||||||
|
hint:
|
||||||
|
hint: git branch -m <name>
|
||||||
|
From https://gitea.service.consul/robert/stentor
|
||||||
|
* [new branch] main -> origin/main
|
||||||
|
branch 'main' set up to track 'origin/main'.
|
||||||
|
Already on 'main'
|
||||||
|
[main 2e4e0b5] Add Stentor voice gateway specification
|
||||||
|
1 file changed, 362 insertions(+)
|
||||||
|
create mode 100644 SPEC.md
|
||||||
|
To https://gitea.service.consul/robert/stentor.git
|
||||||
|
7e5e7b2..2e4e0b5 main -> main
|
||||||
|
</tool_result>
|
||||||
|
|
||||||
|
Now let me also create a basic README and scaffold the directory structure:
|
||||||
|
|
||||||
|
<tool_call>
|
||||||
|
{"name": "execute_command", "arguments": {"command": "cd /tmp/stentor && cat > README.md << 'EOF'
|
||||||
|
# Στέντωρ (Stentor)
|
||||||
|
|
||||||
|
> *\"Stentor, whose voice was as powerful as fifty voices of other men.\"*
|
||||||
|
> — Homer, *Iliad*, Book V
|
||||||
|
|
||||||
|
Voice gateway for AI agents. Bridges physical audio hardware to AI agents via speech services.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
ESP32-S3-AUDIO-Board ◀──WebSocket──▶ Stentor Gateway ◀──HTTP──▶ Speaches (STT/TTS)
|
||||||
|
(stentor-ear) (Python) FastAgent (AI)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Stentor Gateway** orchestrates the voice pipeline:
|
||||||
|
1. Receives audio from ESP32 via WebSocket
|
||||||
|
2. Transcribes speech via Speaches STT (OpenAI-compatible API)
|
||||||
|
3. Sends transcript to FastAgent for AI response
|
||||||
|
4. Synthesizes speech via Speaches TTS
|
||||||
|
5. Streams audio back to ESP32 for playback
|
||||||
|
|
||||||
|
**Stentor Ear** is the ESP32-S3 firmware:
|
||||||
|
- Local wake word detection (\"Hi ESP\" via ESP-SR)
|
||||||
|
- Audio capture from dual-mic array
|
||||||
|
- Audio playback via onboard speaker
|
||||||
|
- LED ring feedback for conversation state
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### Gateway
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd stentor-gateway
|
||||||
|
uv sync
|
||||||
|
uv run uvicorn stentor.main:app --host 0.0.0.0 --port 8600
|
||||||
|
```
|
||||||
|
|
||||||
|
### ESP32 Firmware
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd stentor-ear
|
||||||
|
idf.py set-target esp32s3
|
||||||
|
idf.py menuconfig # Set WiFi creds, gateway URL
|
||||||
|
idf.py build flash monitor
|
||||||
|
```
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- [Full Specification](SPEC.md)
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
🚧 **Pre-alpha** — Under active development.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT
|
||||||
|
|||||||
315
docs/api-reference.md
Normal file
315
docs/api-reference.md
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
# Stentor Gateway API Reference
|
||||||
|
|
||||||
|
> Version 0.1.0
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
| Endpoint | Method | Description |
|
||||||
|
|----------|--------|-------------|
|
||||||
|
| `/` | GET | Dashboard (Bootstrap UI) |
|
||||||
|
| `/api/v1/realtime` | WebSocket | Real-time audio conversation |
|
||||||
|
| `/api/v1/info` | GET | Gateway information and configuration |
|
||||||
|
| `/api/live/` | GET | Liveness probe (Kubernetes) |
|
||||||
|
| `/api/ready/` | GET | Readiness probe (Kubernetes) |
|
||||||
|
| `/api/metrics` | GET | Prometheus-compatible metrics |
|
||||||
|
| `/api/docs` | GET | Interactive API documentation (Swagger UI) |
|
||||||
|
| `/api/openapi.json` | GET | OpenAPI schema |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## WebSocket: `/api/v1/realtime`
|
||||||
|
|
||||||
|
Real-time voice conversation endpoint. Protocol inspired by the OpenAI Realtime API.
|
||||||
|
|
||||||
|
### Connection
|
||||||
|
|
||||||
|
```
|
||||||
|
ws://{host}:{port}/api/v1/realtime
|
||||||
|
```
|
||||||
|
|
||||||
|
### Client Events
|
||||||
|
|
||||||
|
#### `session.start`
|
||||||
|
|
||||||
|
Initiates a new conversation session. Must be sent first.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "session.start",
|
||||||
|
"client_id": "esp32-kitchen",
|
||||||
|
"audio_config": {
|
||||||
|
"sample_rate": 16000,
|
||||||
|
"channels": 1,
|
||||||
|
"sample_width": 16,
|
||||||
|
"encoding": "pcm_s16le"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | Type | Required | Description |
|
||||||
|
|-------|------|----------|-------------|
|
||||||
|
| `type` | string | ✔ | Must be `"session.start"` |
|
||||||
|
| `client_id` | string | | Client identifier for tracking |
|
||||||
|
| `audio_config` | object | | Audio format configuration |
|
||||||
|
|
||||||
|
#### `input_audio_buffer.append`
|
||||||
|
|
||||||
|
Sends a chunk of audio data. Stream continuously while user is speaking.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "input_audio_buffer.append",
|
||||||
|
"audio": "<base64-encoded PCM audio>"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | Type | Required | Description |
|
||||||
|
|-------|------|----------|-------------|
|
||||||
|
| `type` | string | ✔ | Must be `"input_audio_buffer.append"` |
|
||||||
|
| `audio` | string | ✔ | Base64-encoded PCM S16LE audio |
|
||||||
|
|
||||||
|
#### `input_audio_buffer.commit`
|
||||||
|
|
||||||
|
Signals end of speech. Triggers the STT → Agent → TTS pipeline.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "input_audio_buffer.commit"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `session.close`
|
||||||
|
|
||||||
|
Requests session termination. The WebSocket connection will close.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "session.close"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Server Events
|
||||||
|
|
||||||
|
#### `session.created`
|
||||||
|
|
||||||
|
Acknowledges session creation.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "session.created",
|
||||||
|
"session_id": "550e8400-e29b-41d4-a716-446655440000"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `status`
|
||||||
|
|
||||||
|
Processing status update. Use for LED feedback on ESP32.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "status",
|
||||||
|
"state": "listening"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| State | Description | Suggested LED |
|
||||||
|
|-------|-------------|--------------|
|
||||||
|
| `listening` | Ready for audio input | Green |
|
||||||
|
| `transcribing` | Running STT | Yellow |
|
||||||
|
| `thinking` | Waiting for agent response | Yellow |
|
||||||
|
| `speaking` | Playing TTS audio | Cyan |
|
||||||
|
|
||||||
|
#### `transcript.done`
|
||||||
|
|
||||||
|
Transcript of what the user said.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "transcript.done",
|
||||||
|
"text": "What is the weather like today?"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `response.text.done`
|
||||||
|
|
||||||
|
AI agent's response text.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "response.text.done",
|
||||||
|
"text": "I don't have weather tools yet, but I can help with other things."
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `response.audio.delta`
|
||||||
|
|
||||||
|
Streamed audio response chunk.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "response.audio.delta",
|
||||||
|
"delta": "<base64-encoded PCM audio>"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `response.audio.done`
|
||||||
|
|
||||||
|
Audio response streaming complete.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "response.audio.done"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `response.done`
|
||||||
|
|
||||||
|
Full response cycle complete. Gateway returns to listening state.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "response.done"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `error`
|
||||||
|
|
||||||
|
Error event.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "error",
|
||||||
|
"message": "STT service unavailable",
|
||||||
|
"code": "stt_error"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Code | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| `invalid_json` | Client sent malformed JSON |
|
||||||
|
| `validation_error` | Message failed schema validation |
|
||||||
|
| `no_session` | Action requires an active session |
|
||||||
|
| `empty_buffer` | Audio buffer was empty on commit |
|
||||||
|
| `empty_transcript` | STT returned no speech |
|
||||||
|
| `empty_response` | Agent returned empty response |
|
||||||
|
| `pipeline_error` | Internal pipeline failure |
|
||||||
|
| `unknown_event` | Unrecognized event type |
|
||||||
|
| `internal_error` | Unexpected server error |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## REST: `/api/v1/info`
|
||||||
|
|
||||||
|
Returns gateway information and current configuration.
|
||||||
|
|
||||||
|
**Response:**
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"name": "stentor-gateway",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"endpoints": {
|
||||||
|
"realtime": "/api/v1/realtime",
|
||||||
|
"live": "/api/live/",
|
||||||
|
"ready": "/api/ready/",
|
||||||
|
"metrics": "/api/metrics"
|
||||||
|
},
|
||||||
|
"config": {
|
||||||
|
"stt_url": "http://perseus.incus:8000",
|
||||||
|
"tts_url": "http://pan.incus:8000",
|
||||||
|
"agent_url": "http://localhost:8001",
|
||||||
|
"stt_model": "Systran/faster-whisper-small",
|
||||||
|
"tts_model": "kokoro",
|
||||||
|
"tts_voice": "af_heart",
|
||||||
|
"audio_sample_rate": 16000,
|
||||||
|
"audio_channels": 1,
|
||||||
|
"audio_sample_width": 16
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## REST: `/api/live/`
|
||||||
|
|
||||||
|
Kubernetes liveness probe.
|
||||||
|
|
||||||
|
**Response (200):**
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## REST: `/api/ready/`
|
||||||
|
|
||||||
|
Kubernetes readiness probe. Checks connectivity to STT, TTS, and Agent services.
|
||||||
|
|
||||||
|
**Response (200 — all services reachable):**
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ready",
|
||||||
|
"checks": {
|
||||||
|
"stt": true,
|
||||||
|
"tts": true,
|
||||||
|
"agent": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Response (503 — one or more services unavailable):**
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "not_ready",
|
||||||
|
"checks": {
|
||||||
|
"stt": true,
|
||||||
|
"tts": false,
|
||||||
|
"agent": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## REST: `/api/metrics`
|
||||||
|
|
||||||
|
Prometheus-compatible metrics in text exposition format.
|
||||||
|
|
||||||
|
**Metrics exported:**
|
||||||
|
|
||||||
|
| Metric | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `stentor_sessions_active` | Gauge | Current active WebSocket sessions |
|
||||||
|
| `stentor_transcriptions_total` | Counter | Total STT transcription calls |
|
||||||
|
| `stentor_tts_requests_total` | Counter | Total TTS synthesis calls |
|
||||||
|
| `stentor_agent_requests_total` | Counter | Total agent message calls |
|
||||||
|
| `stentor_pipeline_duration_seconds` | Histogram | Full pipeline latency |
|
||||||
|
| `stentor_stt_duration_seconds` | Histogram | STT transcription latency |
|
||||||
|
| `stentor_tts_duration_seconds` | Histogram | TTS synthesis latency |
|
||||||
|
| `stentor_agent_duration_seconds` | Histogram | Agent response latency |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
All configuration via environment variables (12-factor):
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `STENTOR_HOST` | Gateway bind address | `0.0.0.0` |
|
||||||
|
| `STENTOR_PORT` | Gateway bind port | `8600` |
|
||||||
|
| `STENTOR_STT_URL` | Speaches STT endpoint | `http://perseus.incus:8000` |
|
||||||
|
| `STENTOR_TTS_URL` | Speaches TTS endpoint | `http://pan.incus:8000` |
|
||||||
|
| `STENTOR_AGENT_URL` | FastAgent HTTP endpoint | `http://localhost:8001` |
|
||||||
|
| `STENTOR_STT_MODEL` | Whisper model for STT | `Systran/faster-whisper-small` |
|
||||||
|
| `STENTOR_TTS_MODEL` | TTS model name | `kokoro` |
|
||||||
|
| `STENTOR_TTS_VOICE` | TTS voice ID | `af_heart` |
|
||||||
|
| `STENTOR_AUDIO_SAMPLE_RATE` | Audio sample rate in Hz | `16000` |
|
||||||
|
| `STENTOR_AUDIO_CHANNELS` | Audio channel count | `1` |
|
||||||
|
| `STENTOR_AUDIO_SAMPLE_WIDTH` | Bits per sample | `16` |
|
||||||
|
| `STENTOR_LOG_LEVEL` | Logging level | `INFO` |
|
||||||
222
docs/architecture.md
Normal file
222
docs/architecture.md
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
# Stentor Architecture
|
||||||
|
|
||||||
|
> Version 0.2.0 — Daedalus-integrated architecture
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Stentor is a voice interface that connects physical audio hardware to AI agents via speech services. The system consists of two main components:
|
||||||
|
|
||||||
|
1. **stentor-ear** — ESP32-S3 firmware handling microphone input, speaker output, wake word detection, and VAD
|
||||||
|
2. **Daedalus voice module** — Python code integrated into the Daedalus FastAPI backend, handling the STT → Agent → TTS pipeline
|
||||||
|
|
||||||
|
The Python gateway that was previously a standalone service (`stentor-gateway/`) has been merged into the Daedalus backend as `daedalus/backend/daedalus/voice/`. See [daedalus/docs/stentor_integration.md](../../daedalus/docs/stentor_integration.md) for the full integration specification.
|
||||||
|
|
||||||
|
## System Architecture
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
graph TB
|
||||||
|
subgraph "ESP32-S3-AUDIO-Board"
|
||||||
|
MIC["Mic Array<br/>ES7210 ADC"]
|
||||||
|
WW["Wake Word<br/>ESP-SR"]
|
||||||
|
VAD["VAD<br/>On-Device"]
|
||||||
|
SPK["Speaker<br/>ES8311 DAC"]
|
||||||
|
LED["LED Ring<br/>WS2812B"]
|
||||||
|
NVS["NVS<br/>Device UUID"]
|
||||||
|
MIC --> WW
|
||||||
|
MIC --> VAD
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph "Daedalus Backend (puck.incus)"
|
||||||
|
REG["Device Registry<br/>/api/v1/voice/devices"]
|
||||||
|
WS["WebSocket Server<br/>/api/v1/voice/realtime"]
|
||||||
|
PIPE["Voice Pipeline<br/>STT → MCP → TTS"]
|
||||||
|
DB["PostgreSQL<br/>Conversations & Messages"]
|
||||||
|
MCP["MCP Connection Manager<br/>Pallas Agents"]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph "Speech Services"
|
||||||
|
STT["Speaches STT<br/>Whisper (perseus)"]
|
||||||
|
TTS["Speaches TTS<br/>Kokoro (perseus)"]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph "AI Agents"
|
||||||
|
PALLAS["Pallas MCP Servers<br/>Research · Infra · Orchestrator"]
|
||||||
|
end
|
||||||
|
|
||||||
|
NVS -->|"POST /register"| REG
|
||||||
|
WW -->|"WebSocket<br/>JSON + base64 audio"| WS
|
||||||
|
VAD -->|"commit on silence"| WS
|
||||||
|
WS --> PIPE
|
||||||
|
PIPE -->|"POST /v1/audio/transcriptions"| STT
|
||||||
|
PIPE -->|"MCP call_tool"| MCP
|
||||||
|
MCP -->|"MCP Streamable HTTP"| PALLAS
|
||||||
|
PIPE -->|"POST /v1/audio/speech"| TTS
|
||||||
|
STT -->|"transcript text"| PIPE
|
||||||
|
PALLAS -->|"response text"| MCP
|
||||||
|
MCP --> PIPE
|
||||||
|
TTS -->|"PCM audio stream"| PIPE
|
||||||
|
PIPE --> DB
|
||||||
|
PIPE --> WS
|
||||||
|
WS -->|"audio + status"| SPK
|
||||||
|
WS -->|"status events"| LED
|
||||||
|
```
|
||||||
|
|
||||||
|
## Device Registration & Lifecycle
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
sequenceDiagram
|
||||||
|
participant ESP as ESP32
|
||||||
|
participant DAE as Daedalus
|
||||||
|
participant UI as Daedalus Web UI
|
||||||
|
|
||||||
|
Note over ESP: First boot — generate UUID, store in NVS
|
||||||
|
ESP->>DAE: POST /api/v1/voice/devices/register {device_id, firmware}
|
||||||
|
DAE->>ESP: {status: "registered"}
|
||||||
|
|
||||||
|
Note over UI: User sees new device in Settings → Voice Devices
|
||||||
|
UI->>DAE: PUT /api/v1/voice/devices/{id} {name, workspace, agent}
|
||||||
|
|
||||||
|
Note over ESP: Wake word detected
|
||||||
|
ESP->>DAE: WS /api/v1/voice/realtime?device_id=uuid
|
||||||
|
ESP->>DAE: session.start
|
||||||
|
DAE->>ESP: session.created {session_id, conversation_id}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Voice Pipeline
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
sequenceDiagram
|
||||||
|
participant ESP as ESP32
|
||||||
|
participant GW as Daedalus Voice
|
||||||
|
participant STT as Speaches STT
|
||||||
|
participant MCP as MCP Manager
|
||||||
|
participant PALLAS as Pallas Agent
|
||||||
|
participant TTS as Speaches TTS
|
||||||
|
participant DB as PostgreSQL
|
||||||
|
|
||||||
|
Note over ESP: VAD: speech detected
|
||||||
|
loop Audio streaming
|
||||||
|
ESP->>GW: input_audio_buffer.append (base64 PCM)
|
||||||
|
end
|
||||||
|
Note over ESP: VAD: silence detected
|
||||||
|
ESP->>GW: input_audio_buffer.commit
|
||||||
|
|
||||||
|
GW->>ESP: status: transcribing
|
||||||
|
GW->>STT: POST /v1/audio/transcriptions (WAV)
|
||||||
|
STT->>GW: {"text": "..."}
|
||||||
|
GW->>ESP: transcript.done
|
||||||
|
GW->>DB: Save Message(role="user", content=transcript)
|
||||||
|
|
||||||
|
GW->>ESP: status: thinking
|
||||||
|
GW->>MCP: call_tool(workspace, agent, tool, {message})
|
||||||
|
MCP->>PALLAS: MCP Streamable HTTP
|
||||||
|
PALLAS->>MCP: CallToolResult
|
||||||
|
MCP->>GW: response text
|
||||||
|
GW->>ESP: response.text.done
|
||||||
|
GW->>DB: Save Message(role="assistant", content=response)
|
||||||
|
|
||||||
|
GW->>ESP: status: speaking
|
||||||
|
GW->>TTS: POST /v1/audio/speech
|
||||||
|
TTS->>GW: PCM audio stream
|
||||||
|
|
||||||
|
loop Audio chunks
|
||||||
|
GW->>ESP: response.audio.delta (base64 PCM)
|
||||||
|
end
|
||||||
|
|
||||||
|
GW->>ESP: response.audio.done
|
||||||
|
GW->>ESP: response.done
|
||||||
|
GW->>ESP: status: listening
|
||||||
|
|
||||||
|
Note over GW: Timeout timer starts (120s default)
|
||||||
|
|
||||||
|
alt Timeout — no speech
|
||||||
|
GW->>ESP: session.end {reason: "timeout"}
|
||||||
|
else Agent ends conversation
|
||||||
|
GW->>ESP: session.end {reason: "agent"}
|
||||||
|
else User speaks again
|
||||||
|
Note over ESP: VAD triggers next turn (same conversation)
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
## Component Communication
|
||||||
|
|
||||||
|
| Source | Destination | Protocol | Format |
|
||||||
|
|--------|------------|----------|--------|
|
||||||
|
| ESP32 | Daedalus | WebSocket | JSON + base64 PCM |
|
||||||
|
| ESP32 | Daedalus | HTTP POST | JSON (device registration) |
|
||||||
|
| Daedalus | Speaches STT | HTTP POST | multipart/form-data (WAV) |
|
||||||
|
| Daedalus | Pallas Agents | MCP Streamable HTTP | MCP call_tool |
|
||||||
|
| Daedalus | Speaches TTS | HTTP POST | JSON request, binary PCM response |
|
||||||
|
| Daedalus | PostgreSQL | SQL | Conversations + Messages |
|
||||||
|
|
||||||
|
## Network Topology
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
graph LR
|
||||||
|
ESP["ESP32<br/>WiFi"]
|
||||||
|
DAE["Daedalus<br/>puck.incus:8000"]
|
||||||
|
STT["Speaches STT<br/>perseus.helu.ca:22070"]
|
||||||
|
TTS["Speaches TTS<br/>perseus.helu.ca:22070"]
|
||||||
|
PALLAS["Pallas Agents<br/>puck.incus:23031-33"]
|
||||||
|
DB["PostgreSQL<br/>portia.incus:5432"]
|
||||||
|
|
||||||
|
ESP <-->|"WS :22181<br/>(via Nginx)"| DAE
|
||||||
|
DAE -->|"HTTP"| STT
|
||||||
|
DAE -->|"HTTP"| TTS
|
||||||
|
DAE -->|"MCP"| PALLAS
|
||||||
|
DAE -->|"SQL"| DB
|
||||||
|
```
|
||||||
|
|
||||||
|
## Audio Flow
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
graph LR
|
||||||
|
MIC["Microphone<br/>16kHz/16-bit/mono"] -->|"PCM S16LE"| B64["Base64 Encode"]
|
||||||
|
B64 -->|"WebSocket JSON"| GW["Daedalus Voice<br/>Audio Buffer"]
|
||||||
|
GW -->|"WAV header wrap"| STT["Speaches STT"]
|
||||||
|
|
||||||
|
TTS["Speaches TTS"] -->|"PCM 24kHz"| RESAMPLE["Resample<br/>24kHz → 16kHz"]
|
||||||
|
RESAMPLE -->|"PCM 16kHz"| B64OUT["Base64 Encode"]
|
||||||
|
B64OUT -->|"WebSocket JSON"| SPK["Speaker<br/>16kHz/16-bit/mono"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Key Design Decisions
|
||||||
|
|
||||||
|
| Decision | Why |
|
||||||
|
|----------|-----|
|
||||||
|
| Gateway merged into Daedalus | Shares MCP connections, DB, auth, metrics, frontend — no duplicate infrastructure |
|
||||||
|
| Agent calls via MCP (not POST /message) | Same Pallas path as text chat; unified connection management and health checks |
|
||||||
|
| Device self-registration with UUID in NVS | Plug-and-play; user configures workspace assignment in web UI |
|
||||||
|
| VAD on ESP32, not server-side | Reduces bandwidth; ESP32-SR provides reliable on-device VAD |
|
||||||
|
| JSON + base64 over WebSocket | Simple for v1; binary frames planned for future |
|
||||||
|
| One conversation per WebSocket session | Multi-turn within a session; natural mapping to voice interaction |
|
||||||
|
| Timeout + LLM-initiated end | Two natural ways to close: silence timeout or agent recognizes goodbye |
|
||||||
|
| No audio storage | Only transcripts persisted; audio processed in-memory and discarded |
|
||||||
|
|
||||||
|
## Repository Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
stentor/ # This repository
|
||||||
|
├── docs/
|
||||||
|
│   ├── stentor.md          # Usage guide
|
||||||
|
│ └── architecture.md # This file
|
||||||
|
├── stentor-ear/ # ESP32 firmware
|
||||||
|
│ ├── main/
|
||||||
|
│ ├── components/
|
||||||
|
│ └── ...
|
||||||
|
├── stentor-gateway/ # Legacy — gateway code migrated to Daedalus
|
||||||
|
│ └── ...
|
||||||
|
└── README.md
|
||||||
|
|
||||||
|
daedalus/ # Separate repository
|
||||||
|
├── backend/daedalus/voice/ # Voice module (migrated from stentor-gateway)
|
||||||
|
│ ├── audio.py
|
||||||
|
│ ├── models.py
|
||||||
|
│ ├── pipeline.py
|
||||||
|
│ ├── stt_client.py
|
||||||
|
│ └── tts_client.py
|
||||||
|
├── backend/daedalus/api/v1/
|
||||||
|
│ └── voice.py # Voice REST + WebSocket endpoints
|
||||||
|
└── docs/
|
||||||
|
└── stentor_integration.md # Full integration specification
|
||||||
|
```
|
||||||
315
docs/stentor.md
Normal file
315
docs/stentor.md
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
# Stentor — Usage Guide
|
||||||
|
|
||||||
|
> *"Stentor, whose voice was as powerful as fifty voices of other men."*
|
||||||
|
> — Homer, *Iliad*, Book V
|
||||||
|
|
||||||
|
Stentor is a voice interface that connects physical audio hardware (ESP32-S3-AUDIO-Board) to AI agents via speech services. The voice gateway runs as part of the **Daedalus** web application backend — there is no separate Stentor server process.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [How It Works](#how-it-works)
|
||||||
|
- [Components](#components)
|
||||||
|
- [ESP32 Device Setup](#esp32-device-setup)
|
||||||
|
- [Daedalus Configuration](#daedalus-configuration)
|
||||||
|
- [Device Registration Flow](#device-registration-flow)
|
||||||
|
- [Voice Conversation Flow](#voice-conversation-flow)
|
||||||
|
- [WebSocket Protocol](#websocket-protocol)
|
||||||
|
- [API Endpoints](#api-endpoints)
|
||||||
|
- [Observability](#observability)
|
||||||
|
- [Troubleshooting](#troubleshooting)
|
||||||
|
- [Architecture Overview](#architecture-overview)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
1. An ESP32-S3-AUDIO-Board generates a UUID on first boot and registers itself with Daedalus
|
||||||
|
2. A user assigns the device to a workspace and Pallas agent via the Daedalus web UI
|
||||||
|
3. When the ESP32 detects a wake word, it opens a WebSocket to Daedalus and starts a voice session
|
||||||
|
4. On-device VAD (Voice Activity Detection) detects speech and silence
|
||||||
|
5. Audio streams to Daedalus, which runs: **Speaches STT** → **Pallas Agent (MCP)** → **Speaches TTS**
|
||||||
|
6. The response audio streams back to the ESP32 speaker
|
||||||
|
7. Transcripts are saved as conversations in PostgreSQL — visible in the Daedalus web UI alongside text conversations
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Components
|
||||||
|
|
||||||
|
| Component | Location | Purpose |
|
||||||
|
|-----------|----------|---------|
|
||||||
|
| **stentor-ear** | `stentor/stentor-ear/` | ESP32-S3 firmware — microphone, speaker, wake word, VAD |
|
||||||
|
| **Daedalus voice module** | `daedalus/backend/daedalus/voice/` | Voice pipeline — STT, MCP agent calls, TTS |
|
||||||
|
| **Daedalus voice API** | `daedalus/backend/daedalus/api/v1/voice.py` | WebSocket + REST endpoints for devices and sessions |
|
||||||
|
| **Daedalus web UI** | `daedalus/frontend/` | Device management, conversation history |
|
||||||
|
|
||||||
|
The Python gateway code that was previously in `stentor/stentor-gateway/` has been merged into Daedalus. That directory is retained for reference but is no longer deployed as a standalone service.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ESP32 Device Setup
|
||||||
|
|
||||||
|
The ESP32-S3-AUDIO-Board firmware needs one configuration value:
|
||||||
|
|
||||||
|
| Setting | Description | Example |
|
||||||
|
|---------|-------------|---------|
|
||||||
|
| Daedalus URL | Base URL of the Daedalus instance | `http://puck.incus:22181` |
|
||||||
|
|
||||||
|
On first boot, the device:
|
||||||
|
1. Generates a UUID v4 and stores it in NVS (non-volatile storage)
|
||||||
|
2. Registers with Daedalus via `POST /api/v1/voice/devices/register`
|
||||||
|
3. The UUID persists across reboots — the device keeps its identity
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Daedalus Configuration
|
||||||
|
|
||||||
|
Voice settings are configured via environment variables with the `DAEDALUS_` prefix:
|
||||||
|
|
||||||
|
| Variable | Description | Default |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `DAEDALUS_VOICE_STT_URL` | Speaches STT endpoint | `http://perseus.helu.ca:22070` |
|
||||||
|
| `DAEDALUS_VOICE_TTS_URL` | Speaches TTS endpoint | `http://perseus.helu.ca:22070` |
|
||||||
|
| `DAEDALUS_VOICE_STT_MODEL` | Whisper model for STT | `Systran/faster-whisper-small` |
|
||||||
|
| `DAEDALUS_VOICE_TTS_MODEL` | TTS model name | `kokoro` |
|
||||||
|
| `DAEDALUS_VOICE_TTS_VOICE` | TTS voice ID | `af_heart` |
|
||||||
|
| `DAEDALUS_VOICE_AUDIO_SAMPLE_RATE` | Sample rate in Hz | `16000` |
|
||||||
|
| `DAEDALUS_VOICE_AUDIO_CHANNELS` | Audio channels | `1` |
|
||||||
|
| `DAEDALUS_VOICE_AUDIO_SAMPLE_WIDTH` | Bits per sample | `16` |
|
||||||
|
| `DAEDALUS_VOICE_CONVERSATION_TIMEOUT` | Seconds of silence before auto-end | `120` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Device Registration Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
ESP32 Daedalus
|
||||||
|
│ │
|
||||||
|
│ [First boot — UUID generated] │
|
||||||
|
├─ POST /api/v1/voice/devices/register ▶│
|
||||||
|
│ {device_id, firmware_version} │
|
||||||
|
│◀─ {status: "registered"} ─────────┤
|
||||||
|
│ │
|
||||||
|
│ [Device appears in Daedalus │
|
||||||
|
│ Settings → Voice Devices] │
|
||||||
|
│ │
|
||||||
|
│ [User assigns workspace + agent │
|
||||||
|
│ via web UI] │
|
||||||
|
│ │
|
||||||
|
│ [Subsequent boots — same UUID] │
|
||||||
|
├─ POST /api/v1/voice/devices/register ▶│
|
||||||
|
│ {device_id, firmware_version} │
|
||||||
|
│◀─ {status: "already_registered"} ──┤
|
||||||
|
│ │
|
||||||
|
```
|
||||||
|
|
||||||
|
After registration, the device appears in the Daedalus settings page. The user assigns it:
|
||||||
|
- A **name** (e.g. "Kitchen Speaker")
|
||||||
|
- A **description** (optional)
|
||||||
|
- A **workspace** (which workspace voice conversations go to)
|
||||||
|
- An **agent** (which Pallas agent to target)
|
||||||
|
|
||||||
|
Until assigned, the device cannot process voice.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Voice Conversation Flow
|
||||||
|
|
||||||
|
A voice conversation is a multi-turn session driven by on-device VAD:
|
||||||
|
|
||||||
|
```
|
||||||
|
ESP32 Daedalus
|
||||||
|
│ │
|
||||||
|
├─ [Wake word detected] │
|
||||||
|
├─ WS /api/v1/voice/realtime ──────▶│
|
||||||
|
├─ session.start ───────────────────▶│ → Create Conversation in DB
|
||||||
|
│◀──── session.created ─────────────┤ {session_id, conversation_id}
|
||||||
|
│◀──── status: listening ────────────┤
|
||||||
|
│ │
|
||||||
|
│ [VAD: user speaks] │
|
||||||
|
├─ input_audio_buffer.append ×N ────▶│
|
||||||
|
│ [VAD: silence detected] │
|
||||||
|
├─ input_audio_buffer.commit ───────▶│
|
||||||
|
│◀──── status: transcribing ────────┤ → STT
|
||||||
|
│◀──── transcript.done ─────────────┤ → Save user message
|
||||||
|
│◀──── status: thinking ────────────┤ → MCP call to Pallas
|
||||||
|
│◀──── response.text.done ──────────┤ → Save assistant message
|
||||||
|
│◀──── status: speaking ────────────┤ → TTS
|
||||||
|
│◀──── response.audio.delta ×N ─────┤
|
||||||
|
│◀──── response.audio.done ─────────┤
|
||||||
|
│◀──── response.done ───────────────┤
|
||||||
|
│◀──── status: listening ────────────┤
|
||||||
|
│ │
|
||||||
|
│ [VAD: user speaks again] │ (same conversation)
|
||||||
|
├─ (next turn cycle) ──────────────▶│
|
||||||
|
│ │
|
||||||
|
│ [Conversation ends by:] │
|
||||||
|
│ • 120s silence → timeout │
|
||||||
|
│ • Agent says goodbye │
|
||||||
|
│ • WebSocket disconnect │
|
||||||
|
│◀──── session.end ─────────────────┤
|
||||||
|
```
|
||||||
|
|
||||||
|
### Conversation End
|
||||||
|
|
||||||
|
A conversation ends in three ways:
|
||||||
|
|
||||||
|
1. **Inactivity timeout** — no speech for `DAEDALUS_VOICE_CONVERSATION_TIMEOUT` seconds (default 120)
|
||||||
|
2. **Agent-initiated** — the Pallas agent recognizes the conversation is over and signals it
|
||||||
|
3. **Client disconnect** — ESP32 sends `session.close` or WebSocket drops
|
||||||
|
|
||||||
|
All conversations are saved in PostgreSQL and visible in the Daedalus workspace chat history.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## WebSocket Protocol
|
||||||
|
|
||||||
|
### Connection
|
||||||
|
|
||||||
|
```
|
||||||
|
WS /api/v1/voice/realtime?device_id={uuid}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Client → Gateway Messages
|
||||||
|
|
||||||
|
| Type | Description | Fields |
|
||||||
|
|------|-------------|--------|
|
||||||
|
| `session.start` | Start a new conversation | `client_id` (optional), `audio_config` (optional) |
|
||||||
|
| `input_audio_buffer.append` | Audio chunk | `audio` (base64 PCM) |
|
||||||
|
| `input_audio_buffer.commit` | End of speech, trigger pipeline | — |
|
||||||
|
| `session.close` | End the session | — |
|
||||||
|
|
||||||
|
### Gateway → Client Messages
|
||||||
|
|
||||||
|
| Type | Description | Fields |
|
||||||
|
|------|-------------|--------|
|
||||||
|
| `session.created` | Session started | `session_id`, `conversation_id` |
|
||||||
|
| `status` | Processing state | `state` (`listening` / `transcribing` / `thinking` / `speaking`) |
|
||||||
|
| `transcript.done` | User's speech as text | `text` |
|
||||||
|
| `response.text.done` | Agent's text response | `text` |
|
||||||
|
| `response.audio.delta` | Audio chunk (streamed) | `delta` (base64 PCM) |
|
||||||
|
| `response.audio.done` | Audio streaming complete | — |
|
||||||
|
| `response.done` | Turn complete | — |
|
||||||
|
| `session.end` | Conversation ended | `reason` (`timeout` / `agent` / `client`) |
|
||||||
|
| `error` | Error occurred | `message`, `code` |
|
||||||
|
|
||||||
|
### Audio Format
|
||||||
|
|
||||||
|
All audio is **PCM signed 16-bit little-endian** (`pcm_s16le`), base64-encoded in JSON:
|
||||||
|
|
||||||
|
- **Sample rate:** 16,000 Hz
|
||||||
|
- **Channels:** 1 (mono)
|
||||||
|
- **Bit depth:** 16-bit
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
All endpoints are served by the Daedalus FastAPI backend.
|
||||||
|
|
||||||
|
### Voice Device Management
|
||||||
|
|
||||||
|
| Method | Route | Purpose |
|
||||||
|
|--------|-------|---------|
|
||||||
|
| `POST` | `/api/v1/voice/devices/register` | ESP32 self-registration (idempotent) |
|
||||||
|
| `GET` | `/api/v1/voice/devices` | List all registered devices |
|
||||||
|
| `GET` | `/api/v1/voice/devices/{id}` | Get device details |
|
||||||
|
| `PUT` | `/api/v1/voice/devices/{id}` | Update device (name, description, workspace, agent) |
|
||||||
|
| `DELETE` | `/api/v1/voice/devices/{id}` | Remove a device |
|
||||||
|
|
||||||
|
### Voice Sessions
|
||||||
|
|
||||||
|
| Method | Route | Purpose |
|
||||||
|
|--------|-------|---------|
|
||||||
|
| `WS` | `/api/v1/voice/realtime?device_id={id}` | WebSocket for audio conversations |
|
||||||
|
| `GET` | `/api/v1/voice/sessions` | List active voice sessions |
|
||||||
|
|
||||||
|
### Voice Configuration & Health
|
||||||
|
|
||||||
|
| Method | Route | Purpose |
|
||||||
|
|--------|-------|---------|
|
||||||
|
| `GET` | `/api/v1/voice/config` | Current voice configuration |
|
||||||
|
| `PUT` | `/api/v1/voice/config` | Update voice settings |
|
||||||
|
| `GET` | `/api/v1/voice/health` | STT + TTS reachability check |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Observability
|
||||||
|
|
||||||
|
### Prometheus Metrics
|
||||||
|
|
||||||
|
Voice metrics are exposed at Daedalus's `GET /metrics` endpoint with the `daedalus_voice_` prefix:
|
||||||
|
|
||||||
|
| Metric | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `daedalus_voice_sessions_active` | gauge | Active WebSocket sessions |
|
||||||
|
| `daedalus_voice_pipeline_duration_seconds` | histogram | Full pipeline latency |
|
||||||
|
| `daedalus_voice_stt_duration_seconds` | histogram | STT latency |
|
||||||
|
| `daedalus_voice_tts_duration_seconds` | histogram | TTS latency |
|
||||||
|
| `daedalus_voice_agent_duration_seconds` | histogram | Agent (MCP) latency |
|
||||||
|
| `daedalus_voice_transcriptions_total` | counter | Total STT calls |
|
||||||
|
| `daedalus_voice_conversations_total` | counter | Conversations by end reason |
|
||||||
|
| `daedalus_voice_devices_online` | gauge | Currently connected devices |
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
|
||||||
|
Voice events flow through the standard Daedalus logging pipeline: structlog → stdout → syslog → Alloy → Loki.
|
||||||
|
|
||||||
|
Key log events: `voice_device_registered`, `voice_session_started`, `voice_pipeline_complete`, `voice_conversation_ended`, `voice_pipeline_error`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Device not appearing in Daedalus settings
|
||||||
|
|
||||||
|
- Check the ESP32 can reach the Daedalus URL
|
||||||
|
- Verify the registration endpoint responds: `curl -X POST http://puck.incus:22181/api/v1/voice/devices/register -H 'Content-Type: application/json' -d '{"device_id":"test","firmware_version":"1.0"}'`
|
||||||
|
|
||||||
|
### Device registered but voice doesn't work
|
||||||
|
|
||||||
|
- Assign the device to a workspace and agent in **Settings → Voice Devices**
|
||||||
|
- Unassigned devices get: `{"type": "error", "code": "no_workspace"}`
|
||||||
|
|
||||||
|
### STT returns empty transcripts
|
||||||
|
|
||||||
|
- Check Speaches STT is running: `curl http://perseus.helu.ca:22070/v1/models`
|
||||||
|
- Check the voice health endpoint: `curl http://puck.incus:22181/api/v1/voice/health`
|
||||||
|
|
||||||
|
### High latency
|
||||||
|
|
||||||
|
- Check `daedalus_voice_pipeline_duration_seconds` in Prometheus/Grafana
|
||||||
|
- Breakdown by stage: STT, Agent, TTS histograms identify the bottleneck
|
||||||
|
- Agent latency depends on the Pallas agent and its downstream MCP servers
|
||||||
|
|
||||||
|
### Audio sounds wrong (chipmunk / slow)
|
||||||
|
|
||||||
|
- Speaches TTS outputs at 24 kHz; the pipeline resamples to 16 kHz
|
||||||
|
- Verify `DAEDALUS_VOICE_AUDIO_SAMPLE_RATE` matches the ESP32's playback rate
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture Overview
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────┐ WebSocket ┌──────────────────────────────────────┐
|
||||||
|
│ ESP32-S3 Board │◀══════════════════════▶ │ Daedalus Backend (FastAPI) │
|
||||||
|
│ (stentor-ear) │ JSON + base64 audio │ puck.incus │
|
||||||
|
│ UUID in NVS │ │ │
|
||||||
|
│ Wake Word + VAD │ │ voice/ module: │
|
||||||
|
└──────────────────┘ │ STT → MCP (Pallas) → TTS │
|
||||||
|
│ Conversations → PostgreSQL │
|
||||||
|
└──────┬──────────┬────────┬───────────┘
|
||||||
|
│ │ │
|
||||||
|
MCP │ HTTP │ HTTP │
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌──────────┐ ┌────────┐ ┌────────┐
|
||||||
|
│ Pallas │ │Speaches│ │Speaches│
|
||||||
|
│ Agents │ │ STT │ │ TTS │
|
||||||
|
└──────────┘ └────────┘ └────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
For full architectural details including Mermaid diagrams, see [architecture.md](architecture.md).
|
||||||
|
|
||||||
|
For the complete integration specification, see [daedalus/docs/stentor_integration.md](../../daedalus/docs/stentor_integration.md).
|
||||||
49
stentor-gateway/Dockerfile
Normal file
49
stentor-gateway/Dockerfile
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# Stentor Gateway — Multi-stage Dockerfile
# Builds a minimal production image for the voice gateway.

# --- Build stage ---
FROM python:3.12-slim AS builder

WORKDIR /build

# Install uv for fast dependency resolution
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Copy project files
COPY pyproject.toml .
COPY src/ src/

# Install dependencies into a virtual environment
RUN uv venv /opt/venv && \
    uv pip install --python /opt/venv/bin/python . --no-cache

# --- Runtime stage ---
FROM python:3.12-slim AS runtime

LABEL maintainer="robert"
LABEL description="Stentor Gateway — Voice gateway for AI agents"
LABEL version="0.1.0"

# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv

# Copy application source (for templates/static)
COPY src/ /app/src/

WORKDIR /app

# Use the venv Python
ENV PATH="/opt/venv/bin:$PATH"
ENV PYTHONPATH="/app/src"
ENV PYTHONUNBUFFERED=1

# Default configuration — honoured by the entrypoint below; override at run time.
ENV STENTOR_HOST=0.0.0.0
ENV STENTOR_PORT=8600

EXPOSE 8600

# Probe the liveness endpoint. Uses the stdlib (urllib) instead of httpx so
# the health check cannot break if the dependency set changes, and reads
# STENTOR_PORT so remapping the port does not silently kill the probe.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD python -c "import os, urllib.request; urllib.request.urlopen('http://localhost:%s/api/live/' % os.environ.get('STENTOR_PORT', '8600'), timeout=4)"

# Launch via the package entry point so STENTOR_HOST / STENTOR_PORT (and every
# other STENTOR_* setting) are actually applied. The previous exec-form uvicorn
# command hard-coded host and port, making the ENV defaults above dead config.
CMD ["python", "-m", "stentor"]
|
||||||
45
stentor-gateway/pyproject.toml
Normal file
45
stentor-gateway/pyproject.toml
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
[project]
|
||||||
|
name = "stentor-gateway"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Voice gateway connecting ESP32 audio hardware to AI agents via speech services"
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
license = "MIT"
|
||||||
|
dependencies = [
|
||||||
|
"fastapi>=0.115",
|
||||||
|
"uvicorn[standard]>=0.34",
|
||||||
|
"websockets>=14.0",
|
||||||
|
"httpx>=0.28",
|
||||||
|
"pydantic>=2.10",
|
||||||
|
"pydantic-settings>=2.7",
|
||||||
|
"jinja2>=3.1",
|
||||||
|
"prometheus-client>=0.21",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=8.0",
|
||||||
|
"pytest-asyncio>=0.25",
|
||||||
|
"pytest-httpx>=0.35",
|
||||||
|
"ruff>=0.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
stentor = "stentor.main:main"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
packages = ["src/stentor"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
target-version = "py312"
|
||||||
|
line-length = 100
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "I", "N", "W", "UP", "B", "SIM", "RUF"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
asyncio_mode = "auto"
|
||||||
|
testpaths = ["tests"]
|
||||||
3
stentor-gateway/src/stentor/__init__.py
Normal file
3
stentor-gateway/src/stentor/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Stentor — Voice Gateway for AI Agents."""
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
5
stentor-gateway/src/stentor/__main__.py
Normal file
5
stentor-gateway/src/stentor/__main__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""Allow running Stentor Gateway with ``python -m stentor``."""

from stentor.main import main

# Guard the call: ``python -m stentor`` executes this module with
# __name__ == "__main__", but a plain ``import stentor.__main__`` (by tooling,
# test discovery, etc.) must not start the server as an import side effect.
if __name__ == "__main__":
    main()
|
||||||
62
stentor-gateway/src/stentor/agent_client.py
Normal file
62
stentor-gateway/src/stentor/agent_client.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""FastAgent HTTP client.
|
||||||
|
|
||||||
|
Thin adapter for communicating with FastAgent's HTTP transport.
|
||||||
|
Designed to be easily swappable if the API shape changes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from stentor.config import Settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class AgentClient:
    """Async client for FastAgent's HTTP transport.

    Thin adapter around a shared ``httpx.AsyncClient`` so connection pooling
    is reused across service clients; easy to swap if the API shape changes.
    """

    def __init__(self, settings: Settings, http_client: httpx.AsyncClient) -> None:
        self._settings = settings
        self._http = http_client
        # Message endpoint, normalised so a trailing slash in config is harmless.
        self._url = f"{settings.agent_url.rstrip('/')}/message"

    async def send_message(self, content: str) -> str:
        """Send a message to the agent and return its reply text.

        Args:
            content: The user's transcribed message.

        Returns:
            The agent's response text (empty string if the reply lacks one).

        Raises:
            httpx.HTTPStatusError: If the agent returns an error status.
        """
        started_at = time.monotonic()

        logger.debug("Agent request to %s: %r", self._url, content[:80])

        reply = await self._http.post(self._url, json={"content": content}, timeout=60.0)
        reply.raise_for_status()

        answer = reply.json().get("content", "")

        logger.info("Agent responded in %.2fs: %r", time.monotonic() - started_at, answer[:80])
        return answer

    async def is_available(self) -> bool:
        """Report whether the agent service is reachable.

        Any non-5xx HTTP response counts as "available"; connection failures
        and timeouts count as unavailable.
        """
        try:
            probe = await self._http.get(self._settings.agent_url.rstrip("/"), timeout=5.0)
        except (httpx.ConnectError, httpx.TimeoutException):
            return False
        return probe.status_code < 500
|
||||||
127
stentor-gateway/src/stentor/audio.py
Normal file
127
stentor-gateway/src/stentor/audio.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
"""Audio utilities for Stentor Gateway.
|
||||||
|
|
||||||
|
Handles PCM↔WAV conversion, resampling, and base64 encoding/decoding.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import struct
|
||||||
|
import wave
|
||||||
|
|
||||||
|
|
||||||
|
def pcm_to_wav(
    pcm_data: bytes,
    sample_rate: int = 16000,
    channels: int = 1,
    sample_width: int = 2,
) -> bytes:
    """Wrap raw PCM bytes in a WAV container.

    Args:
        pcm_data: Raw PCM audio bytes (signed 16-bit little-endian).
        sample_rate: Sample rate in Hz.
        channels: Number of audio channels.
        sample_width: Bytes per sample (2 for 16-bit).

    Returns:
        Complete WAV file as bytes.
    """
    out = io.BytesIO()
    writer = wave.open(out, "wb")
    try:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(sample_rate)
        writer.writeframes(pcm_data)
    finally:
        writer.close()
    return out.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def wav_to_pcm(wav_data: bytes) -> tuple[bytes, int, int, int]:
    """Extract raw PCM data and format parameters from a WAV file.

    Args:
        wav_data: Complete WAV file as bytes.

    Returns:
        Tuple of (pcm_data, sample_rate, channels, sample_width).
    """
    reader = wave.open(io.BytesIO(wav_data), "rb")
    try:
        frames = reader.readframes(reader.getnframes())
        return frames, reader.getframerate(), reader.getnchannels(), reader.getsampwidth()
    finally:
        reader.close()
|
||||||
|
|
||||||
|
|
||||||
|
def resample_pcm(
    pcm_data: bytes,
    src_rate: int,
    dst_rate: int,
    sample_width: int = 2,
    channels: int = 1,
) -> bytes:
    """Resample PCM audio using linear interpolation.

    Simple resampler with no external dependencies. Adequate for speech audio
    where the primary use case is converting TTS output (24kHz) to the ESP32
    playback rate (16kHz).

    Args:
        pcm_data: Raw PCM bytes (signed 16-bit little-endian; channels
            interleaved frame-by-frame when multi-channel).
        src_rate: Source sample rate in Hz.
        dst_rate: Destination sample rate in Hz.
        sample_width: Bytes per sample (must be 2 for 16-bit).
        channels: Number of audio channels.

    Returns:
        Resampled PCM bytes, channels interleaved exactly like the input.

    Raises:
        ValueError: If sample_width is not 2.
    """
    if src_rate == dst_rate:
        return pcm_data

    if sample_width != 2:
        msg = f"Only 16-bit PCM supported for resampling, got {sample_width * 8}-bit"
        raise ValueError(msg)

    fmt = "<h"
    frame_size = sample_width * channels
    num_frames = len(pcm_data) // frame_size
    ratio = src_rate / dst_rate
    dst_frames = int(num_frames / ratio)

    # Resample each channel independently.
    per_channel: list[list[int]] = []
    for ch in range(channels):
        samples = [
            struct.unpack_from(fmt, pcm_data, i * frame_size + ch * sample_width)[0]
            for i in range(num_frames)
        ]

        resampled: list[int] = []
        for i in range(dst_frames):
            src_pos = i * ratio
            idx = int(src_pos)
            frac = src_pos - idx

            if idx + 1 < len(samples):
                # Linear interpolation between neighbouring source samples.
                value = samples[idx] * (1.0 - frac) + samples[idx + 1] * frac
            else:
                # Past the last sample pair: hold the final sample (or silence).
                value = samples[idx] if idx < len(samples) else 0

            # Clamp to the signed 16-bit range before packing.
            resampled.append(max(-32768, min(32767, int(value))))

        per_channel.append(resampled)

    # FIX: the previous implementation appended whole channels back-to-back
    # (planar layout), which corrupted multi-channel output. Interleave
    # frame-by-frame so stereo PCM stays stereo. Mono output is unchanged.
    result = bytearray()
    for i in range(dst_frames):
        for ch_samples in per_channel:
            result.extend(struct.pack(fmt, ch_samples[i]))

    return bytes(result)
|
||||||
|
|
||||||
|
|
||||||
|
def encode_audio(pcm_data: bytes) -> str:
    """Base64-encode raw PCM bytes for JSON/WebSocket transmission."""
    encoded = base64.b64encode(pcm_data)
    return encoded.decode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def decode_audio(b64_data: str) -> bytes:
    """Decode base64 audio received over the WebSocket back to raw PCM bytes."""
    raw = base64.b64decode(b64_data)
    return raw
|
||||||
43
stentor-gateway/src/stentor/config.py
Normal file
43
stentor-gateway/src/stentor/config.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
"""Pydantic Settings configuration for Stentor Gateway.
|
||||||
|
|
||||||
|
All configuration is driven by environment variables (12-factor).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Stentor Gateway configuration.

    Every field can be overridden with a ``STENTOR_``-prefixed environment
    variable (e.g. ``STENTOR_PORT=9000``), per 12-factor conventions.
    """

    model_config = {"env_prefix": "STENTOR_"}

    # Server bind address and port
    host: str = "0.0.0.0"
    port: int = 8600

    # Speaches endpoints (OpenAI-compatible)
    stt_url: str = "http://perseus.helu.ca:22070"
    tts_url: str = "http://perseus.helu.ca:22070"

    # FastAgent endpoint
    agent_url: str = "http://localhost:8001"

    # STT configuration
    stt_model: str = "Systran/faster-whisper-small"

    # TTS configuration
    tts_model: str = "kokoro"
    tts_voice: str = "af_heart"

    # Audio configuration
    audio_sample_rate: int = 16000
    audio_channels: int = 1
    # Bytes per sample (2 == 16-bit PCM). The audio helpers (pcm_to_wav,
    # resample_pcm) take sample_width in BYTES and only support 2; the
    # previous default of 16 conflated bits with bytes and would make
    # resample_pcm raise ValueError.
    audio_sample_width: int = 2

    # Logging
    log_level: str = "DEBUG"
|
||||||
|
|
||||||
|
|
||||||
|
def get_settings() -> Settings:
    """Build a fresh Settings instance from the current environment."""
    return Settings()
|
||||||
166
stentor-gateway/src/stentor/health.py
Normal file
166
stentor-gateway/src/stentor/health.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
"""Health check and metrics endpoints for Stentor Gateway.
|
||||||
|
|
||||||
|
Follows Kubernetes health check conventions:
|
||||||
|
- /api/live/ — Liveness probe (is the process alive?)
|
||||||
|
- /api/ready/ — Readiness probe (are dependencies reachable?)
|
||||||
|
- /api/metrics — Prometheus-compatible metrics
|
||||||
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Response
|
||||||
|
from prometheus_client import (
|
||||||
|
Counter,
|
||||||
|
Gauge,
|
||||||
|
Histogram,
|
||||||
|
generate_latest,
|
||||||
|
)
|
||||||
|
|
||||||
|
from stentor.agent_client import AgentClient
|
||||||
|
from stentor.stt_client import STTClient
|
||||||
|
from stentor.tts_client import TTSClient
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/api")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Prometheus metrics
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Gauge: incremented on session.start, decremented when the socket closes.
SESSIONS_ACTIVE = Gauge(
    "stentor_sessions_active",
    "Current active WebSocket sessions",
)

# Counter: one increment per completed STT transcription.
TRANSCRIPTIONS_TOTAL = Counter(
    "stentor_transcriptions_total",
    "Total STT transcription calls",
)

# Counter: one increment per completed TTS synthesis.
TTS_REQUESTS_TOTAL = Counter(
    "stentor_tts_requests_total",
    "Total TTS synthesis calls",
)

# Counter: one increment per completed agent round-trip.
AGENT_REQUESTS_TOTAL = Counter(
    "stentor_agent_requests_total",
    "Total agent message calls",
)

# End-to-end latency for one committed utterance (all three stages).
PIPELINE_DURATION = Histogram(
    "stentor_pipeline_duration_seconds",
    "Full pipeline latency (STT + Agent + TTS)",
    buckets=(0.5, 1.0, 2.0, 3.0, 5.0, 10.0, 20.0, 30.0),
)

STT_DURATION = Histogram(
    "stentor_stt_duration_seconds",
    "STT transcription latency",
    buckets=(0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0),
)

TTS_DURATION = Histogram(
    "stentor_tts_duration_seconds",
    "TTS synthesis latency",
    buckets=(0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0),
)

# Agent buckets extend to 30s: agent latency includes the downstream
# agent/MCP calls, so it can run much longer than STT or TTS.
AGENT_DURATION = Histogram(
    "stentor_agent_duration_seconds",
    "Agent response latency",
    buckets=(0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0),
)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Startup time tracking
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Monotonic timestamp captured at startup; 0.0 means "never recorded".
_start_time: float = 0.0


def record_start_time() -> None:
    """Capture the application start time for uptime reporting."""
    global _start_time
    _start_time = time.monotonic()


def get_uptime() -> float:
    """Return seconds since startup, or 0.0 if startup was never recorded."""
    return time.monotonic() - _start_time if _start_time else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dependency references (set during app startup)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Service-client references populated at app startup; None until then,
# which makes the readiness probe report "not ready".
_stt_client: STTClient | None = None
_tts_client: TTSClient | None = None
_agent_client: AgentClient | None = None


def set_clients(stt: STTClient, tts: TTSClient, agent: AgentClient) -> None:
    """Register the service clients the readiness probe should check."""
    global _stt_client, _tts_client, _agent_client
    _stt_client, _tts_client, _agent_client = stt, tts, agent
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Endpoints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/live/")
async def liveness() -> dict:
    """Liveness probe — is the process alive and responding?

    Returns 200 whenever the event loop can serve the request; Kubernetes
    restarts the pod if this endpoint stops answering.
    """
    return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/ready/")
async def readiness(response: Response) -> dict:
    """Readiness probe — are all dependencies reachable?

    Probes the STT, TTS, and Agent services. Responds 200 when every check
    passes and 503 otherwise, so load balancers stop routing traffic to an
    instance with an unavailable dependency. A client that was never
    registered (still None) is reported as not ready.
    """
    probes = (
        ("stt", _stt_client),
        ("tts", _tts_client),
        ("agent", _agent_client),
    )

    checks: dict[str, bool] = {}
    for name, client in probes:
        checks[name] = await client.is_available() if client else False

    all_ready = all(checks.values())
    if not all_ready:
        response.status_code = 503

    return {
        "status": "ready" if all_ready else "not_ready",
        "checks": checks,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/metrics")
async def metrics() -> Response:
    """Expose all registered metrics in Prometheus text exposition format."""
    payload = generate_latest()
    return Response(
        content=payload,
        media_type="text/plain; version=0.0.4; charset=utf-8",
    )
|
||||||
421
stentor-gateway/src/stentor/main.py
Normal file
421
stentor-gateway/src/stentor/main.py
Normal file
@@ -0,0 +1,421 @@
|
|||||||
|
"""Stentor Gateway — FastAPI application.
|
||||||
|
|
||||||
|
Central orchestrator connecting ESP32 audio hardware to AI agents via speech services.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from contextlib import asynccontextmanager, suppress
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
||||||
|
from fastapi.responses import HTMLResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
from pydantic import ValidationError
|
||||||
|
from starlette.requests import Request
|
||||||
|
|
||||||
|
from stentor import __version__
|
||||||
|
from stentor.agent_client import AgentClient
|
||||||
|
from stentor.audio import decode_audio
|
||||||
|
from stentor.config import Settings, get_settings
|
||||||
|
from stentor.health import (
|
||||||
|
AGENT_DURATION,
|
||||||
|
AGENT_REQUESTS_TOTAL,
|
||||||
|
PIPELINE_DURATION,
|
||||||
|
SESSIONS_ACTIVE,
|
||||||
|
STT_DURATION,
|
||||||
|
TRANSCRIPTIONS_TOTAL,
|
||||||
|
TTS_DURATION,
|
||||||
|
TTS_REQUESTS_TOTAL,
|
||||||
|
get_uptime,
|
||||||
|
record_start_time,
|
||||||
|
set_clients,
|
||||||
|
)
|
||||||
|
from stentor.health import (
|
||||||
|
router as health_router,
|
||||||
|
)
|
||||||
|
from stentor.models import (
|
||||||
|
ErrorEvent,
|
||||||
|
InputAudioBufferAppend,
|
||||||
|
SessionCreated,
|
||||||
|
SessionStart,
|
||||||
|
StatusUpdate,
|
||||||
|
)
|
||||||
|
from stentor.pipeline import Pipeline, PipelineState
|
||||||
|
from stentor.stt_client import STTClient
|
||||||
|
from stentor.tts_client import TTSClient
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Paths for templates and static files
|
||||||
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
|
TEMPLATES_DIR = BASE_DIR / "templates"
|
||||||
|
STATIC_DIR = BASE_DIR / "static"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Session tracking
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Session:
    """State for one connected WebSocket client."""

    session_id: str  # server-generated UUID
    client_id: str  # client-supplied identifier from session.start
    websocket: WebSocket
    audio_buffer: bytearray = field(default_factory=bytearray)

    def reset_buffer(self) -> bytes:
        """Return the buffered audio as immutable bytes and clear the buffer."""
        drained = bytes(self.audio_buffer)
        self.audio_buffer.clear()
        return drained
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of live sessions, keyed by server-generated session_id.
_sessions: dict[str, Session] = {}


def get_active_sessions() -> dict[str, Session]:
    """Return the mapping of currently active sessions."""
    return _sessions
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Application lifecycle
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application startup and shutdown.

    Startup order matters: logging first (so the announcements below are
    visible), then one shared httpx client, then the three service clients
    built on it, then health-check registration and the pipeline. Everything
    is parked on ``app.state`` for access from request handlers. On shutdown
    the shared HTTP client is closed, which tears down its connection pool.
    """
    settings = get_settings()

    # Configure logging
    logging.basicConfig(
        level=getattr(logging, settings.log_level.upper(), logging.INFO),
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )

    logger.info("Stentor Gateway v%s starting", __version__)
    logger.info("STT: %s (model: %s)", settings.stt_url, settings.stt_model)
    logger.info(
        "TTS: %s (model: %s, voice: %s)",
        settings.tts_url, settings.tts_model, settings.tts_voice,
    )
    logger.info("Agent: %s", settings.agent_url)

    # Anchor for the /api uptime reporting
    record_start_time()

    # Create shared HTTP client (one pool reused by all three service clients)
    http_client = httpx.AsyncClient()

    # Create service clients
    stt_client = STTClient(settings, http_client)
    tts_client = TTSClient(settings, http_client)
    agent_client = AgentClient(settings, http_client)

    # Register clients for health checks (readiness probe uses these refs)
    set_clients(stt_client, tts_client, agent_client)

    # Create pipeline state and pipeline
    pipeline_state = PipelineState()
    pipeline = Pipeline(settings, stt_client, tts_client, agent_client, pipeline_state)

    # Store on app state for access in endpoints
    app.state.settings = settings
    app.state.http_client = http_client
    app.state.stt_client = stt_client
    app.state.tts_client = tts_client
    app.state.agent_client = agent_client
    app.state.pipeline = pipeline
    app.state.pipeline_state = pipeline_state

    logger.info("Stentor Gateway ready on %s:%d", settings.host, settings.port)

    # Application serves requests while suspended here.
    yield

    # Shutdown
    logger.info("Stentor Gateway shutting down")
    await http_client.aclose()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FastAPI app
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# FastAPI application. API docs live under /api so the root path "/" is free
# to serve the HTML dashboard.
app = FastAPI(
    title="Stentor Gateway",
    version=__version__,
    description="Voice gateway connecting ESP32 audio hardware to AI agents via speech services.",
    lifespan=lifespan,
    docs_url="/api/docs",
    openapi_url="/api/openapi.json",
)

# Include health/metrics routes (/api/live/, /api/ready/, /api/metrics)
app.include_router(health_router)

# Mount static files; skipped gracefully when the directory is absent
if STATIC_DIR.exists():
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")

# Jinja2 templates for the dashboard
templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dashboard
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/", response_class=HTMLResponse)
async def dashboard(request: Request) -> HTMLResponse:
    """Serve the Bootstrap dashboard."""
    app_state = request.app.state
    cfg: Settings = app_state.settings
    state: PipelineState = app_state.pipeline_state

    # Per-session summary for display (id, owner, pending audio bytes).
    session_summary = {
        sid: {"client_id": sess.client_id, "buffer_size": len(sess.audio_buffer)}
        for sid, sess in _sessions.items()
    }

    context = {
        "request": request,
        "version": __version__,
        "settings": cfg,
        "active_sessions": len(_sessions),
        "sessions": session_summary,
        "uptime": get_uptime(),
        "pipeline_state": state,
    }
    return templates.TemplateResponse("dashboard.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# API info endpoint
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/v1/info")
async def api_info() -> dict:
    """Return gateway identity, endpoint map, and effective configuration."""
    cfg: Settings = app.state.settings

    endpoints = {
        "realtime": "/api/v1/realtime",
        "live": "/api/live/",
        "ready": "/api/ready/",
        "metrics": "/api/metrics",
    }
    config = {
        "stt_url": cfg.stt_url,
        "tts_url": cfg.tts_url,
        "agent_url": cfg.agent_url,
        "stt_model": cfg.stt_model,
        "tts_model": cfg.tts_model,
        "tts_voice": cfg.tts_voice,
        "audio_sample_rate": cfg.audio_sample_rate,
        "audio_channels": cfg.audio_channels,
        "audio_sample_width": cfg.audio_sample_width,
    }

    return {
        "name": "stentor-gateway",
        "version": __version__,
        "endpoints": endpoints,
        "config": config,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# WebSocket endpoint (OpenAI Realtime API-inspired)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@app.websocket("/api/v1/realtime")
async def realtime_websocket(websocket: WebSocket) -> None:
    """WebSocket endpoint for real-time audio conversations.

    Protocol inspired by the OpenAI Realtime API:
    - Client sends session.start, then streams input_audio_buffer.append events
    - Client sends input_audio_buffer.commit to trigger pipeline
    - Gateway responds with status updates, transcript, response text, and audio

    One message-dispatch loop per connection; recoverable errors send an
    ErrorEvent and ``continue`` rather than closing the socket.
    """
    await websocket.accept()

    # No session until the client sends session.start; audio events before
    # that are rejected with "no_session".
    session: Session | None = None
    pipeline: Pipeline = websocket.app.state.pipeline

    try:
        while True:
            raw = await websocket.receive_text()

            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                await _send_error(websocket, "Invalid JSON", "invalid_json")
                continue

            msg_type = data.get("type", "")

            # --- session.start ---
            if msg_type == "session.start":
                try:
                    msg = SessionStart.model_validate(data)
                except ValidationError as e:
                    await _send_error(websocket, str(e), "validation_error")
                    continue

                # NOTE(review): a second session.start on the same socket
                # replaces `session` without removing the old entry from
                # _sessions or decrementing SESSIONS_ACTIVE — the earlier
                # entry leaks until process restart. Confirm whether repeated
                # starts should be rejected or should clean up the old session.
                session_id = str(uuid.uuid4())
                session = Session(
                    session_id=session_id,
                    client_id=msg.client_id,
                    websocket=websocket,
                )
                _sessions[session_id] = session
                SESSIONS_ACTIVE.inc()

                logger.info(
                    "Session started: %s (client: %s)",
                    session_id,
                    msg.client_id,
                )

                await websocket.send_text(
                    SessionCreated(session_id=session_id).model_dump_json()
                )
                await websocket.send_text(
                    StatusUpdate(state="listening").model_dump_json()
                )

            # --- input_audio_buffer.append ---
            elif msg_type == "input_audio_buffer.append":
                if not session:
                    await _send_error(websocket, "No active session", "no_session")
                    continue

                try:
                    msg = InputAudioBufferAppend.model_validate(data)
                except ValidationError as e:
                    await _send_error(websocket, str(e), "validation_error")
                    continue

                # Accumulate base64-decoded PCM until the client commits.
                pcm_chunk = decode_audio(msg.audio)
                session.audio_buffer.extend(pcm_chunk)

            # --- input_audio_buffer.commit ---
            elif msg_type == "input_audio_buffer.commit":
                if not session:
                    await _send_error(websocket, "No active session", "no_session")
                    continue

                # Drain the buffer so the next utterance starts clean even if
                # the pipeline fails below.
                audio_data = session.reset_buffer()

                if not audio_data:
                    await _send_error(websocket, "Empty audio buffer", "empty_buffer")
                    await websocket.send_text(
                        StatusUpdate(state="listening").model_dump_json()
                    )
                    continue

                logger.info(
                    "Processing %d bytes of audio for session %s",
                    len(audio_data),
                    session.session_id,
                )

                # Run the pipeline and stream events back
                async for event in pipeline.process(audio_data):
                    await websocket.send_text(event.model_dump_json())

                    # Record Prometheus metrics from pipeline events
                    if event.type == "transcript.done":
                        TRANSCRIPTIONS_TOTAL.inc()
                    elif event.type == "response.text.done":
                        AGENT_REQUESTS_TOTAL.inc()
                    elif event.type == "response.audio.done":
                        TTS_REQUESTS_TOTAL.inc()

                # Record pipeline duration from state (pipeline appends one
                # metrics entry per processed utterance)
                pipeline_state: PipelineState = websocket.app.state.pipeline_state
                if pipeline_state.recent_metrics:
                    last = pipeline_state.recent_metrics[-1]
                    PIPELINE_DURATION.observe(last.total_duration)
                    STT_DURATION.observe(last.stt_duration)
                    AGENT_DURATION.observe(last.agent_duration)
                    TTS_DURATION.observe(last.tts_duration)

                # Return to listening state
                await websocket.send_text(
                    StatusUpdate(state="listening").model_dump_json()
                )

            # --- session.close ---
            elif msg_type == "session.close":
                logger.info("Session close requested")
                break

            else:
                await _send_error(
                    websocket, f"Unknown event type: {msg_type}", "unknown_event"
                )

    except WebSocketDisconnect:
        logger.info("WebSocket disconnected")

    except Exception:
        logger.exception("WebSocket error")
        # Best effort: the socket may already be gone.
        with suppress(Exception):
            await _send_error(websocket, "Internal server error", "internal_error")

    finally:
        # Unregister the (latest) session on any exit path.
        if session and session.session_id in _sessions:
            del _sessions[session.session_id]
            SESSIONS_ACTIVE.dec()
            logger.info("Session ended: %s", session.session_id)
|
||||||
|
|
||||||
|
|
||||||
|
async def _send_error(websocket: WebSocket, message: str, code: str) -> None:
    """Serialize an ``ErrorEvent`` and push it to the client over *websocket*."""
    payload = ErrorEvent(message=message, code=code).model_dump_json()
    await websocket.send_text(payload)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Start the Stentor Gateway server.

    This is the main entry point used by:
    - ``python -m stentor``
    - The ``stentor`` console script (installed via pip)
    - ``python main.py`` (when run directly)
    """
    # Imported lazily so importing this module never pulls in the server.
    import uvicorn

    settings = get_settings()

    # Root logging must be configured before uvicorn starts so the
    # startup banner below is actually emitted.
    logging.basicConfig(
        level=getattr(logging, settings.log_level.upper(), logging.INFO),
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )

    logger.info(
        "Starting Stentor Gateway v%s on %s:%d",
        __version__,
        settings.host,
        settings.port,
    )

    uvicorn.run(
        "stentor.main:app",
        host=settings.host,
        port=settings.port,
        log_level=settings.log_level.lower(),
    )


if __name__ == "__main__":
    main()
|
||||||
129
stentor-gateway/src/stentor/models.py
Normal file
129
stentor-gateway/src/stentor/models.py
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
"""WebSocket message models for Stentor Gateway.
|
||||||
|
|
||||||
|
Inspired by the OpenAI Realtime API event naming conventions.
|
||||||
|
Messages are JSON with base64-encoded audio data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Annotated, Literal
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Client → Gateway events
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class AudioConfig(BaseModel):
    """Audio configuration sent by the client on session start."""

    # Samples per second of the incoming PCM stream.
    sample_rate: int = 16000
    # Number of interleaved channels (1 = mono).
    channels: int = 1
    # Presumably bits per sample (16-bit matches pcm_s16le below) — TODO confirm.
    sample_width: int = 16
    # Wire encoding identifier: signed 16-bit little-endian PCM.
    encoding: str = "pcm_s16le"
|
||||||
|
|
||||||
|
|
||||||
|
class SessionStart(BaseModel):
    """Client requests a new session."""

    type: Literal["session.start"] = "session.start"
    # Optional caller-supplied identifier for the connecting device.
    client_id: str = ""
    # Per-session audio format; defaults to the gateway's standard config.
    audio_config: AudioConfig = Field(default_factory=AudioConfig)
|
||||||
|
|
||||||
|
|
||||||
|
class InputAudioBufferAppend(BaseModel):
    """Client sends a chunk of audio data."""

    type: Literal["input_audio_buffer.append"] = "input_audio_buffer.append"
    # base64-encoded PCM; decoded and buffered server-side until commit.
    audio: str  # base64-encoded PCM
|
||||||
|
|
||||||
|
|
||||||
|
class InputAudioBufferCommit(BaseModel):
    """Client signals end of speech / commits the audio buffer.

    Carries no payload: the discriminating ``type`` field is the message.
    """

    type: Literal["input_audio_buffer.commit"] = "input_audio_buffer.commit"
|
||||||
|
|
||||||
|
|
||||||
|
class SessionClose(BaseModel):
    """Client requests session termination."""

    type: Literal["session.close"] = "session.close"
|
||||||
|
|
||||||
|
|
||||||
|
# Discriminated union of all client-originated events: Pydantic dispatches
# on the literal ``type`` field when validating incoming JSON.
ClientEvent = Annotated[
    SessionStart | InputAudioBufferAppend | InputAudioBufferCommit | SessionClose,
    Field(discriminator="type"),
]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Gateway → Client events
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class SessionCreated(BaseModel):
    """Acknowledge session creation."""

    type: Literal["session.created"] = "session.created"
    # Server-generated identifier the client echoes for this session.
    session_id: str
|
||||||
|
|
||||||
|
|
||||||
|
class StatusUpdate(BaseModel):
    """Gateway processing status update."""

    type: Literal["status"] = "status"
    # Current pipeline stage, mirroring STT → Agent → TTS progression;
    # "listening" means idle/awaiting audio.
    state: Literal["listening", "transcribing", "thinking", "speaking"]
|
||||||
|
|
||||||
|
|
||||||
|
class TranscriptDone(BaseModel):
    """Transcript of what the user said."""

    type: Literal["transcript.done"] = "transcript.done"
    # Final STT output for the committed audio buffer.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseTextDone(BaseModel):
    """AI agent response text."""

    type: Literal["response.text.done"] = "response.text.done"
    # Complete agent reply; also the input handed to TTS.
    text: str
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseAudioDelta(BaseModel):
    """Streamed audio response chunk."""

    type: Literal["response.audio.delta"] = "response.audio.delta"
    # One chunk of synthesized speech, base64-encoded PCM.
    delta: str  # base64-encoded PCM
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseAudioDone(BaseModel):
    """Audio response streaming complete (no more audio deltas follow)."""

    type: Literal["response.audio.done"] = "response.audio.done"
|
||||||
|
|
||||||
|
|
||||||
|
class ResponseDone(BaseModel):
    """Full response cycle complete (transcript, text, and audio all sent)."""

    type: Literal["response.done"] = "response.done"
|
||||||
|
|
||||||
|
|
||||||
|
class ErrorEvent(BaseModel):
    """Error event."""

    type: Literal["error"] = "error"
    # Human-readable description of what went wrong.
    message: str
    # Machine-readable identifier (e.g. "empty_transcript", "pipeline_error",
    # "unknown_event") so clients can branch without parsing the message.
    code: str = "unknown_error"
|
||||||
|
|
||||||
|
|
||||||
|
# Union of every event the gateway can emit. Unlike ClientEvent this is a
# plain (non-discriminated) union: it annotates outgoing events, which are
# serialized individually rather than parsed back into models.
ServerEvent = (
    SessionCreated
    | StatusUpdate
    | TranscriptDone
    | ResponseTextDone
    | ResponseAudioDelta
    | ResponseAudioDone
    | ResponseDone
    | ErrorEvent
)
|
||||||
175
stentor-gateway/src/stentor/pipeline.py
Normal file
175
stentor-gateway/src/stentor/pipeline.py
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
"""Voice pipeline orchestrator: STT → Agent → TTS.
|
||||||
|
|
||||||
|
Ties together the three service clients into a single processing pipeline.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from stentor.agent_client import AgentClient
|
||||||
|
from stentor.audio import encode_audio, pcm_to_wav, resample_pcm
|
||||||
|
from stentor.config import Settings
|
||||||
|
from stentor.models import (
|
||||||
|
ErrorEvent,
|
||||||
|
ResponseAudioDelta,
|
||||||
|
ResponseAudioDone,
|
||||||
|
ResponseDone,
|
||||||
|
ResponseTextDone,
|
||||||
|
ServerEvent,
|
||||||
|
StatusUpdate,
|
||||||
|
TranscriptDone,
|
||||||
|
)
|
||||||
|
from stentor.stt_client import STTClient
|
||||||
|
from stentor.tts_client import TTS_OUTPUT_SAMPLE_RATE, TTSClient
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Audio chunk size for streaming to client (2KB ≈ 64ms at 16kHz/16bit/mono)
|
||||||
|
AUDIO_CHUNK_SIZE = 2048
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PipelineMetrics:
    """Timing and payload stats gathered over a single pipeline run."""

    stt_duration: float = 0.0
    agent_duration: float = 0.0
    tts_duration: float = 0.0
    total_duration: float = 0.0
    transcript: str = ""
    response_text: str = ""
    audio_bytes_sent: int = 0


@dataclass
class PipelineState:
    """Aggregated counters plus a bounded history of recent runs."""

    total_transcriptions: int = 0
    total_tts_requests: int = 0
    total_agent_requests: int = 0
    recent_metrics: list[PipelineMetrics] = field(default_factory=list)

    def record(self, metrics: PipelineMetrics) -> None:
        """Fold one run's metrics into the aggregate state."""
        self.total_transcriptions += 1
        self.total_agent_requests += 1
        self.total_tts_requests += 1
        self.recent_metrics.append(metrics)
        # Bound the history (last 100 runs) so memory stays constant.
        if len(self.recent_metrics) > 100:
            del self.recent_metrics[:-100]
|
||||||
|
|
||||||
|
|
||||||
|
class Pipeline:
    """Orchestrates the STT → Agent → TTS voice pipeline.

    Holds no per-request state of its own: each :meth:`process` call is an
    independent run whose metrics are pushed into the shared ``PipelineState``.
    """

    def __init__(
        self,
        settings: Settings,
        stt: STTClient,
        tts: TTSClient,
        agent: AgentClient,
        state: PipelineState,
    ) -> None:
        # Service clients are injected so they can share one HTTP client
        # and be replaced in tests.
        self._settings = settings
        self._stt = stt
        self._tts = tts
        self._agent = agent
        self._state = state

    async def process(self, audio_buffer: bytes) -> AsyncIterator[ServerEvent]:
        """Run the full voice pipeline on buffered audio.

        Yields server events as the pipeline progresses through each stage:
        status updates, transcript, response text, audio chunks, and completion.

        Args:
            audio_buffer: Raw PCM audio bytes from the client.

        Yields:
            ServerEvent instances to send back to the client.
        """
        pipeline_start = time.monotonic()
        metrics = PipelineMetrics()

        try:
            # --- Stage 1: STT ---
            yield StatusUpdate(state="transcribing")

            # The STT endpoint takes a complete WAV file, so wrap the raw
            # PCM in a WAV container first.
            wav_data = pcm_to_wav(
                audio_buffer,
                sample_rate=self._settings.audio_sample_rate,
                channels=self._settings.audio_channels,
            )

            stt_start = time.monotonic()
            transcript = await self._stt.transcribe(wav_data)
            metrics.stt_duration = time.monotonic() - stt_start
            metrics.transcript = transcript

            # Bail out early: no speech means nothing to send to the agent.
            if not transcript:
                logger.warning("STT returned empty transcript")
                yield ErrorEvent(message="No speech detected", code="empty_transcript")
                return

            yield TranscriptDone(text=transcript)

            # --- Stage 2: Agent ---
            yield StatusUpdate(state="thinking")

            agent_start = time.monotonic()
            response_text = await self._agent.send_message(transcript)
            metrics.agent_duration = time.monotonic() - agent_start
            metrics.response_text = response_text

            if not response_text:
                logger.warning("Agent returned empty response")
                yield ErrorEvent(message="Agent returned empty response", code="empty_response")
                return

            yield ResponseTextDone(text=response_text)

            # --- Stage 3: TTS ---
            yield StatusUpdate(state="speaking")

            tts_start = time.monotonic()
            tts_audio = await self._tts.synthesize(response_text)
            metrics.tts_duration = time.monotonic() - tts_start

            # Resample from TTS output rate to client playback rate if needed
            target_rate = self._settings.audio_sample_rate
            if target_rate != TTS_OUTPUT_SAMPLE_RATE:
                tts_audio = resample_pcm(
                    tts_audio,
                    src_rate=TTS_OUTPUT_SAMPLE_RATE,
                    dst_rate=target_rate,
                )

            # Stream audio in chunks so the client can start playback early.
            offset = 0
            while offset < len(tts_audio):
                chunk = tts_audio[offset : offset + AUDIO_CHUNK_SIZE]
                yield ResponseAudioDelta(delta=encode_audio(chunk))
                metrics.audio_bytes_sent += len(chunk)
                offset += AUDIO_CHUNK_SIZE

            yield ResponseAudioDone()
            yield ResponseDone()

        except Exception:
            # Surface an opaque error to the client; details go to the log.
            logger.exception("Pipeline error")
            yield ErrorEvent(message="Internal pipeline error", code="pipeline_error")

        finally:
            # NOTE(review): metrics are recorded for every run, including
            # empty-transcript and failed ones, so PipelineState counters
            # count attempts rather than successes — confirm this is intended.
            metrics.total_duration = time.monotonic() - pipeline_start
            self._state.record(metrics)
            logger.info(
                "Pipeline complete: stt=%.2fs agent=%.2fs tts=%.2fs total=%.2fs",
                metrics.stt_duration,
                metrics.agent_duration,
                metrics.tts_duration,
                metrics.total_duration,
            )
|
||||||
0
stentor-gateway/src/stentor/static/.gitkeep
Normal file
0
stentor-gateway/src/stentor/static/.gitkeep
Normal file
68
stentor-gateway/src/stentor/stt_client.py
Normal file
68
stentor-gateway/src/stentor/stt_client.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
"""Speaches STT client (OpenAI-compatible).
|
||||||
|
|
||||||
|
Posts audio as multipart/form-data to the /v1/audio/transcriptions endpoint.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from stentor.config import Settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class STTClient:
    """Async client for Speaches Speech-to-Text service."""

    def __init__(self, settings: Settings, http_client: httpx.AsyncClient) -> None:
        self._settings = settings
        self._http = http_client
        # OpenAI-compatible transcription endpoint.
        self._url = f"{settings.stt_url.rstrip('/')}/v1/audio/transcriptions"

    async def transcribe(self, wav_data: bytes, language: str | None = None) -> str:
        """Send audio to Speaches STT and return the transcript.

        Args:
            wav_data: Complete WAV file bytes.
            language: Optional language code (e.g., "en"). Auto-detect if omitted.

        Returns:
            Transcribed text.

        Raises:
            httpx.HTTPStatusError: If the STT service returns an error.
        """
        start = time.monotonic()

        files = {"file": ("audio.wav", wav_data, "audio/wav")}
        data: dict[str, str] = {
            "model": self._settings.stt_model,
            "response_format": "json",
        }
        if language:
            data["language"] = language

        logger.debug("STT request to %s (model=%s)", self._url, self._settings.stt_model)

        response = await self._http.post(self._url, files=files, data=data, timeout=30.0)
        response.raise_for_status()

        result = response.json()
        text = result.get("text", "").strip()

        elapsed = time.monotonic() - start
        logger.info("STT completed in %.2fs: %r", elapsed, text[:80])

        return text

    async def is_available(self) -> bool:
        """Check if the STT service is reachable.

        Returns False on any transport-level failure instead of raising, so
        readiness probes never crash on a flaky backend.
        """
        try:
            # Hit the models listing to check availability
            base_url = self._settings.stt_url.rstrip("/")
            response = await self._http.get(f"{base_url}/v1/models", timeout=5.0)
            return response.status_code == 200
        except httpx.HTTPError:
            # httpx.HTTPError is the common base of ConnectError and
            # TimeoutException and also covers other request failures
            # (protocol/read errors) that the narrower tuple let escape.
            return False
|
||||||
361
stentor-gateway/src/stentor/templates/dashboard.html
Normal file
361
stentor-gateway/src/stentor/templates/dashboard.html
Normal file
@@ -0,0 +1,361 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en" data-bs-theme="dark">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Stentor Gateway — Dashboard</title>
|
||||||
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
|
||||||
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css" rel="stylesheet">
|
||||||
|
<style>
|
||||||
|
.status-dot {
|
||||||
|
width: 12px;
|
||||||
|
height: 12px;
|
||||||
|
border-radius: 50%;
|
||||||
|
display: inline-block;
|
||||||
|
margin-right: 8px;
|
||||||
|
}
|
||||||
|
.status-dot.ok { background-color: #198754; }
|
||||||
|
.status-dot.error { background-color: #dc3545; }
|
||||||
|
.status-dot.unknown { background-color: #6c757d; }
|
||||||
|
.status-dot.checking { background-color: #ffc107; animation: pulse 1s infinite; }
|
||||||
|
@keyframes pulse {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.4; }
|
||||||
|
}
|
||||||
|
.card-metric {
|
||||||
|
font-size: 2rem;
|
||||||
|
font-weight: 700;
|
||||||
|
}
|
||||||
|
.card-metric-label {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--bs-secondary-color);
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.05em;
|
||||||
|
}
|
||||||
|
.hero-quote {
|
||||||
|
font-style: italic;
|
||||||
|
color: var(--bs-secondary-color);
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<!-- Navbar -->
|
||||||
|
<nav class="navbar navbar-expand-lg bg-body-tertiary border-bottom">
|
||||||
|
<div class="container-fluid">
|
||||||
|
<a class="navbar-brand d-flex align-items-center" href="/">
|
||||||
|
<i class="bi bi-soundwave me-2 fs-4"></i>
|
||||||
|
<strong>Stentor</strong> Gateway
|
||||||
|
</a>
|
||||||
|
<span class="navbar-text">
|
||||||
|
<span class="badge bg-secondary">v{{ version }}</span>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<div class="container-fluid py-4">
|
||||||
|
<!-- Header -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col">
|
||||||
|
<p class="hero-quote mb-0">
|
||||||
|
“Stentor, whose voice was as powerful as fifty voices of other men.”
|
||||||
|
— Homer, <em>Iliad</em>, Book V
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Service Connectivity -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col-12">
|
||||||
|
<h5 class="mb-3"><i class="bi bi-diagram-3 me-2"></i>Service Connectivity</h5>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-4 mb-3">
|
||||||
|
<div class="card h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="d-flex align-items-center mb-2">
|
||||||
|
<span class="status-dot checking" id="stt-status-dot"></span>
|
||||||
|
<h6 class="card-title mb-0">Speaches STT</h6>
|
||||||
|
</div>
|
||||||
|
<p class="card-text text-body-secondary small mb-1">
|
||||||
|
<i class="bi bi-mic me-1"></i>Speech-to-Text
|
||||||
|
</p>
|
||||||
|
<code class="small">{{ settings.stt_url }}</code>
|
||||||
|
<div class="mt-2 small">
|
||||||
|
<span class="text-body-secondary">Model:</span>
|
||||||
|
<span>{{ settings.stt_model }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="mt-2" id="stt-status-text">
|
||||||
|
<span class="badge bg-warning"><i class="bi bi-hourglass-split me-1"></i>Checking...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-4 mb-3">
|
||||||
|
<div class="card h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="d-flex align-items-center mb-2">
|
||||||
|
<span class="status-dot checking" id="tts-status-dot"></span>
|
||||||
|
<h6 class="card-title mb-0">Speaches TTS</h6>
|
||||||
|
</div>
|
||||||
|
<p class="card-text text-body-secondary small mb-1">
|
||||||
|
<i class="bi bi-volume-up me-1"></i>Text-to-Speech
|
||||||
|
</p>
|
||||||
|
<code class="small">{{ settings.tts_url }}</code>
|
||||||
|
<div class="mt-2 small">
|
||||||
|
<span class="text-body-secondary">Model:</span>
|
||||||
|
<span>{{ settings.tts_model }}</span>
|
||||||
|
<span class="text-body-secondary ms-2">Voice:</span>
|
||||||
|
<span>{{ settings.tts_voice }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="mt-2" id="tts-status-text">
|
||||||
|
<span class="badge bg-warning"><i class="bi bi-hourglass-split me-1"></i>Checking...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-4 mb-3">
|
||||||
|
<div class="card h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="d-flex align-items-center mb-2">
|
||||||
|
<span class="status-dot checking" id="agent-status-dot"></span>
|
||||||
|
<h6 class="card-title mb-0">FastAgent</h6>
|
||||||
|
</div>
|
||||||
|
<p class="card-text text-body-secondary small mb-1">
|
||||||
|
<i class="bi bi-robot me-1"></i>AI Agent
|
||||||
|
</p>
|
||||||
|
<code class="small">{{ settings.agent_url }}</code>
|
||||||
|
<div class="mt-2" id="agent-status-text">
|
||||||
|
<span class="badge bg-warning"><i class="bi bi-hourglass-split me-1"></i>Checking...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Metrics Row -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col-12">
|
||||||
|
<h5 class="mb-3"><i class="bi bi-speedometer2 me-2"></i>Metrics</h5>
|
||||||
|
</div>
|
||||||
|
<div class="col-sm-6 col-lg-3 mb-3">
|
||||||
|
<div class="card text-center h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="card-metric" id="metric-sessions">{{ active_sessions }}</div>
|
||||||
|
<div class="card-metric-label">Active Sessions</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-sm-6 col-lg-3 mb-3">
|
||||||
|
<div class="card text-center h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="card-metric" id="metric-transcriptions">{{ pipeline_state.total_transcriptions }}</div>
|
||||||
|
<div class="card-metric-label">Transcriptions</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-sm-6 col-lg-3 mb-3">
|
||||||
|
<div class="card text-center h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="card-metric" id="metric-tts">{{ pipeline_state.total_tts_requests }}</div>
|
||||||
|
<div class="card-metric-label">TTS Requests</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-sm-6 col-lg-3 mb-3">
|
||||||
|
<div class="card text-center h-100">
|
||||||
|
<div class="card-body">
|
||||||
|
<div class="card-metric" id="metric-uptime">—</div>
|
||||||
|
<div class="card-metric-label">Uptime</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Connected Clients -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col-lg-6 mb-3">
|
||||||
|
<h5 class="mb-3"><i class="bi bi-speaker me-2"></i>Connected Clients</h5>
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-body">
|
||||||
|
{% if sessions %}
|
||||||
|
<div class="table-responsive">
|
||||||
|
<table class="table table-sm table-hover mb-0">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Session ID</th>
|
||||||
|
<th>Client ID</th>
|
||||||
|
<th>Buffer</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for sid, info in sessions.items() %}
|
||||||
|
<tr>
|
||||||
|
<td><code class="small">{{ sid[:8] }}...</code></td>
|
||||||
|
<td>{{ info.client_id or "—" }}</td>
|
||||||
|
<td>{{ info.buffer_size }} bytes</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<p class="text-body-secondary mb-0">
|
||||||
|
<i class="bi bi-info-circle me-1"></i>No clients connected.
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Audio Configuration -->
|
||||||
|
<div class="col-lg-6 mb-3">
|
||||||
|
<h5 class="mb-3"><i class="bi bi-sliders me-2"></i>Audio Configuration</h5>
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-body">
|
||||||
|
<table class="table table-sm mb-0">
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td class="text-body-secondary">Sample Rate</td>
|
||||||
|
<td>{{ settings.audio_sample_rate }} Hz</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td class="text-body-secondary">Channels</td>
|
||||||
|
<td>{{ settings.audio_channels }} (mono)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td class="text-body-secondary">Sample Width</td>
|
||||||
|
<td>{{ settings.audio_sample_width }}-bit</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td class="text-body-secondary">Encoding</td>
|
||||||
|
<td>PCM S16LE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td class="text-body-secondary">WebSocket</td>
|
||||||
|
<td><code>/api/v1/realtime</code></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Recent Pipeline Runs -->
|
||||||
|
<div class="row mb-4">
|
||||||
|
<div class="col-12">
|
||||||
|
<h5 class="mb-3"><i class="bi bi-clock-history me-2"></i>Recent Pipeline Runs</h5>
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-body">
|
||||||
|
{% if pipeline_state.recent_metrics %}
|
||||||
|
<div class="table-responsive">
|
||||||
|
<table class="table table-sm table-hover mb-0">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Transcript</th>
|
||||||
|
<th>Response</th>
|
||||||
|
<th>STT</th>
|
||||||
|
<th>Agent</th>
|
||||||
|
<th>TTS</th>
|
||||||
|
<th>Total</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for m in pipeline_state.recent_metrics[-10:]|reverse %}
|
||||||
|
<tr>
|
||||||
|
<td class="small">{{ m.transcript[:50] }}{% if m.transcript|length > 50 %}...{% endif %}</td>
|
||||||
|
<td class="small">{{ m.response_text[:50] }}{% if m.response_text|length > 50 %}...{% endif %}</td>
|
||||||
|
<td><span class="badge bg-info">{{ "%.2f"|format(m.stt_duration) }}s</span></td>
|
||||||
|
<td><span class="badge bg-primary">{{ "%.2f"|format(m.agent_duration) }}s</span></td>
|
||||||
|
<td><span class="badge bg-success">{{ "%.2f"|format(m.tts_duration) }}s</span></td>
|
||||||
|
<td><span class="badge bg-secondary">{{ "%.2f"|format(m.total_duration) }}s</span></td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<p class="text-body-secondary mb-0">
|
||||||
|
<i class="bi bi-info-circle me-1"></i>No pipeline runs yet.
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Footer -->
|
||||||
|
<div class="row">
|
||||||
|
<div class="col text-center">
|
||||||
|
<p class="text-body-secondary small">
|
||||||
|
Stentor Gateway v{{ version }} ·
|
||||||
|
<a href="/api/docs" class="text-decoration-none">API Docs</a> ·
|
||||||
|
<a href="/api/metrics" class="text-decoration-none">Metrics</a> ·
|
||||||
|
<a href="/api/ready/" class="text-decoration-none">Readiness</a>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
|
||||||
|
<script>
|
||||||
|
// Format uptime
|
||||||
|
// Render an uptime in seconds as a compact human-readable string:
// "42s", "3m 12s", or "2h 5m" depending on magnitude.
function formatUptime(seconds) {
    if (seconds < 60) return `${Math.floor(seconds)}s`;
    if (seconds < 3600) {
        return `${Math.floor(seconds / 60)}m ${Math.floor(seconds % 60)}s`;
    }
    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    return `${hours}h ${minutes}m`;
}
|
||||||
|
|
||||||
|
// Update uptime display
|
||||||
|
// Server-reported uptime (seconds) at page-render time; Jinja substitutes
// the literal value here.
const uptimeSeconds = {{ uptime }};
// Reconstruct the server start instant so the counter keeps ticking
// client-side without further requests.
const startTime = Date.now() / 1000 - uptimeSeconds;
// Refresh the uptime card from the derived start instant.
function updateUptime() {
    const now = Date.now() / 1000;
    document.getElementById('metric-uptime').textContent = formatUptime(now - startTime);
}
updateUptime();
setInterval(updateUptime, 1000);
|
||||||
|
|
||||||
|
// Check service connectivity
|
||||||
|
// Poll the readiness endpoint and reflect each dependency's health in the
// dashboard cards. On any fetch/parse failure, mark all three as unknown.
async function checkServices() {
    const services = ['stt', 'tts', 'agent'];
    try {
        const resp = await fetch('/api/ready/');
        const data = await resp.json();
        for (const svc of services) {
            updateServiceStatus(svc, data.checks[svc]);
        }
    } catch (e) {
        console.error('Failed to check services:', e);
        for (const svc of services) {
            updateServiceStatus(svc, null);
        }
    }
}
|
||||||
|
|
||||||
|
// Paint the status dot and badge for one service card.
// `available` is true (up), false (down), or anything else (unknown).
function updateServiceStatus(service, available) {
    const dot = document.getElementById(`${service}-status-dot`);
    const text = document.getElementById(`${service}-status-text`);

    // Reset to the base class before applying the state modifier.
    dot.className = 'status-dot';

    let stateClass;
    let badge;
    if (available === true) {
        stateClass = 'ok';
        badge = '<span class="badge bg-success"><i class="bi bi-check-circle me-1"></i>Connected</span>';
    } else if (available === false) {
        stateClass = 'error';
        badge = '<span class="badge bg-danger"><i class="bi bi-x-circle me-1"></i>Unavailable</span>';
    } else {
        stateClass = 'unknown';
        badge = '<span class="badge bg-secondary"><i class="bi bi-question-circle me-1"></i>Unknown</span>';
    }
    dot.classList.add(stateClass);
    text.innerHTML = badge;
}
|
||||||
|
|
||||||
|
// Check on load and every 30 seconds
|
||||||
|
checkServices();
|
||||||
|
setInterval(checkServices, 30000);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
102
stentor-gateway/src/stentor/tts_client.py
Normal file
102
stentor-gateway/src/stentor/tts_client.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
"""Speaches TTS client (OpenAI-compatible).
|
||||||
|
|
||||||
|
Posts JSON to the /v1/audio/speech endpoint and streams back PCM audio.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from stentor.config import Settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Speaches TTS typically outputs at 24kHz
|
||||||
|
TTS_OUTPUT_SAMPLE_RATE = 24000
|
||||||
|
|
||||||
|
# Chunk size for streaming TTS audio (4KB ≈ 85ms at 24kHz/16bit/mono)
|
||||||
|
TTS_STREAM_CHUNK_SIZE = 4096
|
||||||
|
|
||||||
|
|
||||||
|
class TTSClient:
    """Async client for Speaches Text-to-Speech service."""

    def __init__(self, settings: Settings, http_client: httpx.AsyncClient) -> None:
        self._settings = settings
        self._http = http_client
        # OpenAI-compatible speech synthesis endpoint.
        self._url = f"{settings.tts_url.rstrip('/')}/v1/audio/speech"

    def _payload(self, text: str) -> dict[str, object]:
        """Build the OpenAI-compatible speech request body for *text*.

        Shared by :meth:`synthesize` and :meth:`synthesize_stream` so the two
        code paths cannot drift apart.
        """
        return {
            "model": self._settings.tts_model,
            "voice": self._settings.tts_voice,
            "input": text,
            "response_format": "pcm",
            "speed": 1.0,
        }

    async def synthesize(self, text: str) -> bytes:
        """Synthesize speech from text, returning complete PCM audio.

        Args:
            text: Text to synthesize.

        Returns:
            Raw PCM audio bytes (24kHz, mono, 16-bit signed LE).

        Raises:
            httpx.HTTPStatusError: If the TTS service returns an error.
        """
        start = time.monotonic()

        payload = self._payload(text)

        logger.debug(
            "TTS request to %s (model=%s, voice=%s): %r",
            self._url,
            self._settings.tts_model,
            self._settings.tts_voice,
            text[:80],
        )

        response = await self._http.post(self._url, json=payload, timeout=60.0)
        response.raise_for_status()

        pcm_data = response.content

        elapsed = time.monotonic() - start
        logger.info("TTS completed in %.2fs (%d bytes)", elapsed, len(pcm_data))

        return pcm_data

    async def synthesize_stream(self, text: str) -> AsyncIterator[bytes]:
        """Synthesize speech and yield PCM audio chunks as they arrive.

        Args:
            text: Text to synthesize.

        Yields:
            Chunks of raw PCM audio bytes.
        """
        payload = self._payload(text)

        logger.debug("TTS streaming request: %r", text[:80])

        async with self._http.stream("POST", self._url, json=payload, timeout=60.0) as response:
            response.raise_for_status()
            async for chunk in response.aiter_bytes(chunk_size=TTS_STREAM_CHUNK_SIZE):
                yield chunk

    async def is_available(self) -> bool:
        """Check if the TTS service is reachable.

        Returns False on any transport-level failure instead of raising, so
        readiness probes never crash on a flaky backend.
        """
        try:
            base_url = self._settings.tts_url.rstrip("/")
            response = await self._http.get(f"{base_url}/v1/models", timeout=5.0)
            return response.status_code == 200
        except httpx.HTTPError:
            # Broader than the original (ConnectError, TimeoutException):
            # also swallows protocol/read errors during a health probe.
            return False
|
||||||
226
stentor-gateway/test_client.py
Normal file
226
stentor-gateway/test_client.py
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Stentor Gateway test client.
|
||||||
|
|
||||||
|
Sends a WAV file over WebSocket to the Stentor Gateway and plays back
|
||||||
|
or saves the audio response. Useful for testing without ESP32 hardware.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Send a WAV file and save the response
|
||||||
|
python test_client.py --input recording.wav --output response.pcm
|
||||||
|
|
||||||
|
# Send a WAV file to a custom gateway URL
|
||||||
|
python test_client.py --input recording.wav --gateway ws://10.10.0.5:8600
|
||||||
|
|
||||||
|
# Generate silent audio for testing connectivity
|
||||||
|
python test_client.py --test-silence
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import struct
|
||||||
|
import sys
|
||||||
|
import wave
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
async def run_client(
    gateway_url: str,
    audio_data: bytes,
    client_id: str = "test-client",
    output_file: str | None = None,
) -> None:
    """Connect to the gateway, send audio, and receive the response.

    Drives one full session over the gateway's WebSocket protocol:
    session.start → stream audio chunks → commit → consume response events
    until response.done (or error) → optionally save audio → session.close.

    Args:
        gateway_url: WebSocket URL of the Stentor Gateway.
        audio_data: Raw PCM audio bytes to send.
        client_id: Client identifier.
        output_file: Optional path to save response PCM audio.
    """
    # Lazy import: 'websockets' is only needed when the client actually runs,
    # so the module can be imported (e.g. for --help) without it installed.
    try:
        import websockets
    except ImportError:
        print("Error: 'websockets' package required. Install with: pip install websockets")
        sys.exit(1)

    print(f"Connecting to {gateway_url}...")

    async with websockets.connect(gateway_url) as ws:
        # 1. Start session — advertise the audio format we will stream.
        await ws.send(json.dumps({
            "type": "session.start",
            "client_id": client_id,
            "audio_config": {
                "sample_rate": 16000,
                "channels": 1,
                "sample_width": 16,
                "encoding": "pcm_s16le",
            },
        }))

        # Wait for session.created
        msg = json.loads(await ws.recv())
        assert msg["type"] == "session.created", f"Expected session.created, got {msg}"
        session_id = msg["session_id"]
        print(f"Session created: {session_id}")

        # Wait for listening status
        msg = json.loads(await ws.recv())
        print(f"Status: {msg.get('state', msg)}")

        # 2. Stream audio in chunks (32ms chunks at 16kHz = 1024 bytes)
        chunk_size = 1024
        total_chunks = 0
        for offset in range(0, len(audio_data), chunk_size):
            chunk = audio_data[offset : offset + chunk_size]
            # Audio travels base64-encoded inside JSON text frames.
            b64_chunk = base64.b64encode(chunk).decode("ascii")
            await ws.send(json.dumps({
                "type": "input_audio_buffer.append",
                "audio": b64_chunk,
            }))
            total_chunks += 1

        print(f"Sent {total_chunks} audio chunks ({len(audio_data)} bytes)")

        # 3. Commit the audio buffer
        await ws.send(json.dumps({"type": "input_audio_buffer.commit"}))
        print("Audio committed, waiting for response...")

        # 4. Receive response events — loop until response.done or error.
        response_audio = bytearray()
        done = False

        while not done:
            raw = await ws.recv()
            msg = json.loads(raw)
            msg_type = msg.get("type", "")

            if msg_type == "status":
                print(f"  Status: {msg['state']}")

            elif msg_type == "transcript.done":
                print(f"  Transcript: {msg['text']}")

            elif msg_type == "response.text.done":
                print(f"  Response: {msg['text']}")

            elif msg_type == "response.audio.delta":
                # Accumulate decoded PCM; \r keeps progress on one line.
                chunk = base64.b64decode(msg["delta"])
                response_audio.extend(chunk)
                print(f"  Audio chunk: {len(chunk)} bytes", end="\r")

            elif msg_type == "response.audio.done":
                print(f"\n  Audio complete: {len(response_audio)} bytes total")

            elif msg_type == "response.done":
                print("  Response complete!")
                done = True

            elif msg_type == "error":
                print(f"  ERROR [{msg.get('code', '?')}]: {msg['message']}")
                done = True

            else:
                print(f"  Unknown event: {msg_type}")

        # 5. Save response audio
        if response_audio:
            if output_file:
                out_path = Path(output_file)
                if out_path.suffix == ".wav":
                    # Write as WAV
                    # NOTE(review): header is written as 16kHz, but TTS output
                    # is nominally 24kHz — confirm the gateway resamples before
                    # relying on saved WAV playback speed.
                    with wave.open(str(out_path), "wb") as wf:
                        wf.setnchannels(1)
                        wf.setsampwidth(2)
                        wf.setframerate(16000)
                        wf.writeframes(bytes(response_audio))
                else:
                    # Write raw PCM
                    out_path.write_bytes(bytes(response_audio))
                print(f"Response audio saved to {output_file}")
            else:
                print("(Use --output to save response audio)")

        # 6. Close session
        await ws.send(json.dumps({"type": "session.close"}))
        print("Session closed.")
|
||||||
|
|
||||||
|
|
||||||
|
def load_wav_as_pcm(wav_path: str) -> bytes:
    """Read a WAV file and return its raw PCM payload.

    Prints a summary of the file's format and warns when it deviates from
    the expected 16kHz / mono / 16-bit layout.
    """
    with wave.open(wav_path, "rb") as wf:
        channels = wf.getnchannels()
        rate = wf.getframerate()
        width = wf.getsampwidth()
        frames = wf.getnframes()

        print(f"Input: {wav_path}")
        print(f"  Channels: {channels}")
        print(f"  Sample rate: {rate} Hz")
        print(f"  Sample width: {width * 8}-bit")
        print(f"  Frames: {frames}")
        print(f"  Duration: {frames / rate:.2f}s")

        # Warn (don't fail) on formats the gateway doesn't expect.
        if rate != 16000:
            print(f"  WARNING: Expected 16kHz, got {rate} Hz")
        if channels != 1:
            print(f"  WARNING: Expected mono, got {channels} channels")
        if width != 2:
            print(f"  WARNING: Expected 16-bit, got {width * 8}-bit")

        return wf.readframes(frames)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_silence(duration_ms: int = 2000) -> bytes:
    """Generate silent PCM audio (16kHz, mono, 16-bit) for testing.

    Args:
        duration_ms: Duration of the silence in milliseconds.

    Returns:
        Zero-filled PCM bytes: 2 bytes per sample at 16kHz.
    """
    num_samples = int(16000 * duration_ms / 1000)
    print(f"Generated {duration_ms}ms of silence ({num_samples} samples)")
    # A zero-filled buffer is byte-identical to struct-packing zero int16s,
    # without first building a throwaway list of num_samples arguments.
    return bytes(2 * num_samples)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Command-line entry point: parse arguments, load audio, run the client."""
    parser = argparse.ArgumentParser(
        description="Stentor Gateway test client",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--gateway",
        default="ws://localhost:8600/api/v1/realtime",
        help="Gateway WebSocket URL (default: ws://localhost:8600/api/v1/realtime)",
    )
    parser.add_argument("--input", "-i", help="Path to input WAV file (16kHz, mono, 16-bit)")
    parser.add_argument("--output", "-o", help="Path to save response audio (.wav or .pcm)")
    parser.add_argument(
        "--client-id",
        default="test-client",
        help="Client identifier (default: test-client)",
    )
    parser.add_argument(
        "--test-silence",
        action="store_true",
        help="Send 2 seconds of silence (for connectivity testing)",
    )

    opts = parser.parse_args()

    # Pick the audio source: synthetic silence, a WAV file, or fail fast.
    if opts.test_silence:
        pcm = generate_silence()
    elif opts.input:
        pcm = load_wav_as_pcm(opts.input)
    else:
        parser.error("Specify --input WAV_FILE or --test-silence")

    asyncio.run(
        run_client(
            gateway_url=opts.gateway,
            audio_data=pcm,
            client_id=opts.client_id,
            output_file=opts.output,
        )
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
||||||
1
stentor-gateway/tests/__init__.py
Normal file
1
stentor-gateway/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Stentor Gateway tests."""
|
||||||
34
stentor-gateway/tests/conftest.py
Normal file
34
stentor-gateway/tests/conftest.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""Shared test fixtures for Stentor Gateway tests."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from stentor.config import Settings
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def settings() -> Settings:
    """Settings pointed at localhost service endpoints for tests."""
    overrides = {
        "stt_url": "http://localhost:9001",
        "tts_url": "http://localhost:9002",
        "agent_url": "http://localhost:9003",
        "log_level": "DEBUG",
    }
    return Settings(**overrides)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_pcm() -> bytes:
    """One second of silent PCM audio (16kHz, mono, 16-bit)."""
    # 16000 zero samples * 2 bytes each — byte-identical to struct-packing
    # 16000 zero int16 values.
    return bytes(2 * 16000)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_pcm_short() -> bytes:
    """100ms of silent PCM audio (16kHz, mono, 16-bit)."""
    # 1600 zero samples * 2 bytes each — byte-identical to struct-packing
    # 1600 zero int16 values.
    return bytes(2 * 1600)
|
||||||
120
stentor-gateway/tests/test_pipeline.py
Normal file
120
stentor-gateway/tests/test_pipeline.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
"""Tests for the voice pipeline orchestrator."""
|
||||||
|
|
||||||
|
import struct
|
||||||
|
from unittest.mock import AsyncMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from stentor.agent_client import AgentClient
|
||||||
|
from stentor.pipeline import Pipeline, PipelineState
|
||||||
|
from stentor.stt_client import STTClient
|
||||||
|
from stentor.tts_client import TTSClient
|
||||||
|
|
||||||
|
|
||||||
|
class TestPipeline:
    """Tests for the Pipeline orchestrator, driven entirely by stub clients."""

    @pytest.fixture
    def mock_stt(self):
        """STT stub that always hears the same question."""
        stub = AsyncMock(spec=STTClient)
        stub.transcribe.return_value = "What is the weather?"
        return stub

    @pytest.fixture
    def mock_tts(self):
        """TTS stub returning 100 silent PCM samples (24kHz source, resampled downstream)."""
        stub = AsyncMock(spec=TTSClient)
        stub.synthesize.return_value = struct.pack("<100h", *([0] * 100))
        return stub

    @pytest.fixture
    def mock_agent(self):
        """Agent stub with a canned reply."""
        stub = AsyncMock(spec=AgentClient)
        stub.send_message.return_value = "I don't have weather tools yet."
        return stub

    @pytest.fixture
    def pipeline(self, settings, mock_stt, mock_tts, mock_agent):
        """Pipeline wired to the stub clients with a fresh state."""
        return Pipeline(settings, mock_stt, mock_tts, mock_agent, PipelineState())

    async def test_full_pipeline(self, pipeline, sample_pcm, mock_stt, mock_tts, mock_agent):
        """The happy path emits the full event sequence and calls every service once."""
        events = [event async for event in pipeline.process(sample_pcm)]
        seen = [event.type for event in events]

        # Event sequence
        assert "status" in seen  # transcribing status
        assert "transcript.done" in seen
        assert "response.text.done" in seen
        assert "response.audio.delta" in seen or "response.audio.done" in seen
        assert "response.done" in seen

        # Each service saw exactly the upstream stage's output.
        mock_stt.transcribe.assert_called_once()
        mock_agent.send_message.assert_called_once_with("What is the weather?")
        mock_tts.synthesize.assert_called_once_with("I don't have weather tools yet.")

    async def test_pipeline_empty_transcript(self, settings, mock_tts, mock_agent):
        """An empty transcript yields an error and short-circuits agent and TTS."""
        silent_stt = AsyncMock(spec=STTClient)
        silent_stt.transcribe.return_value = ""

        pipeline = Pipeline(settings, silent_stt, mock_tts, mock_agent, PipelineState())

        pcm = struct.pack("<100h", *([0] * 100))
        seen = [event.type async for event in pipeline.process(pcm)]

        assert "error" in seen

        # Agent and TTS should NOT have been called
        mock_agent.send_message.assert_not_called()
        mock_tts.synthesize.assert_not_called()

    async def test_pipeline_empty_agent_response(self, settings, mock_stt, mock_tts):
        """An empty agent reply yields an error and skips TTS."""
        mute_agent = AsyncMock(spec=AgentClient)
        mute_agent.send_message.return_value = ""

        pipeline = Pipeline(settings, mock_stt, mock_tts, mute_agent, PipelineState())

        pcm = struct.pack("<100h", *([0] * 100))
        seen = [event.type async for event in pipeline.process(pcm)]

        assert "error" in seen
        mock_tts.synthesize.assert_not_called()

    async def test_pipeline_metrics_recorded(self, pipeline, sample_pcm):
        """A successful run increments the counters and records one metrics entry."""
        state = pipeline._state
        assert state.total_transcriptions == 0

        async for _ in pipeline.process(sample_pcm):
            pass  # drain the generator; only the side effects matter here

        assert state.total_transcriptions == 1
        assert state.total_agent_requests == 1
        assert state.total_tts_requests == 1
        assert len(state.recent_metrics) == 1

        last = state.recent_metrics[-1]
        assert last.total_duration > 0
        assert last.transcript == "What is the weather?"
        assert last.response_text == "I don't have weather tools yet."
|
||||||
89
stentor-gateway/tests/test_stt_client.py
Normal file
89
stentor-gateway/tests/test_stt_client.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
"""Tests for the Speaches STT client."""
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from stentor.audio import pcm_to_wav
|
||||||
|
from stentor.stt_client import STTClient
|
||||||
|
|
||||||
|
|
||||||
|
class TestSTTClient:
    """Tests for STTClient against a mocked Speaches HTTP API.

    NOTE(review): the former `stt_client` fixture was removed — no test used
    it (each test builds its own client inside `async with`), and it created
    an httpx.AsyncClient outside an async context that was never closed.
    """

    async def test_transcribe_success(self, settings, sample_pcm, httpx_mock):
        """Test successful transcription."""
        httpx_mock.add_response(
            url=f"{settings.stt_url}/v1/audio/transcriptions",
            method="POST",
            json={"text": "Hello world"},
        )

        async with httpx.AsyncClient() as http_client:
            client = STTClient(settings, http_client)
            wav_data = pcm_to_wav(sample_pcm)
            result = await client.transcribe(wav_data)

        assert result == "Hello world"

    async def test_transcribe_with_language(self, settings, sample_pcm, httpx_mock):
        """Test transcription with explicit language."""
        httpx_mock.add_response(
            url=f"{settings.stt_url}/v1/audio/transcriptions",
            method="POST",
            json={"text": "Bonjour le monde"},
        )

        async with httpx.AsyncClient() as http_client:
            client = STTClient(settings, http_client)
            wav_data = pcm_to_wav(sample_pcm)
            result = await client.transcribe(wav_data, language="fr")

        assert result == "Bonjour le monde"

    async def test_transcribe_empty_result(self, settings, sample_pcm, httpx_mock):
        """Whitespace-only service output comes back as an empty string."""
        httpx_mock.add_response(
            url=f"{settings.stt_url}/v1/audio/transcriptions",
            method="POST",
            json={"text": " "},
        )

        async with httpx.AsyncClient() as http_client:
            client = STTClient(settings, http_client)
            wav_data = pcm_to_wav(sample_pcm)
            result = await client.transcribe(wav_data)

        assert result == ""

    async def test_is_available_success(self, settings, httpx_mock):
        """Test availability check when service is up."""
        httpx_mock.add_response(
            url=f"{settings.stt_url}/v1/models",
            method="GET",
            json={"models": []},
        )

        async with httpx.AsyncClient() as http_client:
            client = STTClient(settings, http_client)
            available = await client.is_available()

        assert available is True

    async def test_is_available_failure(self, settings, httpx_mock):
        """Test availability check when service is down."""
        httpx_mock.add_exception(
            httpx.ConnectError("Connection refused"),
            url=f"{settings.stt_url}/v1/models",
        )

        async with httpx.AsyncClient() as http_client:
            client = STTClient(settings, http_client)
            available = await client.is_available()

        assert available is False
|
||||||
78
stentor-gateway/tests/test_tts_client.py
Normal file
78
stentor-gateway/tests/test_tts_client.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
"""Tests for the Speaches TTS client."""
|
||||||
|
|
||||||
|
import struct
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from stentor.tts_client import TTSClient
|
||||||
|
|
||||||
|
|
||||||
|
class TestTTSClient:
    """Tests for TTSClient against a mocked Speaches HTTP API."""

    async def test_synthesize_success(self, settings, httpx_mock):
        """Synthesis returns the service's PCM bytes unchanged."""
        # Fake PCM payload: 100 silent samples.
        silence = struct.pack("<100h", *([0] * 100))

        httpx_mock.add_response(
            url=f"{settings.tts_url}/v1/audio/speech",
            method="POST",
            content=silence,
        )

        async with httpx.AsyncClient() as http_client:
            audio = await TTSClient(settings, http_client).synthesize("Hello world")

        assert audio == silence

    async def test_synthesize_uses_correct_params(self, settings, httpx_mock):
        """The request body carries model, voice, input, format and speed."""
        httpx_mock.add_response(
            url=f"{settings.tts_url}/v1/audio/speech",
            method="POST",
            content=b"\x00\x00",
        )

        async with httpx.AsyncClient() as http_client:
            await TTSClient(settings, http_client).synthesize("Test text")

        request = httpx_mock.get_request()
        assert request is not None

        import json

        body = json.loads(request.content)
        expected = {
            "model": settings.tts_model,
            "voice": settings.tts_voice,
            "input": "Test text",
            "response_format": "pcm",
            "speed": 1.0,
        }
        for key, value in expected.items():
            assert body[key] == value

    async def test_is_available_success(self, settings, httpx_mock):
        """Availability check succeeds when the models endpoint answers 200."""
        httpx_mock.add_response(
            url=f"{settings.tts_url}/v1/models",
            method="GET",
            json={"models": []},
        )

        async with httpx.AsyncClient() as http_client:
            assert await TTSClient(settings, http_client).is_available() is True

    async def test_is_available_failure(self, settings, httpx_mock):
        """Availability check reports False on a connection failure."""
        httpx_mock.add_exception(
            httpx.ConnectError("Connection refused"),
            url=f"{settings.tts_url}/v1/models",
        )

        async with httpx.AsyncClient() as http_client:
            assert await TTSClient(settings, http_client).is_available() is False
|
||||||
185
stentor-gateway/tests/test_websocket.py
Normal file
185
stentor-gateway/tests/test_websocket.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
"""Tests for the WebSocket endpoint and message models."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from stentor.audio import encode_audio
|
||||||
|
from stentor.models import (
|
||||||
|
AudioConfig,
|
||||||
|
ErrorEvent,
|
||||||
|
InputAudioBufferAppend,
|
||||||
|
ResponseAudioDelta,
|
||||||
|
ResponseAudioDone,
|
||||||
|
ResponseDone,
|
||||||
|
ResponseTextDone,
|
||||||
|
SessionCreated,
|
||||||
|
SessionStart,
|
||||||
|
StatusUpdate,
|
||||||
|
TranscriptDone,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestMessageModels:
    """Serialization round-trip tests for the WebSocket message models."""

    def test_session_start_defaults(self):
        """SessionStart defaults: empty client id, 16kHz audio config."""
        payload = SessionStart().model_dump()
        assert payload["type"] == "session.start"
        assert payload["client_id"] == ""
        assert payload["audio_config"]["sample_rate"] == 16000

    def test_session_start_custom(self):
        """SessionStart accepts a custom client id and audio config."""
        event = SessionStart(
            client_id="esp32-kitchen",
            audio_config=AudioConfig(sample_rate=24000),
        )
        assert event.client_id == "esp32-kitchen"
        assert event.audio_config.sample_rate == 24000

    def test_input_audio_buffer_append(self):
        """Audio append message carries the base64 payload unchanged."""
        encoded = encode_audio(b"\x00\x00\x01\x00")
        payload = InputAudioBufferAppend(audio=encoded).model_dump()
        assert payload["type"] == "input_audio_buffer.append"
        assert payload["audio"] == encoded

    def test_session_created(self):
        """Session created response serializes type and session id."""
        payload = json.loads(SessionCreated(session_id="test-uuid").model_dump_json())
        assert payload["type"] == "session.created"
        assert payload["session_id"] == "test-uuid"

    def test_status_update(self):
        """Every pipeline state is accepted by StatusUpdate."""
        for state in ("listening", "transcribing", "thinking", "speaking"):
            assert StatusUpdate(state=state).state == state

    def test_transcript_done(self):
        """Transcript done message serializes type and text."""
        payload = json.loads(TranscriptDone(text="Hello world").model_dump_json())
        assert payload["type"] == "transcript.done"
        assert payload["text"] == "Hello world"

    def test_response_text_done(self):
        """Response text done message serializes type and text."""
        payload = json.loads(ResponseTextDone(text="I can help with that.").model_dump_json())
        assert payload["type"] == "response.text.done"
        assert payload["text"] == "I can help with that."

    def test_response_audio_delta(self):
        """Audio delta message serializes type and base64 delta."""
        payload = json.loads(ResponseAudioDelta(delta="AAAA").model_dump_json())
        assert payload["type"] == "response.audio.delta"
        assert payload["delta"] == "AAAA"

    def test_response_audio_done(self):
        """Audio done message has the expected type tag."""
        assert ResponseAudioDone().type == "response.audio.done"

    def test_response_done(self):
        """Response done message has the expected type tag."""
        assert ResponseDone().type == "response.done"

    def test_error_event(self):
        """Error event serializes message and explicit code."""
        event = ErrorEvent(message="Something went wrong", code="test_error")
        payload = json.loads(event.model_dump_json())
        assert payload["type"] == "error"
        assert payload["message"] == "Something went wrong"
        assert payload["code"] == "test_error"

    def test_error_event_default_code(self):
        """Error event falls back to the default code."""
        assert ErrorEvent(message="Oops").code == "unknown_error"
|
||||||
|
|
||||||
|
|
||||||
|
class TestWebSocketEndpoint:
    """Tests for the /api/v1/realtime WebSocket endpoint."""

    @pytest.fixture
    def client(self):
        """Test client entered as a context manager so lifespan populates app.state."""
        from stentor.main import app

        with TestClient(app) as test_client:
            yield test_client

    def test_health_live(self, client):
        """Liveness endpoint reports ok."""
        response = client.get("/api/live/")
        assert response.status_code == 200
        assert response.json()["status"] == "ok"

    def test_api_info(self, client):
        """Info endpoint advertises name, version, and the realtime endpoint."""
        response = client.get("/api/v1/info")
        assert response.status_code == 200
        info = response.json()
        assert info["name"] == "stentor-gateway"
        assert info["version"] == "0.1.0"
        assert "realtime" in info["endpoints"]

    def test_websocket_session_lifecycle(self, client):
        """Handshake: session.start → session.created → listening → session.close."""
        with client.websocket_connect("/api/v1/realtime") as ws:
            ws.send_json({
                "type": "session.start",
                "client_id": "test-client",
            })

            created = ws.receive_json()
            assert created["type"] == "session.created"
            assert "session_id" in created

            status = ws.receive_json()
            assert status["type"] == "status"
            assert status["state"] == "listening"

            ws.send_json({"type": "session.close"})

    def test_websocket_no_session_error(self, client):
        """Audio sent before session.start is rejected with no_session."""
        with client.websocket_connect("/api/v1/realtime") as ws:
            ws.send_json({
                "type": "input_audio_buffer.append",
                "audio": "AAAA",
            })

            error = ws.receive_json()
            assert error["type"] == "error"
            assert error["code"] == "no_session"

    def test_websocket_invalid_json(self, client):
        """Malformed JSON is rejected with invalid_json."""
        with client.websocket_connect("/api/v1/realtime") as ws:
            ws.send_text("not valid json{{{")

            error = ws.receive_json()
            assert error["type"] == "error"
            assert error["code"] == "invalid_json"

    def test_websocket_unknown_event(self, client):
        """An unrecognized event type is rejected with unknown_event."""
        with client.websocket_connect("/api/v1/realtime") as ws:
            ws.send_json({"type": "scooby.dooby.doo"})

            error = ws.receive_json()
            assert error["type"] == "error"
            assert error["code"] == "unknown_event"
|
||||||
Reference in New Issue
Block a user