feat: add call history API endpoints and TTS service client
Adds read-only access to persisted call records for the dashboard and implements a client for the Rhema text-to-speech service. - api/call_history.py: New router providing paged call lists and detailed call records with transcript metadata. - services/tts.py: Async client for OpenAI-compatible TTS endpoints (Rhema/Kokoro) used for call-flow steps.
This commit is contained in:
@@ -24,6 +24,7 @@ from models.call_flow import ActionType, CallFlow, CallFlowStep
|
||||
from models.events import EventType, GatewayEvent
|
||||
from services.audio_classifier import AudioClassifier
|
||||
from services.transcription import TranscriptionService
|
||||
from services.tts import TTSService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -68,6 +69,7 @@ class HoldSlayerService:
|
||||
classifier: AudioClassifier,
|
||||
transcription: TranscriptionService,
|
||||
settings: Settings,
|
||||
tts: Optional[TTSService] = None,
|
||||
):
|
||||
self.gateway = gateway
|
||||
self.call_manager = call_manager
|
||||
@@ -75,6 +77,7 @@ class HoldSlayerService:
|
||||
self.classifier = classifier
|
||||
self.transcription = transcription
|
||||
self.settings = settings
|
||||
self.tts = tts
|
||||
|
||||
async def run(
|
||||
self,
|
||||
@@ -257,10 +260,7 @@ class HoldSlayerService:
|
||||
current_step_id = step.next_step
|
||||
|
||||
elif step.action == ActionType.SPEAK:
|
||||
# Say something into the call (TTS)
|
||||
# TODO: Implement TTS integration
|
||||
logger.info(f"🗣️ Would say: '{step.action_value}' (TTS not yet implemented)")
|
||||
await asyncio.sleep(3.0)
|
||||
await self._speak(call, sip_leg_id, step.action_value or "")
|
||||
current_step_id = step.next_step
|
||||
|
||||
elif step.action == ActionType.TRANSFER:
|
||||
@@ -715,3 +715,53 @@ class HoldSlayerService:
|
||||
logger.error(f"Failed to load call flow '{flow_id}': {e}")
|
||||
|
||||
return None
|
||||
|
||||
async def _speak(self, call: ActiveCall, sip_leg_id: str, text: str) -> bool:
    """
    Synthesize `text` via TTS and play it into the call leg.

    Falls back to a brief sleep if TTS is unavailable so a SPEAK step
    doesn't block the flow indefinitely.

    Args:
        call: The active call; its `id` is used in the temp-file name and
            as the `call_id` of the published event.
        sip_leg_id: Leg identifier handed to `media_pipeline.play_wav`.
        text: Text to speak. Empty or whitespace-only text is a no-op.

    Returns:
        True once the audio has been synthesized and `play_wav` has
        returned. False if `text` is blank, if no TTS service or media
        pipeline is available, or if synthesis reported no audio.

    Exceptions raised by synthesis or playback are not caught here; they
    propagate to the caller after the temporary WAV file is removed.
    """
    if not text.strip():
        return False

    if not self.tts or not getattr(self.gateway, "media_pipeline", None):
        logger.warning(f"🗣️ TTS unavailable, skipping SPEAK: '{text[:60]}'")
        await asyncio.sleep(2.0)
        return False

    # Imported locally so this method does not depend on module-level
    # imports that may not be present.
    import os
    import tempfile

    # mkstemp creates the file and returns an open descriptor; close it
    # right away because only the path is handed to the TTS service.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav", prefix=f"speak_{call.id}_")
    os.close(fd)

    try:
        ok = await self.tts.synthesize_to_file(text, tmp_path)
        if not ok:
            logger.warning(f"🗣️ TTS synthesis returned no audio for: '{text[:60]}'")
            return False

        logger.info(f"🗣️ Speaking: '{text[:80]}'")
        await self.gateway.media_pipeline.play_wav(sip_leg_id, tmp_path)

        # Publish event so the dashboard/transcript shows what we said.
        # Best-effort: the audio has already been played, so a publish
        # failure must not fail the SPEAK step -- but it is logged rather
        # than silently discarded so a broken event bus is diagnosable.
        try:
            await self.gateway.event_bus.publish(
                GatewayEvent(
                    type=EventType.SPEAK_PLAYED,
                    call_id=call.id,
                    data={"text": text},
                    message=f"Played TTS: {text[:80]}",
                )
            )
        except Exception:
            logger.warning(
                "Failed to publish SPEAK_PLAYED event for call %s",
                call.id,
                exc_info=True,
            )

        return True
    finally:
        # Always remove the temp file, whether playback succeeded or not.
        try:
            os.unlink(tmp_path)
        except OSError:
            # File already gone or not removable; nothing useful to do.
            pass
|
||||
|
||||
Reference in New Issue
Block a user