chore(ansible): update model endpoints and enable Rommie deployment

- Bump Qwen model from 3.5 to 3.6 and update inference endpoints (nyx:22079→22072, pan:22078→22076) for caliban and puck hosts
- Add Rommie MCP server deployment to site.yml
- Update Rommie docs to reflect new port (20361), model versions, and health check accepting 200/406 status codes
2026-05-28 12:17:23 -04:00
parent a01feee663
commit 3bdb11dc72
4 changed files with 15 additions and 12 deletions
--- a/ansible/inventory/host_vars/caliban.incus.yml
+++ b/ansible/inventory/host_vars/caliban.incus.yml
@@ -24,11 +24,11 @@ rommie_port: 20361
 rommie_host: "0.0.0.0"
 rommie_display: ":10"
 rommie_allowed_hosts: "caliban.incus,rommie.ouranos.helu.ca"
-rommie_model: Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf
-rommie_model_url: "http://nyx.helu.ca:22079"
+rommie_model: Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf
+rommie_model_url: "http://nyx.helu.ca:22072"
 rommie_provider: "openai"
 rommie_ground_provider: "huggingface"
-rommie_ground_url: "http://pan.helu.ca:22078"
+rommie_ground_url: "http://pan.helu.ca:22076"
 rommie_ground_model: "UI-TARS-7B-DPO-Q6_K_L.gguf"
 rommie_grounding_width: 1024
 rommie_grounding_height: 1024
--- a/ansible/inventory/host_vars/puck.incus.yml
+++ b/ansible/inventory/host_vars/puck.incus.yml
@@ -79,8 +79,8 @@ pallas_log_level: INFO
 kottos_fastagent_log_level: info

 # LLM provider — the same OpenAI-compatible Qwen endpoint Kottos uses today.
-kottos_default_model: "openai.Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf"
-kottos_openai_base_url: "http://nyx.helu.ca:22079/v1"
+kottos_default_model: "openai.Qwen3.6-35B-A3B-UD-Q4_K_XL.gguf"
+kottos_openai_base_url: "http://nyx.helu.ca:22072/v1"
 kottos_model_vision: true
 kottos_model_context_window: 192000
 kottos_model_max_output_tokens: 16384
--- a/ansible/site.yml
+++ b/ansible/site.yml
@@ -48,6 +48,9 @@
 - name: Deploy Agent S
  import_playbook: agent_s/deploy.yml

+- name: Deploy Rommie MCP Server
+  import_playbook: rommie/deploy.yml
+
 - name: Stage Kottos (Pallas FastAgent runtime)
  import_playbook: kottos/stage.yml