feat: add health check endpoints and metrics to MCP server

2026-03-28 21:54:19 +00:00
parent 482657492d
commit 1fd2865c89
3 changed files with 72 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -231,3 +231,46 @@ The free-api-live-football-data RapidAPI pricing:
 | Mega | $49.99/mo | 500,000 |

 Nike uses a 5-minute in-memory TTL cache to minimize API calls during conversations.
+
+
+
+## File Management Philosophy
+
+Following red panda-approved practices:
+
+- **Under 500 lines:** ✅ Perfect! Red pandas are happy
+- **500-1000 lines:** ⚠️ Acceptable, but watch carefully  
+- **Over 1000 lines:** 🚨 Time to refactor! Split into multiple files
+
+Large files burn through LLM context and produce poor results. Keep files focused and modular.
+
+## Monitoring & Health Check Endpoints
+Follow standard Kubernetes health check endpoints for container orchestration:
+
+### /ready/ - Readiness probe checks if the application is ready to serve traffic
+- Validates database connectivity
+- Validates cache connectivity
+- Returns 200 if ready, 503 if dependencies are unavailable
+- Used by load balancers to determine if pod should receive traffic
+
+### /live/ - Liveness probe checks if the application process is alive
+- Simple health check with minimal logic
+- Returns 200 if the server is responding to requests
+- Used by Kubernetes to determine if pod should be restarted
+
+### /metrics
+Detailed metrics. Use the Prometheus and Alloy integrations for metrics and logs rather than adding custom health endpoints.
+
+## Documentation
+Place documentation in the docs directory of the repository:
+
+/docs/
+
+HTML documents must follow docs/documentation_style_guide.html
+
+### Diagrams
+When creating or updating diagrams in HTML files, use Mermaid.
+
+---
+
+*Remember: Red pandas are tough critics, but they reward quality with their approval. Strive for MCP servers that would make a red panda proud!* 🐾
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,10 +2,7 @@ services:
  nike:
    build: .
    ports:
-      - "${NIKE_PORT:-8000}:8000"
+      - "${NIKE_PORT}:8000"  # NOTE(review): the ":-8000" fallback was dropped — NIKE_PORT must now be supplied (presumably via .env); confirm
    env_file:
      - .env
-    environment:
-      NIKE_HOST: "0.0.0.0"
-      NIKE_PORT: "8000"
    restart: unless-stopped
--- a/nike/server.py
+++ b/nike/server.py
@@ -816,6 +816,34 @@ async def api_run(body: _RunRequest):
        return JSONResponse({"ok": False, "error": str(exc)}, status_code=500)


+# ── Health endpoints ──────────────────────────────────────
+
+
+@dashboard.get("/live/")
+async def liveness():  # Liveness probe: performs no dependency checks.
+    return JSONResponse({"status": "ok"})  # Same fixed payload on every call.
+
+
+@dashboard.get("/ready/")
+async def readiness():  # Readiness probe: only the database is checked here (no cache check).
+    db_check = db.check_connection()  # NOTE(review): not awaited inside an async handler — confirm it cannot block the event loop.
+    if not db_check.get("connected"):  # A missing or falsy "connected" key counts as not ready.
+        return JSONResponse({"status": "unavailable", "database": db_check}, status_code=503)
+    return JSONResponse({"status": "ready", "database": db_check})
+
+
+@dashboard.get("/metrics")
+async def metrics():  # Reports uptime, the DB connection check and table counts as one JSON object.
+    db_check = db.check_connection()
+    counts = db.get_table_counts()  # NOTE(review): called regardless of what db_check reports — confirm it is safe when the DB is down.
+    uptime_s = int((datetime.now(timezone.utc) - _SERVER_START).total_seconds())  # Whole seconds since _SERVER_START; presumably timezone-aware — verify.
+    return JSONResponse({
+        "uptime_seconds": uptime_s,
+        "database": db_check,
+        "table_counts": counts,
+    })
+
+
 # ── Mount MCP onto dashboard ──────────────────────────────
 dashboard.mount("/mcp", _mcp_app)