docs: rewrite README with structured overview and quick start guide
Replaces the minimal project description with a comprehensive README including a component overview table, quick start instructions, common Ansible operations, and links to detailed documentation. Aligns with Red Panda Approval™ standards.
This commit is contained in:
243
ansible/arke/.env.example
Normal file
243
ansible/arke/.env.example
Normal file
@@ -0,0 +1,243 @@
|
||||
# Arke Configuration Example
|
||||
# Copy this file to .env and update with your values
|
||||
|
||||
# ============================================================================
|
||||
# Server Configuration
|
||||
# ============================================================================
|
||||
HOST=0.0.0.0
|
||||
PORT=8000
|
||||
DEBUG=false
|
||||
LOG_LEVEL=info
|
||||
RELOAD=false
|
||||
|
||||
# ============================================================================
|
||||
# PostgreSQL Database Configuration
|
||||
# ============================================================================
|
||||
|
||||
# PostgreSQL connection settings for the Arke application
|
||||
DB_HOST=localhost
|
||||
DB_PORT=5432
|
||||
DB_NAME=arke
|
||||
DB_USER=arke
|
||||
DB_PASSWORD=your_secure_password
|
||||
|
||||
# ============================================================================
|
||||
# Memcached Configuration
|
||||
# ============================================================================
|
||||
MEMCACHED_HOST=localhost
|
||||
MEMCACHED_PORT=11211
|
||||
|
||||
# ============================================================================
|
||||
# Multi-Backend Configuration (Environment Variable Format)
|
||||
# ============================================================================
|
||||
# Ansible-friendly configuration using individual environment variables
|
||||
# No JSON escaping issues, works perfectly with Ansible Vault
|
||||
|
||||
# --- NTTh Backend (Token Pool) ---
|
||||
# NTTh is treated specially as it manages a pool of tokens with session limits
|
||||
NTTH_BACKEND_ENABLED=true
|
||||
NTTH_SESSION_LIMIT=90
|
||||
NTTH_SESSION_TTL=3600
|
||||
NTTH_TOKEN_CACHE_TTL=82800
|
||||
|
||||
# NTTh Tokens (numbered, add as many as needed)
|
||||
NTTH_TOKEN_1_APP_ID=your_app_id_1
|
||||
NTTH_TOKEN_1_APP_SECRET=your_secret_1
|
||||
NTTH_TOKEN_1_NAME=production-primary
|
||||
|
||||
NTTH_TOKEN_2_APP_ID=your_app_id_2
|
||||
NTTH_TOKEN_2_APP_SECRET=your_secret_2
|
||||
NTTH_TOKEN_2_NAME=production-backup
|
||||
|
||||
# Add more tokens as needed:
|
||||
# NTTH_TOKEN_3_APP_ID=your_app_id_3
|
||||
# NTTH_TOKEN_3_APP_SECRET=your_secret_3
|
||||
# NTTH_TOKEN_3_NAME=production-tertiary
|
||||
|
||||
# --- Standard Backends (OpenAI-Compatible, etc.) ---
|
||||
# Backend 1: Nyx (llama-cpp instance)
|
||||
BACKEND_1_NAME=nyx
|
||||
BACKEND_1_TYPE=openai-compatible
|
||||
BACKEND_1_ENABLED=true
|
||||
BACKEND_1_BASE_URL=http://nyx.helu.ca:8080/v1
|
||||
BACKEND_1_API_KEY=not-needed
|
||||
BACKEND_1_MODEL_PREFIX=nyx
|
||||
BACKEND_1_TIMEOUT=60
|
||||
|
||||
# Backend 2: Athena (llama-cpp instance)
|
||||
BACKEND_2_NAME=athena
|
||||
BACKEND_2_TYPE=openai-compatible
|
||||
BACKEND_2_ENABLED=true
|
||||
BACKEND_2_BASE_URL=http://athena.helu.ca:8080/v1
|
||||
BACKEND_2_API_KEY=not-needed
|
||||
BACKEND_2_MODEL_PREFIX=athena
|
||||
BACKEND_2_TIMEOUT=60
|
||||
|
||||
# ============================================================================
|
||||
# Future Backend Examples (Reference Only - Not Active)
|
||||
# ============================================================================
|
||||
# These examples show how to configure other backend types when needed
|
||||
|
||||
# --- Anthropic Backend Example ---
|
||||
# BACKEND_3_NAME=anthropic
|
||||
# BACKEND_3_TYPE=anthropic
|
||||
# BACKEND_3_ENABLED=true
|
||||
# BACKEND_3_BASE_URL=https://api.anthropic.com
|
||||
# BACKEND_3_API_KEY=sk-ant-api03-xxxxx
|
||||
# BACKEND_3_MODEL_PREFIX=anthropic
|
||||
# BACKEND_3_TIMEOUT=60
|
||||
|
||||
# --- Azure OpenAI Backend Example ---
|
||||
# BACKEND_4_NAME=azure-openai
|
||||
# BACKEND_4_TYPE=azure-openai
|
||||
# BACKEND_4_ENABLED=true
|
||||
# BACKEND_4_BASE_URL=https://your-resource.openai.azure.com
|
||||
# BACKEND_4_API_KEY=your-azure-key
|
||||
# BACKEND_4_MODEL_PREFIX=azure
|
||||
# BACKEND_4_DEPLOYMENT_NAME=gpt-4
|
||||
# BACKEND_4_API_VERSION=2024-02-15-preview
|
||||
# BACKEND_4_TIMEOUT=60
|
||||
|
||||
# --- AWS Bedrock Backend Example ---
|
||||
# BACKEND_5_NAME=bedrock
|
||||
# BACKEND_5_TYPE=bedrock
|
||||
# BACKEND_5_ENABLED=true
|
||||
# BACKEND_5_AWS_REGION=us-east-1
|
||||
# BACKEND_5_AWS_ACCESS_KEY_ID=AKIA...
|
||||
# BACKEND_5_AWS_SECRET_ACCESS_KEY=secret...
|
||||
# BACKEND_5_MODEL_PREFIX=bedrock
|
||||
# BACKEND_5_TIMEOUT=60
|
||||
|
||||
# --- OpenAI Direct Backend Example ---
|
||||
# BACKEND_6_NAME=openai
|
||||
# BACKEND_6_TYPE=openai-compatible
|
||||
# BACKEND_6_ENABLED=true
|
||||
# BACKEND_6_BASE_URL=https://api.openai.com/v1
|
||||
# BACKEND_6_API_KEY=sk-...
|
||||
# BACKEND_6_MODEL_PREFIX=openai
|
||||
# BACKEND_6_TIMEOUT=60
|
||||
|
||||
# ============================================================================
|
||||
# Embedding Provider Configuration
|
||||
# ============================================================================
|
||||
# Choose your embedding provider: 'ollama' or 'openai'
|
||||
EMBEDDING_PROVIDER=ollama
|
||||
|
||||
# --- Ollama Configuration (when EMBEDDING_PROVIDER=ollama) ---
|
||||
OLLAMA_HOST=nyx.helu.ca
|
||||
OLLAMA_PORT=11434
|
||||
EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# --- OpenAI-Compatible Configuration (when EMBEDDING_PROVIDER=openai) ---
|
||||
# Works with OpenAI API, llama-cpp, LocalAI, and other compatible services
|
||||
OPENAI_EMBEDDING_BASE_URL=http://localhost:8080
|
||||
OPENAI_EMBEDDING_API_KEY=
|
||||
OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
|
||||
|
||||
# --- Common Embedding Configuration ---
|
||||
EMBEDDING_TIMEOUT=30.0
|
||||
|
||||
# --- Batch Chunking Configuration (for llama-cpp) ---
|
||||
# These settings optimize embedding requests for llama-cpp's context limits
|
||||
EMBEDDING_BATCH_SIZE=512
|
||||
EMBEDDING_UBATCH_SIZE=512
|
||||
EMBEDDING_MAX_CONTEXT=8192
|
||||
|
||||
# ============================================================================
|
||||
# Memory System Configuration
|
||||
# ============================================================================
|
||||
MEMORY_ENABLED=true
|
||||
MAX_CONTEXT_TOKENS=8000
|
||||
SIMILARITY_THRESHOLD=0.7
|
||||
MIN_IMPORTANCE_SCORE=0.7
|
||||
|
||||
# ============================================================================
|
||||
# Message Size Limits
|
||||
# ============================================================================
|
||||
# Maximum tokens allowed for incoming messages (default: 32768)
|
||||
# This limit prevents excessively large requests that could overwhelm the system
|
||||
MESSAGE_MAX_TOKENS=32768
|
||||
|
||||
# ============================================================================
|
||||
# Background Task Configuration (Async Embedding Generation)
|
||||
# ============================================================================
|
||||
# Enable background task processing for async operations
|
||||
BACKGROUND_TASKS_ENABLED=true
|
||||
|
||||
# Number of worker threads for background tasks
|
||||
BACKGROUND_TASK_WORKERS=5
|
||||
|
||||
# Maximum retry attempts for failed tasks
|
||||
BACKGROUND_TASK_MAX_RETRIES=3
|
||||
|
||||
# Initial retry delay in seconds (uses exponential backoff)
|
||||
BACKGROUND_TASK_RETRY_DELAY=1.0
|
||||
|
||||
# Cleanup interval for old completed/failed tasks (hours)
|
||||
BACKGROUND_TASK_CLEANUP_HOURS=24
|
||||
|
||||
# --- Async Embedding Configuration ---
|
||||
# Enable async embedding generation (non-blocking)
|
||||
ASYNC_EMBEDDINGS_ENABLED=true
|
||||
|
||||
# Number of messages to batch together for embedding generation
|
||||
ASYNC_EMBEDDING_BATCH_SIZE=50
|
||||
|
||||
# Priority level for embedding tasks: LOW, NORMAL, HIGH, CRITICAL
|
||||
ASYNC_EMBEDDING_PRIORITY=NORMAL
|
||||
|
||||
# --- Async Deduplication Configuration ---
|
||||
# Enable async document enhancement (non-blocking embedding generation for deduplicated documents)
|
||||
ASYNC_DEDUPLICATION_ENABLED=true
|
||||
|
||||
# Number of documents to batch together for enhancement
|
||||
DEDUPLICATION_BATCH_SIZE=20
|
||||
|
||||
# Priority level for document enhancement tasks: LOW, NORMAL, HIGH, CRITICAL
|
||||
DEDUPLICATION_ENHANCEMENT_PRIORITY=NORMAL
|
||||
|
||||
# Enable HTML content extraction and processing
|
||||
HTML_CONTENT_EXTRACTION=true
|
||||
|
||||
# Minimum token count for document deduplication
|
||||
MIN_TOKENS_FOR_DEDUP=500
|
||||
|
||||
# Semantic similarity threshold for duplicate detection (0.0-1.0)
|
||||
DEDUPLICATION_THRESHOLD=0.95
|
||||
|
||||
# Reference expansion strategy: smart, full, summary, minimal
|
||||
REFERENCE_EXPANSION_STRATEGY=smart
|
||||
|
||||
# ============================================================================
|
||||
# Monitoring Configuration
|
||||
# ============================================================================
|
||||
PROMETHEUS_ENABLED=true
|
||||
METRICS_PORT=9090
|
||||
|
||||
# ============================================================================
|
||||
# Example Configurations for Different Setups
|
||||
# ============================================================================
|
||||
|
||||
# Example 1: Using Ollama (default)
|
||||
# EMBEDDING_PROVIDER=ollama
|
||||
# OLLAMA_HOST=localhost
|
||||
# OLLAMA_PORT=11434
|
||||
# EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# Example 2: Using llama-cpp with OpenAI-compatible API
|
||||
# EMBEDDING_PROVIDER=openai
|
||||
# OPENAI_EMBEDDING_BASE_URL=http://localhost:8080
|
||||
# OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
|
||||
# API key is optional for local servers — leave empty if not required:
# OPENAI_EMBEDDING_API_KEY=
|
||||
|
||||
# Example 3: Using actual OpenAI API
|
||||
# EMBEDDING_PROVIDER=openai
|
||||
# OPENAI_EMBEDDING_BASE_URL=https://api.openai.com
|
||||
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
|
||||
# OPENAI_EMBEDDING_API_KEY=sk-your-openai-api-key
|
||||
|
||||
# Example 4: Using LocalAI
|
||||
# EMBEDDING_PROVIDER=openai
|
||||
# OPENAI_EMBEDDING_BASE_URL=http://localhost:8080
|
||||
# OPENAI_EMBEDDING_MODEL=bert-embeddings
|
||||
# API key is optional for LocalAI — leave empty if not required:
# OPENAI_EMBEDDING_API_KEY=
|
||||
Reference in New Issue
Block a user