docs: rewrite README with structured overview and quick start guide

Replaces the minimal project description with a comprehensive README
including a component overview table, quick start instructions, common
Ansible operations, and links to detailed documentation. Aligns with
Red Panda Approval™ standards.
This commit is contained in:
2026-03-03 12:49:06 +00:00
parent c7be03a743
commit b4d60f2f38
219 changed files with 34586 additions and 2 deletions

243
ansible/arke/.env.example Normal file
View File

@@ -0,0 +1,243 @@
# Arke Configuration Example
# Copy this file to .env and update with your values
# ============================================================================
# Server Configuration
# ============================================================================
HOST=0.0.0.0
PORT=8000
DEBUG=false
LOG_LEVEL=info
RELOAD=false
# ============================================================================
# PostgreSQL Database Configuration
# ============================================================================
# Connection settings for the Arke PostgreSQL database
DB_HOST=localhost
DB_PORT=5432
DB_NAME=arke
DB_USER=arke
DB_PASSWORD=your_secure_password
# ============================================================================
# Memcached Configuration
# ============================================================================
MEMCACHED_HOST=localhost
MEMCACHED_PORT=11211
# ============================================================================
# Multi-Backend Configuration (Environment Variable Format)
# ============================================================================
# Ansible-friendly configuration using individual environment variables
# No JSON escaping issues, works perfectly with Ansible Vault
# --- NTTh Backend (Token Pool) ---
# NTTh is treated specially as it manages a pool of tokens with session limits
NTTH_BACKEND_ENABLED=true
NTTH_SESSION_LIMIT=90
NTTH_SESSION_TTL=3600
NTTH_TOKEN_CACHE_TTL=82800
# NTTh Tokens (numbered, add as many as needed)
NTTH_TOKEN_1_APP_ID=your_app_id_1
NTTH_TOKEN_1_APP_SECRET=your_secret_1
NTTH_TOKEN_1_NAME=production-primary
NTTH_TOKEN_2_APP_ID=your_app_id_2
NTTH_TOKEN_2_APP_SECRET=your_secret_2
NTTH_TOKEN_2_NAME=production-backup
# Add more tokens as needed:
# NTTH_TOKEN_3_APP_ID=your_app_id_3
# NTTH_TOKEN_3_APP_SECRET=your_secret_3
# NTTH_TOKEN_3_NAME=production-tertiary
# --- Standard Backends (OpenAI-Compatible, etc.) ---
# Backend 1: Nyx (llama-cpp instance)
BACKEND_1_NAME=nyx
BACKEND_1_TYPE=openai-compatible
BACKEND_1_ENABLED=true
BACKEND_1_BASE_URL=http://nyx.helu.ca:8080/v1
BACKEND_1_API_KEY=not-needed
BACKEND_1_MODEL_PREFIX=nyx
BACKEND_1_TIMEOUT=60
# Backend 2: Athena (llama-cpp instance)
BACKEND_2_NAME=athena
BACKEND_2_TYPE=openai-compatible
BACKEND_2_ENABLED=true
BACKEND_2_BASE_URL=http://athena.helu.ca:8080/v1
BACKEND_2_API_KEY=not-needed
BACKEND_2_MODEL_PREFIX=athena
BACKEND_2_TIMEOUT=60
# ============================================================================
# Future Backend Examples (Reference Only - Not Active)
# ============================================================================
# These examples show how to configure other backend types when needed
# --- Anthropic Backend Example ---
# BACKEND_3_NAME=anthropic
# BACKEND_3_TYPE=anthropic
# BACKEND_3_ENABLED=true
# BACKEND_3_BASE_URL=https://api.anthropic.com
# BACKEND_3_API_KEY=sk-ant-api03-xxxxx
# BACKEND_3_MODEL_PREFIX=anthropic
# BACKEND_3_TIMEOUT=60
# --- Azure OpenAI Backend Example ---
# BACKEND_4_NAME=azure-openai
# BACKEND_4_TYPE=azure-openai
# BACKEND_4_ENABLED=true
# BACKEND_4_BASE_URL=https://your-resource.openai.azure.com
# BACKEND_4_API_KEY=your-azure-key
# BACKEND_4_MODEL_PREFIX=azure
# BACKEND_4_DEPLOYMENT_NAME=gpt-4
# BACKEND_4_API_VERSION=2024-02-15-preview
# BACKEND_4_TIMEOUT=60
# --- AWS Bedrock Backend Example ---
# BACKEND_5_NAME=bedrock
# BACKEND_5_TYPE=bedrock
# BACKEND_5_ENABLED=true
# BACKEND_5_AWS_REGION=us-east-1
# BACKEND_5_AWS_ACCESS_KEY_ID=AKIA...
# BACKEND_5_AWS_SECRET_ACCESS_KEY=secret...
# BACKEND_5_MODEL_PREFIX=bedrock
# BACKEND_5_TIMEOUT=60
# --- OpenAI Direct Backend Example ---
# BACKEND_6_NAME=openai
# BACKEND_6_TYPE=openai-compatible
# BACKEND_6_ENABLED=true
# BACKEND_6_BASE_URL=https://api.openai.com/v1
# BACKEND_6_API_KEY=sk-...
# BACKEND_6_MODEL_PREFIX=openai
# BACKEND_6_TIMEOUT=60
# ============================================================================
# Embedding Provider Configuration
# ============================================================================
# Choose your embedding provider: 'ollama' or 'openai'
EMBEDDING_PROVIDER=ollama
# --- Ollama Configuration (when EMBEDDING_PROVIDER=ollama) ---
OLLAMA_HOST=nyx.helu.ca
OLLAMA_PORT=11434
EMBEDDING_MODEL=nomic-embed-text
# --- OpenAI-Compatible Configuration (when EMBEDDING_PROVIDER=openai) ---
# Works with OpenAI API, llama-cpp, LocalAI, and other compatible services
OPENAI_EMBEDDING_BASE_URL=http://localhost:8080
OPENAI_EMBEDDING_API_KEY=
OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
# --- Common Embedding Configuration ---
# Request timeout in seconds for the embedding provider
EMBEDDING_TIMEOUT=30.0
# --- Batch Chunking Configuration (for llama-cpp) ---
# These settings optimize embedding requests for llama-cpp's context limits
EMBEDDING_BATCH_SIZE=512
EMBEDDING_UBATCH_SIZE=512
EMBEDDING_MAX_CONTEXT=8192
# ============================================================================
# Memory System Configuration
# ============================================================================
MEMORY_ENABLED=true
MAX_CONTEXT_TOKENS=8000
SIMILARITY_THRESHOLD=0.7
MIN_IMPORTANCE_SCORE=0.7
# ============================================================================
# Message Size Limits
# ============================================================================
# Maximum tokens allowed for incoming messages (default: 32768)
# This limit prevents excessively large requests that could overwhelm the system
MESSAGE_MAX_TOKENS=32768
# ============================================================================
# Background Task Configuration (Async Embedding Generation)
# ============================================================================
# Enable background task processing for async operations
BACKGROUND_TASKS_ENABLED=true
# Number of worker threads for background tasks
BACKGROUND_TASK_WORKERS=5
# Maximum retry attempts for failed tasks
BACKGROUND_TASK_MAX_RETRIES=3
# Initial retry delay in seconds (uses exponential backoff)
BACKGROUND_TASK_RETRY_DELAY=1.0
# Cleanup interval for old completed/failed tasks (hours)
BACKGROUND_TASK_CLEANUP_HOURS=24
# --- Async Embedding Configuration ---
# Enable async embedding generation (non-blocking)
ASYNC_EMBEDDINGS_ENABLED=true
# Number of messages to batch together for embedding generation
ASYNC_EMBEDDING_BATCH_SIZE=50
# Priority level for embedding tasks: LOW, NORMAL, HIGH, CRITICAL
ASYNC_EMBEDDING_PRIORITY=NORMAL
# --- Async Deduplication Configuration ---
# Enable async document enhancement (non-blocking embedding generation for deduplicated documents)
ASYNC_DEDUPLICATION_ENABLED=true
# Number of documents to batch together for enhancement
DEDUPLICATION_BATCH_SIZE=20
# Priority level for document enhancement tasks: LOW, NORMAL, HIGH, CRITICAL
DEDUPLICATION_ENHANCEMENT_PRIORITY=NORMAL
# Enable HTML content extraction and processing
HTML_CONTENT_EXTRACTION=true
# Minimum token count for document deduplication
MIN_TOKENS_FOR_DEDUP=500
# Semantic similarity threshold for duplicate detection (0.0-1.0)
DEDUPLICATION_THRESHOLD=0.95
# Reference expansion strategy: smart, full, summary, minimal
REFERENCE_EXPANSION_STRATEGY=smart
# ============================================================================
# Monitoring Configuration
# ============================================================================
PROMETHEUS_ENABLED=true
METRICS_PORT=9090
# ============================================================================
# Example Configurations for Different Setups
# ============================================================================
# Example 1: Using Ollama (default)
# EMBEDDING_PROVIDER=ollama
# OLLAMA_HOST=localhost
# OLLAMA_PORT=11434
# EMBEDDING_MODEL=nomic-embed-text
# Example 2: Using llama-cpp with OpenAI-compatible API
# EMBEDDING_PROVIDER=openai
# OPENAI_EMBEDDING_BASE_URL=http://localhost:8080
# OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
# OPENAI_EMBEDDING_API_KEY= # Optional, leave empty if not required
# Example 3: Using actual OpenAI API
# EMBEDDING_PROVIDER=openai
# OPENAI_EMBEDDING_BASE_URL=https://api.openai.com
# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# OPENAI_EMBEDDING_API_KEY=sk-your-openai-api-key
# Example 4: Using LocalAI
# EMBEDDING_PROVIDER=openai
# OPENAI_EMBEDDING_BASE_URL=http://localhost:8080
# OPENAI_EMBEDDING_MODEL=bert-embeddings
# OPENAI_EMBEDDING_API_KEY= # Optional

147
ansible/arke/.env.j2 Normal file
View File

@@ -0,0 +1,147 @@
# Arke Environment Configuration
# Jinja2 template rendered by Ansible (deploy.yml) into the service's .env file.
# Edit these values as needed before deployment
# ============================================================================
# Server Configuration
# ============================================================================
HOST=0.0.0.0
PORT={{ arke_port }}
DEBUG=false
LOG_LEVEL=info
RELOAD={{ arke_reload | default('false') }}
# ============================================================================
# PostgreSQL Database Configuration
# ============================================================================
DB_HOST={{ arke_db_host }}
DB_PORT={{ arke_db_port }}
DB_NAME={{ arke_db_name }}
DB_USER={{ arke_db_user }}
DB_PASSWORD={{ arke_db_password }}
# ============================================================================
# Memcached Configuration
# ============================================================================
MEMCACHED_HOST={{ arke_memcached_host | default('localhost') }}
MEMCACHED_PORT={{ arke_memcached_port | default('11211') }}
# ============================================================================
# NTTh API Configuration
# ============================================================================
# --- NTTh Backend (Token Pool) ---
# NTTh is treated specially as it manages a pool of tokens with session limits
NTTH_BACKEND_ENABLED=true
NTTH_SESSION_LIMIT=90
NTTH_SESSION_TTL=3600
NTTH_TOKEN_CACHE_TTL=82800
# NTTh Tokens (numbered, add as many as needed)
NTTH_TOKEN_1_NAME={{ntth_token_1_app_name}}
NTTH_TOKEN_1_APP_ID={{ntth_token_1_app_id}}
NTTH_TOKEN_1_APP_SECRET={{ntth_token_1_app_secret}}
NTTH_TOKEN_2_NAME={{ntth_token_2_app_name}}
NTTH_TOKEN_2_APP_ID={{ntth_token_2_app_id}}
NTTH_TOKEN_2_APP_SECRET={{ntth_token_2_app_secret}}
NTTH_TOKEN_3_NAME={{ntth_token_3_app_name}}
NTTH_TOKEN_3_APP_ID={{ntth_token_3_app_id}}
NTTH_TOKEN_3_APP_SECRET={{ntth_token_3_app_secret}}
NTTH_TOKEN_4_NAME={{ntth_token_4_app_name}}
NTTH_TOKEN_4_APP_ID={{ntth_token_4_app_id}}
NTTH_TOKEN_4_APP_SECRET={{ntth_token_4_app_secret}}
# Session Management
# NOTE(review): SESSION_LIMIT/SESSION_TTL/TOKEN_CACHE_TTL below duplicate the
# hard-coded NTTH_SESSION_LIMIT/NTTH_SESSION_TTL/NTTH_TOKEN_CACHE_TTL values
# above with the same defaults — confirm which set the application reads, and
# whether the NTTH_* values should also be templated.
SESSION_LIMIT={{ arke_session_limit | default('90') }}
SESSION_TTL={{ arke_session_ttl | default('3600') }}
TOKEN_CACHE_TTL={{ arke_token_cache_ttl | default('82800') }}
# ============================================================================
# Embedding Provider Configuration
# ============================================================================
# Choose your embedding provider: 'ollama' or 'openai'
# NOTE(review): only the OpenAI-compatible variables are templated below; if
# 'ollama' is selected, OLLAMA_HOST/OLLAMA_PORT/EMBEDDING_MODEL are not set by
# this template — confirm the application falls back to sane defaults.
EMBEDDING_PROVIDER={{arke_embedding_provider}}
# --- OpenAI-Compatible Configuration (when EMBEDDING_PROVIDER=openai) ---
# Works with OpenAI API, llama-cpp, LocalAI, and other compatible services
OPENAI_EMBEDDING_BASE_URL={{arke_openai_embedding_base_url}}
OPENAI_EMBEDDING_API_KEY={{arke_openai_embedding_api_key}}
OPENAI_EMBEDDING_MODEL={{arke_openai_embedding_model}}
# --- Embedding Configuration ---
EMBEDDING_TIMEOUT={{ arke_embedding_timeout | default('30.0') }}
EMBEDDING_BATCH_SIZE={{arke_embedding_batch_size}}
EMBEDDING_UBATCH_SIZE={{arke_embedding_ubatch_size}}
EMBEDDING_MAX_CONTEXT={{arke_embedding_max_context}}
# ============================================================================
# Memory System Configuration
# ============================================================================
MEMORY_ENABLED={{ arke_memory_enabled | default('true') }}
MAX_CONTEXT_TOKENS={{ arke_max_context_tokens | default('8000') }}
SIMILARITY_THRESHOLD={{ arke_similarity_threshold | default('0.7') }}
MIN_IMPORTANCE_SCORE={{ arke_min_importance_score | default('0.7') }}
# ============================================================================
# Message Size Limits
# ============================================================================
# Maximum tokens allowed for incoming messages.
# NOTE(review): set to 700000 here, far above the 32768 default documented in
# .env.example — confirm this large limit is intentional for this deployment.
MESSAGE_MAX_TOKENS=700000
# ============================================================================
# Background Task Configuration (Async Embedding Generation)
# ============================================================================
# Enable background task processing for async operations
BACKGROUND_TASKS_ENABLED=true
# Number of worker threads for background tasks
BACKGROUND_TASK_WORKERS=5
# Maximum retry attempts for failed tasks
BACKGROUND_TASK_MAX_RETRIES=3
# Initial retry delay in seconds (uses exponential backoff)
BACKGROUND_TASK_RETRY_DELAY=1.0
# Cleanup interval for old completed/failed tasks (hours)
BACKGROUND_TASK_CLEANUP_HOURS=24
# --- Async Embedding Configuration ---
# Enable async embedding generation (non-blocking)
ASYNC_EMBEDDINGS_ENABLED=true
# Number of messages to batch together for embedding generation
ASYNC_EMBEDDING_BATCH_SIZE=50
# Priority level for embedding tasks: LOW, NORMAL, HIGH, CRITICAL
ASYNC_EMBEDDING_PRIORITY=NORMAL
# --- Async Deduplication Configuration ---
# Enable async document enhancement (non-blocking embedding generation for deduplicated documents)
ASYNC_DEDUPLICATION_ENABLED=true
# Number of documents to batch together for enhancement
DEDUPLICATION_BATCH_SIZE=20
# Priority level for document enhancement tasks: LOW, NORMAL, HIGH, CRITICAL
DEDUPLICATION_ENHANCEMENT_PRIORITY=NORMAL
# Enable HTML content extraction and processing
HTML_CONTENT_EXTRACTION=true
# Minimum token count for document deduplication
MIN_TOKENS_FOR_DEDUP=500
# Semantic similarity threshold for duplicate detection (0.0-1.0)
DEDUPLICATION_THRESHOLD=0.95
# Reference expansion strategy: smart, full, summary, minimal
REFERENCE_EXPANSION_STRATEGY=smart
# ============================================================================
# Monitoring Configuration
# ============================================================================
PROMETHEUS_ENABLED=true
METRICS_PORT={{arke_metrics_port}}

View File

@@ -0,0 +1,24 @@
[Unit]
Description=Arke MCP Server
After=network.target
Wants=network.target

[Service]
Type=simple
User={{arke_user}}
Group={{arke_group}}
WorkingDirectory={{arke_directory}}
EnvironmentFile={{arke_directory}}/.env
ExecStart={{arke_directory}}/.venv/bin/python {{arke_directory}}/arke.py
Restart=always
RestartSec=10

# Security hardening
NoNewPrivileges=true
PrivateTmp=true
# ProtectSystem=strict mounts the whole file system read-only for this unit.
# The deploy playbook creates a writable media/generated_images directory for
# the arke user, so that path must be explicitly re-opened for writing here,
# otherwise runtime writes fail with a read-only file system error.
ProtectSystem=strict
ReadWritePaths={{arke_directory}}/media/generated_images
ProtectHome=true

[Install]
WantedBy=multi-user.target

181
ansible/arke/deploy.yml Normal file
View File

@@ -0,0 +1,181 @@
---
# Deploy the Arke proxy server: create the service account, unpack the release
# tarball, build the Python virtualenv, install Memcached, template the .env
# and systemd unit, then validate the /health and /metrics endpoints.
- name: Deploy Arke Proxy Server
  hosts: arke
  vars:
    ansible_common_remote_group: "{{ arke_group }}"
    allow_world_readable_tmpfiles: true
  tasks:
    - name: Create Arke group
      become: true
      ansible.builtin.group:
        name: "{{ arke_group }}"
        state: present
    - name: Create arke user
      become: true
      ansible.builtin.user:
        name: "{{ arke_user }}"
        group: "{{ arke_group }}"
        home: "{{ arke_directory }}"
        shell: /bin/bash
        system: true
        create_home: false
    - name: Add remote_user to arke group
      become: true
      ansible.builtin.user:
        name: "{{ remote_user }}"
        groups: "{{ arke_group }}"
        append: true
    - name: Create required directories
      become: true
      ansible.builtin.file:
        path: "{{ arke_directory }}"
        owner: "{{ arke_user }}"
        group: "{{ arke_group }}"
        state: directory
        mode: '0750'
    - name: Ensure tar is installed for unarchive task
      become: true
      ansible.builtin.apt:
        name:
          - tar
        state: present
        update_cache: true
    - name: Ensure Python, Python Dev, Venv module is installed
      become: true
      ansible.builtin.apt:
        name:
          - python3
          - python3-venv
          - python3-dev
        state: present
        update_cache: true
    - name: Transfer and unarchive git archive
      become: true
      ansible.builtin.unarchive:
        src: "~/rel/arke_{{ arke_rel }}.tar"
        dest: "{{ arke_directory }}"
        owner: "{{ arke_user }}"
        group: "{{ arke_group }}"
        mode: '0550'
      notify: restart arke
    - name: Ensure media directories are writable
      become: true
      ansible.builtin.file:
        path: "{{ arke_directory }}/media/generated_images"
        owner: "{{ arke_user }}"
        group: "{{ arke_group }}"
        state: directory
        mode: '0750'
    - name: Create virtual environment for Arke
      become: true
      become_user: "{{ arke_user }}"
      ansible.builtin.command:
        cmd: "python3 -m venv {{ arke_directory }}/.venv/"
        creates: "{{ arke_directory }}/.venv/bin/activate"
    - name: Install wheel in virtual environment
      become: true
      become_user: "{{ arke_user }}"
      ansible.builtin.pip:
        name:
          - wheel
        state: latest
        virtualenv: "{{ arke_directory }}/.venv"
    - name: Install pyproject.toml dependencies in virtualenv
      become: true
      become_user: "{{ arke_user }}"
      ansible.builtin.pip:
        chdir: "{{ arke_directory }}"
        name: .
        virtualenv: "{{ arke_directory }}/.venv"
        virtualenv_command: python3 -m venv
    - name: Install Memcached
      become: true
      ansible.builtin.apt:
        name: memcached
        state: present
        update_cache: true
    - name: Ensure Memcached is running
      become: true
      ansible.builtin.service:
        name: memcached
        state: started
        enabled: true
    - name: Template Arke .env configuration
      become: true
      ansible.builtin.template:
        src: .env.j2
        dest: "{{ arke_directory }}/.env"
        owner: "{{ arke_user }}"
        group: "{{ arke_group }}"
        mode: '0640'
      notify: restart arke
    - name: Template systemd service file
      become: true
      ansible.builtin.template:
        src: arke.service.j2
        dest: /etc/systemd/system/arke.service
        owner: root
        group: root
        mode: '0644'
      notify: restart arke
    - name: Enable and start arke service
      become: true
      ansible.builtin.systemd:
        name: arke
        enabled: true
        state: started
        daemon_reload: true
    # Informational only — firewall changes are not automated by this play.
    - name: Ensure Arke metrics endpoint is open to Prometheus (manual step if not using ufw)
      ansible.builtin.debug:
        msg: |
          Ensure the host's firewall allows inbound TCP on port {{ arke_port }} from sao.helu.ca for Prometheus scraping.
          If using ufw:
          sudo ufw allow from <sao.helu.ca_ip> to any port {{ arke_port }} proto tcp
    - name: Reminder - Update Prometheus scrape config on sao.helu.ca
      ansible.builtin.debug:
        msg: |
          Add the following job/target to your Prometheus configuration on sao.helu.ca:
          - job_name: 'arke'
            static_configs:
            - targets: ['<arke_host>:{{ arke_port }}']
    - name: Validate Arke health endpoints
      ansible.builtin.uri:
        url: "http://localhost:{{ arke_port }}/health"
        status_code: 200
        return_content: true
      register: health_check
      retries: 5
      delay: 5
      until: health_check.status == 200
    - name: Validate Arke /metrics endpoint
      ansible.builtin.uri:
        url: "http://localhost:{{ arke_port }}/metrics"
        status_code: 200
        return_content: false
      register: metrics_check
      retries: 5
      delay: 5
      until: metrics_check.status == 200
  handlers:
    - name: restart arke
      become: true
      ansible.builtin.systemd:
        name: arke
        state: restarted

26
ansible/arke/remove.yml Normal file
View File

@@ -0,0 +1,26 @@
---
# Tear down an Arke deployment: stop and disable the service, delete its
# systemd unit, and remove the install directory.
# Destructive — everything under arke_directory (including .env) is deleted.
- name: Remove Arke Proxy Server
  hosts: arke
  become: true
  tasks:
    # ignore_errors lets removal proceed when the service was never installed.
    # NOTE(review): this also masks unrelated systemd failures — consider a
    # narrower failed_when condition instead.
    - name: Stop and disable arke service
      ansible.builtin.systemd:
        name: arke
        state: stopped
        enabled: false
      ignore_errors: true
    - name: Remove systemd service file
      ansible.builtin.file:
        path: /etc/systemd/system/arke.service
        state: absent
    # Pick up the unit-file deletion so systemd forgets the service.
    - name: Reload systemd daemon
      ansible.builtin.systemd:
        daemon_reload: true
    - name: Remove Arke directory
      ansible.builtin.file:
        path: "{{arke_directory}}"
        state: absent

29
ansible/arke/stage.yml Normal file
View File

@@ -0,0 +1,29 @@
---
# Build a release tarball of the Arke repository on the control host:
# update the local clone, then write git-archive output for the requested
# release (tag/branch/SHA in arke_rel) into rel_dir for deploy.yml to ship.
- name: Stage Arke release tarball
  hosts: localhost
  gather_facts: false
  vars:
    archive_path: "{{ rel_dir }}/arke_{{ arke_rel }}.tar"
    arke_repo_dir: "{{ repo_dir }}/arke"
  tasks:
    - name: Ensure release directory exists
      ansible.builtin.file:
        path: "{{ rel_dir }}"
        state: directory
        mode: '0755'
    - name: Fetch all remote branches and tags
      ansible.builtin.command: git fetch --all
      args:
        chdir: "{{ arke_repo_dir }}"
    - name: Pull latest changes
      ansible.builtin.command: git pull
      args:
        chdir: "{{ arke_repo_dir }}"
    - name: Create Arke archive for specified release
      ansible.builtin.command: git archive -o "{{ archive_path }}" "{{ arke_rel }}"
      args:
        chdir: "{{ arke_repo_dir }}"