# mnemosyne/mnemosyne/library/metrics.py

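"""
Prometheus metrics for the Mnemosyne embedding and search pipelines.

Exposes counters, histograms, and gauges for monitoring document parsing,
chunking, embedding, pipeline orchestration, concept extraction, vision
analysis, search, fusion, re-ranking, and embedding queue state.
"""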

from prometheus_client import Counter, Gauge, Histogram

# --- Document Parsing ---

# Count of parsed documents, labelled by file type and status.
DOCUMENTS_PARSED_TOTAL = Counter(
    name="mnemosyne_documents_parsed_total",
    documentation="Total documents parsed",
    labelnames=("file_type", "status"),
)

# Per-document parse time in seconds, labelled by file type.
# Explicit bucket bounds run from 0.1 s up to 120 s.
DOCUMENT_PARSE_DURATION = Histogram(
    name="mnemosyne_document_parse_duration_seconds",
    documentation="Time to parse a document",
    labelnames=("file_type",),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60, 120),
)

# Count of images pulled out of documents, labelled by file type.
IMAGES_EXTRACTED_TOTAL = Counter(
    name="mnemosyne_images_extracted_total",
    documentation="Total images extracted from documents",
    labelnames=("file_type",),
)

# --- Chunking ---

# Count of chunks produced, labelled by library type and chunking strategy.
CHUNKS_CREATED_TOTAL = Counter(
    name="mnemosyne_chunks_created_total",
    documentation="Total chunks created",
    labelnames=("library_type", "strategy"),
)

# Unlabelled distribution of chunk sizes measured in tokens.
# Explicit bucket bounds run from 32 up to 2048 tokens.
CHUNK_SIZE_TOKENS = Histogram(
    name="mnemosyne_chunk_size_tokens",
    documentation="Distribution of chunk sizes in tokens",
    buckets=(32, 64, 128, 256, 512, 768, 1024, 2048),
)

# --- Embedding ---

# Count of embeddings generated, labelled by model, API type and content type.
EMBEDDINGS_GENERATED_TOTAL = Counter(
    name="mnemosyne_embeddings_generated_total",
    documentation="Total embeddings generated",
    labelnames=("model_name", "api_type", "content_type"),
)

# Time per embedding batch request in seconds, labelled by model and API type.
# Explicit bucket bounds run from 0.1 s up to 60 s.
EMBEDDING_BATCH_DURATION = Histogram(
    name="mnemosyne_embedding_batch_duration_seconds",
    documentation="Time per embedding batch request",
    labelnames=("model_name", "api_type"),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60),
)

# Count of embedding API errors, labelled by model, API type and error type.
EMBEDDING_API_ERRORS_TOTAL = Counter(
    name="mnemosyne_embedding_api_errors_total",
    documentation="Embedding API errors",
    labelnames=("model_name", "api_type", "error_type"),
)

# Count of tokens sent to embedding APIs, labelled by model only.
EMBEDDING_TOKENS_TOTAL = Counter(
    name="mnemosyne_embedding_tokens_total",
    documentation="Total tokens sent to embedding APIs",
    labelnames=("model_name",),
)

# --- Pipeline ---

# Count of items handled by the embedding pipeline, labelled by status.
PIPELINE_ITEMS_TOTAL = Counter(
    name="mnemosyne_pipeline_items_total",
    documentation="Total items processed by embedding pipeline",
    labelnames=("status",),
)

# Unlabelled full-pipeline time per item in seconds.
# Explicit bucket bounds run from 1 s up to 600 s.
PIPELINE_DURATION = Histogram(
    name="mnemosyne_pipeline_item_duration_seconds",
    documentation="Total time to process one item through the full pipeline",
    buckets=(1, 5, 10, 30, 60, 120, 300, 600),
)

# Unlabelled gauge of items currently being processed.
PIPELINE_ITEMS_IN_PROGRESS = Gauge(
    name="mnemosyne_pipeline_items_in_progress",
    documentation="Items currently being processed",
)

# --- Concept Extraction ---

# Count of extracted concepts, labelled by concept type.
CONCEPTS_EXTRACTED_TOTAL = Counter(
    name="mnemosyne_concepts_extracted_total",
    documentation="Total concepts extracted",
    labelnames=("concept_type",),
)

# --- Vision Analysis (Phase 2B) ---

# Count of images analyzed by the vision model, labelled by status.
VISION_ANALYSES_TOTAL = Counter(
    name="mnemosyne_vision_analyses_total",
    documentation="Total images analyzed by vision model",
    labelnames=("status",),
)

# Unlabelled time to analyze a single image, in seconds.
# Explicit bucket bounds run from 0.5 s up to 60 s.
VISION_ANALYSIS_DURATION = Histogram(
    name="mnemosyne_vision_analysis_duration_seconds",
    documentation="Time to analyze a single image with the vision model",
    buckets=(0.5, 1, 2, 5, 10, 20, 30, 60),
)

# Count of concepts extracted from images, labelled by concept type.
# Exported separately from mnemosyne_concepts_extracted_total.
VISION_CONCEPTS_EXTRACTED_TOTAL = Counter(
    name="mnemosyne_vision_concepts_extracted_total",
    documentation="Concepts extracted from images by vision analysis",
    labelnames=("concept_type",),
)

# --- Search ---

# Count of search requests, labelled by search type and library type.
SEARCH_REQUESTS_TOTAL = Counter(
    name="mnemosyne_search_requests_total",
    documentation="Total search requests",
    labelnames=("search_type", "library_type"),
)

# Execution time per search type, in seconds.
# Explicit bucket bounds run from 10 ms up to 10 s.
SEARCH_DURATION = Histogram(
    name="mnemosyne_search_duration_seconds",
    documentation="Time per search type execution",
    labelnames=("search_type",),
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10),
)

# Distribution of the number of candidates returned, per search type.
# The lowest bucket bound is 0, so empty result sets get their own bucket.
# NOTE(review): this is a histogram whose exported name ends in `_total`, a
# suffix Prometheus naming conventions use for counters. The name is left
# untouched here because renaming it would change the exported series names.
SEARCH_CANDIDATES_TOTAL = Histogram(
    name="mnemosyne_search_candidates_total",
    documentation="Number of candidates returned per search type",
    labelnames=("search_type",),
    buckets=(0, 1, 5, 10, 20, 50, 100),
)

# Unlabelled end-to-end search latency in seconds (fusion and re-ranking
# included, per the help text). Explicit bucket bounds run from 0.1 s to 30 s.
SEARCH_TOTAL_DURATION = Histogram(
    name="mnemosyne_search_total_duration_seconds",
    documentation="End-to-end search latency including fusion and re-ranking",
    buckets=(0.1, 0.25, 0.5, 1, 2, 5, 10, 30),
)

# --- Fusion ---

# Unlabelled time spent on Reciprocal Rank Fusion, in seconds.
# Explicit bucket bounds are sub-second: 1 ms up to 0.5 s.
FUSION_DURATION = Histogram(
    name="mnemosyne_fusion_duration_seconds",
    documentation="Time to perform Reciprocal Rank Fusion",
    buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5),
)

# --- Re-ranking ---

# Count of re-ranking requests, labelled by model and status.
RERANK_REQUESTS_TOTAL = Counter(
    name="mnemosyne_rerank_requests_total",
    documentation="Total re-ranking requests",
    labelnames=("model_name", "status"),
)

# Time per re-ranking request in seconds, labelled by model.
# Explicit bucket bounds run from 0.1 s up to 30 s.
RERANK_DURATION = Histogram(
    name="mnemosyne_rerank_duration_seconds",
    documentation="Time per re-ranking request",
    labelnames=("model_name",),
    buckets=(0.1, 0.25, 0.5, 1, 2, 5, 10, 30),
)

# Unlabelled distribution of how many candidates are sent to the re-ranker.
# Explicit bucket bounds run from 1 up to 100 candidates.
RERANK_CANDIDATES = Histogram(
    name="mnemosyne_rerank_candidates",
    documentation="Number of candidates sent to re-ranker",
    buckets=(1, 5, 10, 20, 32, 50, 100),
)

# --- System State ---

# Unlabelled gauge of items waiting in the embedding queue.
EMBEDDING_QUEUE_SIZE = Gauge(
    name="mnemosyne_embedding_queue_size",
    documentation="Items waiting in the embedding queue",
)