Files
mnemosyne/mnemosyne/library/metrics.py
Robert Helewka 634845fee0 feat: add Phase 3 hybrid search with Synesis reranking
Implement hybrid search pipeline combining vector, fulltext, and graph
search across Neo4j, with cross-attention reranking via Synesis
(Qwen3-VL-Reranker-2B) `/v1/rerank` endpoint.

- Add SearchService with vector, fulltext, and graph search strategies
- Add SynesisRerankerClient for multimodal reranking via HTTP API
- Add search API endpoint (POST /search/) with filtering by library,
  collection, and library_type
- Add SearchRequest/Response serializers and image search results
- Add "nonfiction" to library_type choices
- Consolidate reranker stack from two models to single Synesis service
- Handle image analysis_status as "skipped" when analysis is unavailable
- Add comprehensive tests for search pipeline and reranker client
2026-03-29 18:09:50 +00:00

167 lines
4.4 KiB
Python

"""
Prometheus metrics for the Mnemosyne embedding and search pipelines.
Exposes counters, histograms, and gauges for monitoring document parsing,
chunking, embedding, pipeline orchestration, concept extraction, vision
analysis, search, rank fusion, and re-ranking.
"""
from prometheus_client import Counter, Gauge, Histogram
# --- Document Parsing ---

# Counter of parsed documents, partitioned by the "file_type" and "status"
# labels.
DOCUMENTS_PARSED_TOTAL = Counter(
    "mnemosyne_documents_parsed_total",
    "Total documents parsed",
    labelnames=("file_type", "status"),
)

# Histogram of per-document parse time, partitioned by "file_type".
# Bucket upper bounds span 0.1 to 120 (seconds, going by the metric name).
DOCUMENT_PARSE_DURATION = Histogram(
    "mnemosyne_document_parse_duration_seconds",
    "Time to parse a document",
    labelnames=("file_type",),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60, 120),
)

# Counter of images extracted from documents, partitioned by "file_type".
IMAGES_EXTRACTED_TOTAL = Counter(
    "mnemosyne_images_extracted_total",
    "Total images extracted from documents",
    labelnames=("file_type",),
)
# --- Chunking ---

# Counter of created chunks, partitioned by "library_type" and "strategy".
CHUNKS_CREATED_TOTAL = Counter(
    "mnemosyne_chunks_created_total",
    "Total chunks created",
    labelnames=("library_type", "strategy"),
)

# Unlabelled histogram of chunk sizes. Bucket upper bounds span 32 to 2048
# (tokens, going by the metric name and description).
CHUNK_SIZE_TOKENS = Histogram(
    "mnemosyne_chunk_size_tokens",
    "Distribution of chunk sizes in tokens",
    buckets=(32, 64, 128, 256, 512, 768, 1024, 2048),
)
# --- Embedding ---

# Counter of generated embeddings, partitioned by "model_name", "api_type"
# and "content_type".
EMBEDDINGS_GENERATED_TOTAL = Counter(
    "mnemosyne_embeddings_generated_total",
    "Total embeddings generated",
    labelnames=("model_name", "api_type", "content_type"),
)

# Histogram of the time taken by one embedding batch request, partitioned by
# "model_name" and "api_type". Bucket upper bounds span 0.1 to 60 (seconds,
# going by the metric name).
EMBEDDING_BATCH_DURATION = Histogram(
    "mnemosyne_embedding_batch_duration_seconds",
    "Time per embedding batch request",
    labelnames=("model_name", "api_type"),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60),
)

# Counter of embedding API errors, partitioned by "model_name", "api_type"
# and "error_type".
EMBEDDING_API_ERRORS_TOTAL = Counter(
    "mnemosyne_embedding_api_errors_total",
    "Embedding API errors",
    labelnames=("model_name", "api_type", "error_type"),
)

# Counter of tokens sent to embedding APIs, partitioned by "model_name".
EMBEDDING_TOKENS_TOTAL = Counter(
    "mnemosyne_embedding_tokens_total",
    "Total tokens sent to embedding APIs",
    labelnames=("model_name",),
)
# --- Pipeline ---

# Counter of items processed by the embedding pipeline, partitioned by
# "status".
PIPELINE_ITEMS_TOTAL = Counter(
    "mnemosyne_pipeline_items_total",
    "Total items processed by embedding pipeline",
    labelnames=("status",),
)

# Unlabelled histogram of the full-pipeline processing time for one item.
# Note the exported name says "item_duration" although the Python constant
# does not. Bucket upper bounds span 1 to 600 (seconds, going by the metric
# name).
PIPELINE_DURATION = Histogram(
    "mnemosyne_pipeline_item_duration_seconds",
    "Total time to process one item through the full pipeline",
    buckets=(1, 5, 10, 30, 60, 120, 300, 600),
)

# Unlabelled gauge of items currently being processed.
PIPELINE_ITEMS_IN_PROGRESS = Gauge(
    "mnemosyne_pipeline_items_in_progress",
    "Items currently being processed",
)
# --- Concept Extraction ---

# Counter of extracted concepts, partitioned by "concept_type".
CONCEPTS_EXTRACTED_TOTAL = Counter(
    "mnemosyne_concepts_extracted_total",
    "Total concepts extracted",
    labelnames=("concept_type",),
)
# --- Vision Analysis (Phase 2B) ---

# Counter of images analyzed by the vision model, partitioned by "status".
VISION_ANALYSES_TOTAL = Counter(
    "mnemosyne_vision_analyses_total",
    "Total images analyzed by vision model",
    labelnames=("status",),
)

# Unlabelled histogram of the time to analyze a single image. Bucket upper
# bounds span 0.5 to 60 (seconds, going by the metric name).
VISION_ANALYSIS_DURATION = Histogram(
    "mnemosyne_vision_analysis_duration_seconds",
    "Time to analyze a single image with the vision model",
    buckets=(0.5, 1, 2, 5, 10, 20, 30, 60),
)

# Counter of concepts extracted from images, partitioned by "concept_type".
VISION_CONCEPTS_EXTRACTED_TOTAL = Counter(
    "mnemosyne_vision_concepts_extracted_total",
    "Concepts extracted from images by vision analysis",
    labelnames=("concept_type",),
)
# --- Search ---

# Counter of search requests, partitioned by "search_type" and
# "library_type".
SEARCH_REQUESTS_TOTAL = Counter(
    "mnemosyne_search_requests_total",
    "Total search requests",
    labelnames=("search_type", "library_type"),
)

# Histogram of the execution time of one search type, partitioned by
# "search_type". Bucket upper bounds span 0.01 to 10 (seconds, going by the
# metric name).
SEARCH_DURATION = Histogram(
    "mnemosyne_search_duration_seconds",
    "Time per search type execution",
    labelnames=("search_type",),
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10),
)

# Histogram of how many candidates each search type returned, partitioned by
# "search_type". The lowest bucket bound is 0, so empty result sets are
# distinguishable from single-candidate ones.
# NOTE(review): this is a Histogram whose exported name ends in "_total", a
# suffix conventionally used for counters. Renaming it would change the
# exported series names, so it is left untouched here -- confirm with
# dashboard/alert owners before changing.
SEARCH_CANDIDATES_TOTAL = Histogram(
    "mnemosyne_search_candidates_total",
    "Number of candidates returned per search type",
    labelnames=("search_type",),
    buckets=(0, 1, 5, 10, 20, 50, 100),
)

# Unlabelled histogram of end-to-end search latency, which per its
# description includes fusion and re-ranking. Bucket upper bounds span 0.1
# to 30 (seconds, going by the metric name).
SEARCH_TOTAL_DURATION = Histogram(
    "mnemosyne_search_total_duration_seconds",
    "End-to-end search latency including fusion and re-ranking",
    buckets=(0.1, 0.25, 0.5, 1, 2, 5, 10, 30),
)
# --- Fusion ---

# Unlabelled histogram of the time spent on Reciprocal Rank Fusion. Bucket
# upper bounds span 0.001 to 0.5 (seconds, going by the metric name).
FUSION_DURATION = Histogram(
    "mnemosyne_fusion_duration_seconds",
    "Time to perform Reciprocal Rank Fusion",
    buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5),
)
# --- Re-ranking ---

# Counter of re-ranking requests, partitioned by "model_name" and "status".
RERANK_REQUESTS_TOTAL = Counter(
    "mnemosyne_rerank_requests_total",
    "Total re-ranking requests",
    labelnames=("model_name", "status"),
)

# Histogram of the time taken by one re-ranking request, partitioned by
# "model_name". Bucket upper bounds span 0.1 to 30 (seconds, going by the
# metric name).
RERANK_DURATION = Histogram(
    "mnemosyne_rerank_duration_seconds",
    "Time per re-ranking request",
    labelnames=("model_name",),
    buckets=(0.1, 0.25, 0.5, 1, 2, 5, 10, 30),
)

# Unlabelled histogram of how many candidates were sent to the re-ranker.
# Bucket upper bounds span 1 to 100.
RERANK_CANDIDATES = Histogram(
    "mnemosyne_rerank_candidates",
    "Number of candidates sent to re-ranker",
    buckets=(1, 5, 10, 20, 32, 50, 100),
)
# --- System State ---

# Unlabelled gauge of items waiting in the embedding queue.
EMBEDDING_QUEUE_SIZE = Gauge(
    "mnemosyne_embedding_queue_size",
    "Items waiting in the embedding queue",
)