Implement hybrid search pipeline combining vector, fulltext, and graph search across Neo4j, with cross-attention reranking via Synesis (Qwen3-VL-Reranker-2B) `/v1/rerank` endpoint. - Add SearchService with vector, fulltext, and graph search strategies - Add SynesisRerankerClient for multimodal reranking via HTTP API - Add search API endpoint (POST /search/) with filtering by library, collection, and library_type - Add SearchRequest/Response serializers and image search results - Add "nonfiction" to library_type choices - Consolidate reranker stack from two models to single Synesis service - Handle image analysis_status as "skipped" when analysis is unavailable - Add comprehensive tests for search pipeline and reranker client
167 lines
4.4 KiB
Python
167 lines
4.4 KiB
Python
"""
|
|
Prometheus metrics for the Mnemosyne embedding pipeline.
|
|
|
|
Exposes counters, histograms, and gauges for monitoring document parsing,
|
|
chunking, embedding, and pipeline orchestration.
|
|
"""
|
|
|
|
from prometheus_client import Counter, Gauge, Histogram
|
|
|
|
# --- Document Parsing ---

# Running count of parsed documents, partitioned by the "file_type" and
# "status" labels.
DOCUMENTS_PARSED_TOTAL = Counter(
    name="mnemosyne_documents_parsed_total",
    documentation="Total documents parsed",
    labelnames=("file_type", "status"),
)
# Parse-time distribution per "file_type"; bucket bounds are in seconds
# (per the metric name) and range from 0.1 up to 120.
DOCUMENT_PARSE_DURATION = Histogram(
    name="mnemosyne_document_parse_duration_seconds",
    documentation="Time to parse a document",
    labelnames=("file_type",),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60, 120),
)
# Running count of images pulled out of documents, per "file_type".
IMAGES_EXTRACTED_TOTAL = Counter(
    name="mnemosyne_images_extracted_total",
    documentation="Total images extracted from documents",
    labelnames=("file_type",),
)

# --- Chunking ---

# Running count of chunks produced, partitioned by the "library_type" and
# "strategy" labels.
CHUNKS_CREATED_TOTAL = Counter(
    name="mnemosyne_chunks_created_total",
    documentation="Total chunks created",
    labelnames=("library_type", "strategy"),
)
# Unlabelled distribution of chunk sizes; bucket bounds are token counts
# from 32 up to 2048.
CHUNK_SIZE_TOKENS = Histogram(
    name="mnemosyne_chunk_size_tokens",
    documentation="Distribution of chunk sizes in tokens",
    buckets=(32, 64, 128, 256, 512, 768, 1024, 2048),
)

# --- Embedding ---

# Running count of generated embeddings, partitioned by the "model_name",
# "api_type" and "content_type" labels.
EMBEDDINGS_GENERATED_TOTAL = Counter(
    name="mnemosyne_embeddings_generated_total",
    documentation="Total embeddings generated",
    labelnames=("model_name", "api_type", "content_type"),
)
# Latency distribution of a single embedding batch request per model and
# API type; bucket bounds are in seconds (per the metric name), 0.1 to 60.
EMBEDDING_BATCH_DURATION = Histogram(
    name="mnemosyne_embedding_batch_duration_seconds",
    documentation="Time per embedding batch request",
    labelnames=("model_name", "api_type"),
    buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60),
)
# Running count of embedding API errors; the "error_type" label separates
# failure categories.
EMBEDDING_API_ERRORS_TOTAL = Counter(
    name="mnemosyne_embedding_api_errors_total",
    documentation="Embedding API errors",
    labelnames=("model_name", "api_type", "error_type"),
)
# Running count of tokens submitted to embedding APIs, per "model_name".
EMBEDDING_TOKENS_TOTAL = Counter(
    name="mnemosyne_embedding_tokens_total",
    documentation="Total tokens sent to embedding APIs",
    labelnames=("model_name",),
)

# --- Pipeline ---

# Running count of items handled by the embedding pipeline, per "status".
PIPELINE_ITEMS_TOTAL = Counter(
    name="mnemosyne_pipeline_items_total",
    documentation="Total items processed by embedding pipeline",
    labelnames=("status",),
)
# Unlabelled distribution of full-pipeline processing time for one item;
# bucket bounds are in seconds (per the metric name), 1 to 600.
PIPELINE_DURATION = Histogram(
    name="mnemosyne_pipeline_item_duration_seconds",
    documentation="Total time to process one item through the full pipeline",
    buckets=(1, 5, 10, 30, 60, 120, 300, 600),
)
# Unlabelled gauge of items currently in flight.
PIPELINE_ITEMS_IN_PROGRESS = Gauge(
    name="mnemosyne_pipeline_items_in_progress",
    documentation="Items currently being processed",
)

# --- Concept Extraction ---

# Running count of extracted concepts, per "concept_type".
CONCEPTS_EXTRACTED_TOTAL = Counter(
    name="mnemosyne_concepts_extracted_total",
    documentation="Total concepts extracted",
    labelnames=("concept_type",),
)

# --- Vision Analysis (Phase 2B) ---

# Running count of images run through the vision model, per "status".
VISION_ANALYSES_TOTAL = Counter(
    name="mnemosyne_vision_analyses_total",
    documentation="Total images analyzed by vision model",
    labelnames=("status",),
)
# Unlabelled distribution of per-image analysis time; bucket bounds are in
# seconds (per the metric name), 0.5 to 60.
VISION_ANALYSIS_DURATION = Histogram(
    name="mnemosyne_vision_analysis_duration_seconds",
    documentation="Time to analyze a single image with the vision model",
    buckets=(0.5, 1, 2, 5, 10, 20, 30, 60),
)
# Running count of concepts that came from image analysis, per
# "concept_type".
VISION_CONCEPTS_EXTRACTED_TOTAL = Counter(
    name="mnemosyne_vision_concepts_extracted_total",
    documentation="Concepts extracted from images by vision analysis",
    labelnames=("concept_type",),
)

# --- Search ---

# Running count of search requests, partitioned by the "search_type" and
# "library_type" labels.
SEARCH_REQUESTS_TOTAL = Counter(
    name="mnemosyne_search_requests_total",
    documentation="Total search requests",
    labelnames=("search_type", "library_type"),
)
# Execution-time distribution per "search_type"; bucket bounds are in
# seconds (per the metric name), 0.01 to 10.
SEARCH_DURATION = Histogram(
    name="mnemosyne_search_duration_seconds",
    documentation="Time per search type execution",
    labelnames=("search_type",),
    buckets=(0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10),
)
# Distribution of candidate counts per "search_type"; bucket bounds are
# counts from 0 to 100.
# NOTE(review): this is a Histogram, yet its exported name ends in "_total",
# a suffix Prometheus naming conventions reserve for counters. The name is
# kept as-is because dashboards/alerts may already depend on it — confirm
# before renaming.
SEARCH_CANDIDATES_TOTAL = Histogram(
    name="mnemosyne_search_candidates_total",
    documentation="Number of candidates returned per search type",
    labelnames=("search_type",),
    buckets=(0, 1, 5, 10, 20, 50, 100),
)
# Unlabelled end-to-end latency distribution; bucket bounds are in seconds
# (per the metric name), 0.1 to 30.
SEARCH_TOTAL_DURATION = Histogram(
    name="mnemosyne_search_total_duration_seconds",
    documentation="End-to-end search latency including fusion and re-ranking",
    buckets=(0.1, 0.25, 0.5, 1, 2, 5, 10, 30),
)

# --- Fusion ---

# Unlabelled distribution of Reciprocal Rank Fusion time; bucket bounds are
# in seconds (per the metric name), 0.001 to 0.5.
FUSION_DURATION = Histogram(
    name="mnemosyne_fusion_duration_seconds",
    documentation="Time to perform Reciprocal Rank Fusion",
    buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5),
)

# --- Re-ranking ---

# Running count of re-ranking requests, partitioned by the "model_name" and
# "status" labels.
RERANK_REQUESTS_TOTAL = Counter(
    name="mnemosyne_rerank_requests_total",
    documentation="Total re-ranking requests",
    labelnames=("model_name", "status"),
)
# Latency distribution of one re-ranking request per "model_name"; bucket
# bounds are in seconds (per the metric name), 0.1 to 30.
RERANK_DURATION = Histogram(
    name="mnemosyne_rerank_duration_seconds",
    documentation="Time per re-ranking request",
    labelnames=("model_name",),
    buckets=(0.1, 0.25, 0.5, 1, 2, 5, 10, 30),
)
# Unlabelled distribution of how many candidates go to the re-ranker;
# bucket bounds are counts from 1 to 100.
RERANK_CANDIDATES = Histogram(
    name="mnemosyne_rerank_candidates",
    documentation="Number of candidates sent to re-ranker",
    buckets=(1, 5, 10, 20, 32, 50, 100),
)

# --- System State ---

# Unlabelled gauge of items waiting in the embedding queue.
EMBEDDING_QUEUE_SIZE = Gauge(
    name="mnemosyne_embedding_queue_size",
    documentation="Items waiting in the embedding queue",
)