Files
mnemosyne/mnemosyne/library/api/serializers.py
Robert Helewka 0a318c7620
Some checks failed
CVE Scan & Docker Build / security-scan (push) Successful in 51s
CVE Scan & Docker Build / build-and-push (push) Failing after 2m13s
feat(api): add max_length=50 validation to source field
2026-05-13 06:18:55 -04:00

243 lines
9.6 KiB
Python

"""
DRF serializers for the library app.
Serialize Neo4j neomodel nodes into JSON for the REST API.
"""
from rest_framework import serializers
# Closed set of library categories; used below as the ``choices`` of every
# ``library_type`` ChoiceField in this module.
LIBRARY_TYPE_CHOICES = [
    "fiction", "nonfiction", "technical",
    "music", "film", "art",
    "journal", "business", "finance",
]
class LibrarySerializer(serializers.Serializer):
    """Library payload for the REST API.

    ``name`` and ``library_type`` are required on input. ``uid``,
    ``workspace_id`` and ``created_at`` are read-only. Every other field is
    optional and falls back to an empty string (or an empty dict for
    ``chunking_config``).
    """

    uid = serializers.CharField(read_only=True)

    # Required on write.
    name = serializers.CharField(max_length=200)
    library_type = serializers.ChoiceField(choices=LIBRARY_TYPE_CHOICES)

    # Optional on write.
    description = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    chunking_config = serializers.JSONField(default=dict, required=False)

    # Optional free-text instructions/prompts; blank is accepted.
    embedding_instruction = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    reranker_instruction = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    llm_context_prompt = serializers.CharField(
        default="", allow_blank=True, required=False
    )

    # Read-only on this serializer.
    workspace_id = serializers.CharField(read_only=True)
    created_at = serializers.DateTimeField(read_only=True)
class CollectionSerializer(serializers.Serializer):
    """Collection payload for the REST API.

    Only ``name`` is required on input. ``library_uid`` is write-only, so it
    is accepted in requests but never included in serialized output.
    """

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    description = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    metadata = serializers.JSONField(default=dict, required=False)
    created_at = serializers.DateTimeField(read_only=True)

    # Input-only reference to the owning library.
    library_uid = serializers.CharField(
        help_text="UID of the parent library",
        write_only=True,
        required=False,
    )
class ItemSerializer(serializers.Serializer):
    """Item payload for the REST API.

    Only ``title`` is required on input. Storage details (``s3_key``,
    ``content_hash``, ``file_size``), timestamps and the embedding-pipeline
    fields are read-only. ``collection_uid`` is write-only.
    """

    uid = serializers.CharField(read_only=True)
    title = serializers.CharField(max_length=500)
    item_type = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    s3_key = serializers.CharField(read_only=True)
    content_hash = serializers.CharField(read_only=True)
    file_type = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    file_size = serializers.IntegerField(read_only=True)
    metadata = serializers.JSONField(default=dict, required=False)
    created_at = serializers.DateTimeField(read_only=True)
    updated_at = serializers.DateTimeField(read_only=True)

    # Input-only reference to the owning collection.
    collection_uid = serializers.CharField(
        help_text="UID of the parent collection",
        write_only=True,
        required=False,
    )

    # Phase 2: embedding pipeline fields (all read-only).
    embedding_status = serializers.CharField(read_only=True)
    embedding_model_name = serializers.CharField(read_only=True)
    chunk_count = serializers.IntegerField(read_only=True)
    image_count = serializers.IntegerField(read_only=True)
class ChunkSerializer(serializers.Serializer):
    """Chunk payload for the REST API.

    ``chunk_index`` and ``chunk_s3_key`` are required on input;
    ``chunk_size`` defaults to ``0`` and ``text_preview`` to an empty string.
    """

    uid = serializers.CharField(read_only=True)
    chunk_index = serializers.IntegerField()
    chunk_s3_key = serializers.CharField()
    chunk_size = serializers.IntegerField(default=0, required=False)
    text_preview = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    created_at = serializers.DateTimeField(read_only=True)
class ConceptSerializer(serializers.Serializer):
    """Concept payload: a required ``name`` plus an optional ``concept_type``."""

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    concept_type = serializers.CharField(
        default="", allow_blank=True, required=False
    )
class ImageSerializer(serializers.Serializer):
    """Image payload for the REST API.

    ``s3_key`` is the only required input. The descriptive fields default to
    empty strings and ``metadata`` to an empty dict.
    """

    uid = serializers.CharField(read_only=True)
    s3_key = serializers.CharField()
    image_type = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    description = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    metadata = serializers.JSONField(default=dict, required=False)
    created_at = serializers.DateTimeField(read_only=True)
# --- Phase 3: Search ---
class SearchRequestSerializer(serializers.Serializer):
    """Validate the request body of ``/library/api/search/``.

    The body never carries authorization scope. The endpoint is gated by
    ``permission_classes=[IsAuthenticated]`` and the scope is resolved
    server-side from the request's Django session — see
    ``library.utils.all_library_uids`` and the unified auth model in
    ``docs/DAEDALUS_PALLAS_INTEGRATION_v1.md`` §3.3.

    ``library_uid``, ``library_type`` and ``collection_uid`` only narrow
    results inside that scope; they are filters, not scope.
    """

    query = serializers.CharField(max_length=2000)

    # Optional filters applied within the caller's authorized scope.
    library_uid = serializers.CharField(allow_blank=True, required=False)
    library_type = serializers.ChoiceField(
        required=False,
        choices=LIBRARY_TYPE_CHOICES,
    )
    collection_uid = serializers.CharField(allow_blank=True, required=False)

    # Search strategies to run; all three are used when the field is omitted.
    search_types = serializers.ListField(
        default=["vector", "fulltext", "graph"],
        required=False,
        child=serializers.ChoiceField(choices=["vector", "fulltext", "graph"]),
    )

    # Result-shaping options.
    limit = serializers.IntegerField(min_value=1, max_value=100, default=20)
    rerank = serializers.BooleanField(default=True)
    include_images = serializers.BooleanField(default=True)
class SearchCandidateSerializer(serializers.Serializer):
    """One chunk-level search hit.

    Every field is required except ``metadata``, which defaults to an empty
    dict.
    """

    # Identity of the chunk and its parent item.
    chunk_uid = serializers.CharField()
    item_uid = serializers.CharField()
    item_title = serializers.CharField()
    library_type = serializers.CharField()

    # Chunk content and location.
    text_preview = serializers.CharField()
    chunk_s3_key = serializers.CharField()
    chunk_index = serializers.IntegerField()

    # Score and the value reported as its source.
    score = serializers.FloatField()
    source = serializers.CharField()

    metadata = serializers.DictField(default=dict, required=False)
class ImageSearchResultSerializer(serializers.Serializer):
    """One image-level search hit; all fields are required."""

    # Identity of the image and its parent item.
    image_uid = serializers.CharField()
    item_uid = serializers.CharField()
    item_title = serializers.CharField()

    # Image details.
    image_type = serializers.CharField()
    description = serializers.CharField()
    s3_key = serializers.CharField()

    # Score and the value reported as its source.
    score = serializers.FloatField()
    source = serializers.CharField()
class SearchResponseSerializer(serializers.Serializer):
    """Search response envelope.

    Carries the echoed ``query``, the nested chunk ``candidates`` and
    ``images`` hits, and fields describing how the search ran.
    """

    query = serializers.CharField()
    candidates = SearchCandidateSerializer(many=True)
    images = ImageSearchResultSerializer(many=True)
    total_candidates = serializers.IntegerField()
    search_time_ms = serializers.FloatField()
    reranker_used = serializers.BooleanField()
    reranker_model = serializers.CharField(allow_null=True)
    search_types_used = serializers.ListField(child=serializers.CharField())

    # Set when ``rerank=True`` was requested but re-ranking did not run:
    #   * ``no_system_model`` — no system reranker model is configured
    #   * ``api_error: ...``  — the Synesis call raised
    #   * ``no_candidates``   — fusion produced zero candidates
    # ``null`` means re-ranking either succeeded or was never requested.
    # ``required=False`` keeps old clients happy.
    reranker_skip_reason = serializers.CharField(
        default=None, required=False, allow_null=True
    )
# --- Workspace lifecycle (Daedalus integration) ---
class WorkspaceCreateSerializer(serializers.Serializer):
    """Request body accepted by ``POST /api/v1/workspaces/``.

    ``workspace_id``, ``name`` and ``library_type`` are required;
    ``description`` is optional and defaults to an empty string.
    """

    workspace_id = serializers.CharField(max_length=64)
    name = serializers.CharField(max_length=200)
    library_type = serializers.ChoiceField(choices=LIBRARY_TYPE_CHOICES)
    description = serializers.CharField(
        default="", allow_blank=True, required=False
    )
class WorkspaceStatusSerializer(serializers.Serializer):
    """Response body returned by the workspace lifecycle endpoints."""

    # Identity.
    workspace_id = serializers.CharField()
    library_uid = serializers.CharField()

    # Descriptive fields.
    name = serializers.CharField()
    library_type = serializers.CharField()
    description = serializers.CharField(allow_blank=True)

    # Counts and creation timestamp.
    item_count = serializers.IntegerField()
    chunk_count = serializers.IntegerField()
    created_at = serializers.DateTimeField()
# --- Ingest (Daedalus integration) ---
class IngestRequestSerializer(serializers.Serializer):
    """Request body accepted by ``POST /api/v1/library/ingest/``.

    ``s3_key``, ``title`` and ``content_hash`` are required. The target is
    named by ``library_uid`` or ``workspace_id``; at least one of the two
    must be non-empty (enforced in :meth:`validate`).
    """

    s3_key = serializers.CharField(max_length=500)
    title = serializers.CharField(max_length=500)

    # Target selection; each field is optional individually.
    library_uid = serializers.CharField(allow_blank=True, required=False)
    workspace_id = serializers.CharField(allow_blank=True, required=False)
    collection_uid = serializers.CharField(allow_blank=True, required=False)

    # File details.
    file_type = serializers.CharField(
        default="", allow_blank=True, required=False
    )
    file_size = serializers.IntegerField(default=0, required=False)
    content_hash = serializers.CharField(max_length=64)

    # Optional source fields; ``source`` is capped at 50 characters.
    source = serializers.CharField(
        max_length=50, default="", allow_blank=True, required=False
    )
    source_ref = serializers.CharField(
        default="", allow_blank=True, required=False
    )

    def validate(self, data):
        """Require a non-empty ``library_uid`` or ``workspace_id``.

        Returns ``data`` unchanged when at least one of the two is present
        and truthy; otherwise raises ``serializers.ValidationError``.
        """
        if data.get("library_uid") or data.get("workspace_id"):
            return data
        raise serializers.ValidationError(
            "Either library_uid or workspace_id is required."
        )
class IngestJobSerializer(serializers.Serializer):
    """Response body describing the status of an ingest job."""

    # Exposed as ``job_id`` but read from the instance's ``id`` attribute.
    job_id = serializers.CharField(source="id")
    item_uid = serializers.CharField(allow_blank=True)
    library_uid = serializers.CharField()

    # Job state.
    status = serializers.CharField()
    progress = serializers.CharField()
    error = serializers.CharField(allow_null=True)

    # Job counters and the embedding model field.
    chunks_created = serializers.IntegerField()
    concepts_extracted = serializers.IntegerField()
    embedding_model = serializers.CharField(allow_blank=True)

    # Same-named fields as on the ingest request serializer.
    content_hash = serializers.CharField(allow_blank=True)
    source = serializers.CharField(allow_blank=True)
    source_ref = serializers.CharField(allow_blank=True)

    # Timestamps; the start and completion times may be null.
    created_at = serializers.DateTimeField()
    started_at = serializers.DateTimeField(allow_null=True)
    completed_at = serializers.DateTimeField(allow_null=True)