243 lines
9.6 KiB
Python
243 lines
9.6 KiB
Python
"""
DRF serializers for the library app.

Serialize Neo4j neomodel nodes into JSON for the REST API.
"""
|
|
|
|
from rest_framework import serializers
|
|
|
|
|
|
# Allowed values for ``library_type``. Referenced by the ``ChoiceField``
# declarations in this module so every serializer accepts the same set.
LIBRARY_TYPE_CHOICES = [
    "fiction",
    "nonfiction",
    "technical",
    "music",
    "film",
    "art",
    "journal",
    "business",
    "finance",
]
|
|
|
|
|
|
class LibrarySerializer(serializers.Serializer):
    """Serializer for library records.

    ``uid``, ``workspace_id`` and ``created_at`` are read-only.
    ``name`` and ``library_type`` must be supplied on input; every other
    writable field is optional and defaults to an empty string (an empty
    dict for ``chunking_config``).
    """

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    # Restricted to the values listed in LIBRARY_TYPE_CHOICES.
    library_type = serializers.ChoiceField(choices=LIBRARY_TYPE_CHOICES)
    description = serializers.CharField(required=False, allow_blank=True, default="")
    # ``default=dict`` is a callable, so each use gets a fresh dict.
    chunking_config = serializers.JSONField(required=False, default=dict)
    embedding_instruction = serializers.CharField(
        required=False, allow_blank=True, default=""
    )
    reranker_instruction = serializers.CharField(
        required=False, allow_blank=True, default=""
    )
    llm_context_prompt = serializers.CharField(
        required=False, allow_blank=True, default=""
    )
    workspace_id = serializers.CharField(read_only=True)
    created_at = serializers.DateTimeField(read_only=True)
|
|
|
|
|
|
class CollectionSerializer(serializers.Serializer):
    """Serializer for collection records.

    ``uid`` and ``created_at`` are read-only; ``name`` is the only
    required input. ``library_uid`` is write-only, so it is accepted on
    input but never included in serialized output.
    """

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    description = serializers.CharField(required=False, allow_blank=True, default="")
    metadata = serializers.JSONField(required=False, default=dict)
    created_at = serializers.DateTimeField(read_only=True)
    library_uid = serializers.CharField(
        required=False, write_only=True, help_text="UID of the parent library"
    )
|
|
|
|
|
|
class ItemSerializer(serializers.Serializer):
    """Serializer for item records.

    ``title`` is the only required input. Storage details (``s3_key``,
    ``content_hash``, ``file_size``), timestamps and the embedding
    pipeline fields are read-only. ``collection_uid`` is write-only and
    therefore never appears in serialized output.
    """

    uid = serializers.CharField(read_only=True)
    title = serializers.CharField(max_length=500)
    item_type = serializers.CharField(required=False, allow_blank=True, default="")
    s3_key = serializers.CharField(read_only=True)
    content_hash = serializers.CharField(read_only=True)
    file_type = serializers.CharField(required=False, allow_blank=True, default="")
    file_size = serializers.IntegerField(read_only=True)
    metadata = serializers.JSONField(required=False, default=dict)
    created_at = serializers.DateTimeField(read_only=True)
    updated_at = serializers.DateTimeField(read_only=True)
    collection_uid = serializers.CharField(
        required=False, write_only=True, help_text="UID of the parent collection"
    )
    # Phase 2: Embedding pipeline fields
    embedding_status = serializers.CharField(read_only=True)
    embedding_model_name = serializers.CharField(read_only=True)
    chunk_count = serializers.IntegerField(read_only=True)
    image_count = serializers.IntegerField(read_only=True)
|
|
|
|
|
|
class ChunkSerializer(serializers.Serializer):
    """Serializer for chunk records.

    ``chunk_index`` and ``chunk_s3_key`` are required. ``chunk_size``
    defaults to ``0`` and ``text_preview`` to an empty string; ``uid``
    and ``created_at`` are read-only.
    """

    uid = serializers.CharField(read_only=True)
    chunk_index = serializers.IntegerField()
    chunk_s3_key = serializers.CharField()
    chunk_size = serializers.IntegerField(required=False, default=0)
    text_preview = serializers.CharField(required=False, allow_blank=True, default="")
    created_at = serializers.DateTimeField(read_only=True)
|
|
|
|
|
|
class ConceptSerializer(serializers.Serializer):
    """Serializer for concept records.

    ``name`` is required (at most 200 characters); ``concept_type`` is
    optional and defaults to an empty string; ``uid`` is read-only.
    """

    uid = serializers.CharField(read_only=True)
    name = serializers.CharField(max_length=200)
    concept_type = serializers.CharField(required=False, allow_blank=True, default="")
|
|
|
|
|
|
class ImageSerializer(serializers.Serializer):
    """Serializer for image records.

    ``s3_key`` is required. ``image_type`` and ``description`` default
    to an empty string and ``metadata`` to an empty dict; ``uid`` and
    ``created_at`` are read-only.
    """

    uid = serializers.CharField(read_only=True)
    s3_key = serializers.CharField()
    image_type = serializers.CharField(required=False, allow_blank=True, default="")
    description = serializers.CharField(required=False, allow_blank=True, default="")
    metadata = serializers.JSONField(required=False, default=dict)
    created_at = serializers.DateTimeField(read_only=True)
|
|
|
|
|
|
# --- Phase 3: Search ---
|
|
|
|
|
|
class SearchRequestSerializer(serializers.Serializer):
    """Request body for ``/library/api/search/``.

    Authorization scope is resolved server-side from the request's
    Django session (this endpoint is gated by
    ``permission_classes=[IsAuthenticated]``), not from the request
    body — see ``library.utils.all_library_uids`` and the unified
    auth model in ``docs/DAEDALUS_PALLAS_INTEGRATION_v1.md`` §3.3.
    ``library_uid`` / ``library_type`` / ``collection_uid`` are
    filters inside that scope, not scope itself.

    Only ``query`` is required. ``search_types`` defaults to all three
    supported types, ``limit`` defaults to 20 and must lie in 1..100,
    and ``rerank`` / ``include_images`` default to ``True``.
    """

    query = serializers.CharField(max_length=2000)
    library_uid = serializers.CharField(required=False, allow_blank=True)
    library_type = serializers.ChoiceField(
        choices=LIBRARY_TYPE_CHOICES,
        required=False,
    )
    collection_uid = serializers.CharField(required=False, allow_blank=True)
    # Each list element is validated against the three allowed names.
    search_types = serializers.ListField(
        child=serializers.ChoiceField(choices=["vector", "fulltext", "graph"]),
        required=False,
        default=["vector", "fulltext", "graph"],
    )
    limit = serializers.IntegerField(default=20, min_value=1, max_value=100)
    rerank = serializers.BooleanField(default=True)
    include_images = serializers.BooleanField(default=True)
|
|
|
|
|
|
class SearchCandidateSerializer(serializers.Serializer):
    """One entry of ``SearchResponseSerializer.candidates``.

    Every field is required except ``metadata``, which defaults to an
    empty dict.
    """

    chunk_uid = serializers.CharField()
    item_uid = serializers.CharField()
    item_title = serializers.CharField()
    library_type = serializers.CharField()
    text_preview = serializers.CharField()
    chunk_s3_key = serializers.CharField()
    chunk_index = serializers.IntegerField()
    score = serializers.FloatField()
    source = serializers.CharField()
    metadata = serializers.DictField(required=False, default=dict)
|
|
|
|
|
|
class ImageSearchResultSerializer(serializers.Serializer):
    """One entry of ``SearchResponseSerializer.images``.

    All fields are required.
    """

    image_uid = serializers.CharField()
    item_uid = serializers.CharField()
    item_title = serializers.CharField()
    image_type = serializers.CharField()
    description = serializers.CharField()
    s3_key = serializers.CharField()
    score = serializers.FloatField()
    source = serializers.CharField()
|
|
|
|
|
|
class SearchResponseSerializer(serializers.Serializer):
    """Search response body.

    Carries the query, the list of text candidates and image results,
    and summary fields describing timing, the search types used and the
    re-ranking step.
    """

    query = serializers.CharField()
    candidates = SearchCandidateSerializer(many=True)
    images = ImageSearchResultSerializer(many=True)
    total_candidates = serializers.IntegerField()
    search_time_ms = serializers.FloatField()
    reranker_used = serializers.BooleanField()
    reranker_model = serializers.CharField(allow_null=True)
    search_types_used = serializers.ListField(child=serializers.CharField())
    # Populated when ``rerank=True`` was requested but the re-ranking
    # step did not run — e.g. no system reranker model configured
    # (``no_system_model``), the Synesis call raised
    # (``api_error: ...``), or fusion produced zero candidates
    # (``no_candidates``). ``null`` means either success or that
    # re-ranking was not requested. ``required=False`` keeps old
    # clients happy.
    reranker_skip_reason = serializers.CharField(
        allow_null=True, required=False, default=None
    )
|
|
|
|
|
|
# --- Workspace lifecycle (Daedalus integration) ---
|
|
|
|
|
|
class WorkspaceCreateSerializer(serializers.Serializer):
    """Inbound payload for POST /api/v1/workspaces/.

    ``workspace_id``, ``name`` and ``library_type`` are required;
    ``description`` is optional and defaults to an empty string.
    """

    workspace_id = serializers.CharField(max_length=64)
    name = serializers.CharField(max_length=200)
    library_type = serializers.ChoiceField(choices=LIBRARY_TYPE_CHOICES)
    description = serializers.CharField(required=False, allow_blank=True, default="")
|
|
|
|
|
|
class WorkspaceStatusSerializer(serializers.Serializer):
    """Outbound payload for workspace lifecycle endpoints.

    All fields are required; ``description`` may be an empty string.
    """

    workspace_id = serializers.CharField()
    library_uid = serializers.CharField()
    name = serializers.CharField()
    library_type = serializers.CharField()
    description = serializers.CharField(allow_blank=True)
    item_count = serializers.IntegerField()
    chunk_count = serializers.IntegerField()
    created_at = serializers.DateTimeField()
|
|
|
|
|
|
# --- Ingest (Daedalus integration) ---
|
|
|
|
|
|
class IngestRequestSerializer(serializers.Serializer):
    """Inbound payload for POST /api/v1/library/ingest/.

    ``s3_key``, ``title`` and ``content_hash`` are required. The target
    is identified by ``library_uid`` or ``workspace_id``; both are
    optional at field level, but ``validate`` rejects a payload that
    supplies neither.
    """

    s3_key = serializers.CharField(max_length=500)
    title = serializers.CharField(max_length=500)
    library_uid = serializers.CharField(required=False, allow_blank=True)
    workspace_id = serializers.CharField(required=False, allow_blank=True)
    collection_uid = serializers.CharField(required=False, allow_blank=True)
    file_type = serializers.CharField(required=False, allow_blank=True, default="")
    file_size = serializers.IntegerField(required=False, default=0)
    content_hash = serializers.CharField(max_length=64)
    source = serializers.CharField(
        required=False, allow_blank=True, default="", max_length=50
    )
    source_ref = serializers.CharField(required=False, allow_blank=True, default="")

    def validate(self, data):
        """Require at least one of ``library_uid`` / ``workspace_id``.

        The check is truthiness-based, so a blank string counts as
        missing just like an absent key.

        Args:
            data: Dict of field values that passed field-level
                validation.

        Returns:
            ``data`` unchanged.

        Raises:
            serializers.ValidationError: If neither identifier has a
                non-empty value.
        """
        if not data.get("library_uid") and not data.get("workspace_id"):
            raise serializers.ValidationError(
                "Either library_uid or workspace_id is required."
            )
        return data
|
|
|
|
|
|
class IngestJobSerializer(serializers.Serializer):
    """Outbound payload for ingest job status.

    ``error``, ``started_at`` and ``completed_at`` may be ``null``;
    ``item_uid``, ``embedding_model``, ``content_hash``, ``source`` and
    ``source_ref`` may be empty strings.
    """

    # Exposed as ``job_id`` but read from the ``id`` attribute of the
    # object being serialized.
    job_id = serializers.CharField(source="id")
    item_uid = serializers.CharField(allow_blank=True)
    library_uid = serializers.CharField()
    status = serializers.CharField()
    progress = serializers.CharField()
    error = serializers.CharField(allow_null=True)
    chunks_created = serializers.IntegerField()
    concepts_extracted = serializers.IntegerField()
    embedding_model = serializers.CharField(allow_blank=True)
    content_hash = serializers.CharField(allow_blank=True)
    source = serializers.CharField(allow_blank=True)
    source_ref = serializers.CharField(allow_blank=True)
    created_at = serializers.DateTimeField()
    started_at = serializers.DateTimeField(allow_null=True)
    completed_at = serializers.DateTimeField(allow_null=True)
|