feat(api): add workspace + ingest REST endpoints for Daedalus
Adds the REST API surface that Daedalus calls to manage workspace
lifecycle and dispatch file ingestion. All endpoints under /library/api/:
  POST   /workspaces/                  create workspace (idempotent on workspace_id; library_type frozen)
  GET    /workspaces/{workspace_id}/   workspace status with item/chunk counts
  DELETE /workspaces/{workspace_id}/   delete workspace + reachable content; concept-safe (orphan-only Concept GC; concepts referenced elsewhere are preserved)
  POST   /ingest/                      queue a file for ingest. Idempotent on (library, source_ref, hash): same triple → return existing job; new hash → supersede.
  GET    /jobs/{job_id}/               poll job status
  POST   /jobs/{job_id}/retry/         re-dispatch a failed job
  GET    /jobs/?status=&library_uid=   list recent jobs
Workspace-Library lookup uses the unique workspace_id index added in the
schema commit. Concept GC runs as a separate transaction after item/chunk
delete so partial failures don't leave the global graph corrupted.
Tests cover serializer validation, IngestJob ORM behavior, the
(library, source_ref, hash) idempotency query pattern, and auth
boundaries on every new endpoint. Cypher correctness is validated by
manual end-to-end testing — no live Neo4j in unit tests.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
217
mnemosyne/library/api/workspaces.py
Normal file
217
mnemosyne/library/api/workspaces.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""
|
||||
Workspace lifecycle endpoints for the Daedalus integration.
|
||||
|
||||
A "workspace" in Mnemosyne is a Library scoped to a Daedalus workspace UUID.
|
||||
It uses the same Library node as a global library; the difference is that
|
||||
`workspace_id` is set, and search must filter on it.
|
||||
|
||||
These endpoints are called by the Daedalus backend (HTTP Basic auth as
|
||||
the `daedalus-service` user). Daedalus owns the workspace_id; Mnemosyne
|
||||
just persists what Daedalus tells it.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from neomodel import db
|
||||
from rest_framework import status
|
||||
from rest_framework.decorators import api_view, permission_classes
|
||||
from rest_framework.permissions import IsAuthenticated
|
||||
from rest_framework.response import Response
|
||||
|
||||
from library.content_types import get_library_type_config
|
||||
|
||||
from .serializers import WorkspaceCreateSerializer, WorkspaceStatusSerializer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _serialize_workspace(lib):
    """Assemble the WorkspaceStatus payload dict for a Library node.

    Runs a single Cypher query keyed on ``lib.workspace_id`` that counts the
    distinct Items reachable through Library -> Collection -> Item and the
    distinct Chunks attached to those Items, then combines those counts with
    attributes read directly off ``lib``.
    """
    count_query = (
        "MATCH (l:Library {workspace_id: $wsid}) "
        "OPTIONAL MATCH (l)-[:CONTAINS]->(:Collection)-[:CONTAINS]->(i:Item) "
        "OPTIONAL MATCH (i)-[:HAS_CHUNK]->(c:Chunk) "
        "RETURN count(DISTINCT i) AS item_count, count(DISTINCT c) AS chunk_count"
    )
    rows, _meta = db.cypher_query(count_query, {"wsid": lib.workspace_id})

    # An empty result set is reported as zero items and zero chunks.
    if rows:
        first_row = rows[0]
        item_total = first_row[0]
        chunk_total = first_row[1]
    else:
        item_total = 0
        chunk_total = 0

    payload = {
        "workspace_id": lib.workspace_id,
        "library_uid": lib.uid,
        "name": lib.name,
        "library_type": lib.library_type,
        # A missing/empty description is normalised to the empty string.
        "description": lib.description or "",
        "item_count": item_total,
        "chunk_count": chunk_total,
        "created_at": lib.created_at,
    }
    return payload
|
||||
|
||||
|
||||
@api_view(["POST"])
@permission_classes([IsAuthenticated])
def workspace_create(request):
    """
    Create the Library backing a workspace; repeat calls are idempotent.

    Responses:
      * 201 - no Library had this ``workspace_id``; one was created using the
        defaults returned by ``get_library_type_config`` for the requested
        ``library_type``.
      * 200 - a Library with this ``workspace_id`` already exists and its
        ``library_type`` matches the request; the existing workspace is
        returned unchanged.
      * 409 - a Library with this ``workspace_id`` already exists but with a
        different ``library_type``, which cannot be changed after creation.

    Invalid request bodies are rejected by ``WorkspaceCreateSerializer``
    (``is_valid(raise_exception=True)``).
    """
    from library.models import Library

    serializer = WorkspaceCreateSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    payload = serializer.validated_data
    wsid = payload["workspace_id"]

    # NOTE(review): this lookup and the save() below are not atomic. If
    # workspace_id is backed by a unique constraint, a concurrent duplicate
    # POST would presumably fail inside save() - confirm whether that needs
    # to be mapped onto the idempotent 200 path.
    try:
        found = Library.nodes.get(workspace_id=wsid)
    except Library.DoesNotExist:
        found = None

    if found is None:
        # First time this workspace_id is seen: create it from type defaults.
        type_defaults = get_library_type_config(payload["library_type"])
        created = Library(
            name=payload["name"],
            library_type=payload["library_type"],
            description=payload.get("description", ""),
            workspace_id=wsid,
            chunking_config=type_defaults["chunking_config"],
            embedding_instruction=type_defaults["embedding_instruction"],
            reranker_instruction=type_defaults["reranker_instruction"],
            llm_context_prompt=type_defaults["llm_context_prompt"],
        )
        created.save()
        logger.info(
            "Workspace created workspace_id=%s library_uid=%s library_type=%s",
            wsid, created.uid, created.library_type,
        )
        body = WorkspaceStatusSerializer(_serialize_workspace(created)).data
        return Response(body, status=status.HTTP_201_CREATED)

    # Idempotent path: the workspace exists. Only library_type is compared
    # against the stored node; other submitted fields are not applied to it.
    if found.library_type != payload["library_type"]:
        conflict_message = (
            "library_type is immutable for an existing workspace "
            f"(have '{found.library_type}', "
            f"got '{payload['library_type']}')."
        )
        return Response(
            {"detail": conflict_message},
            status=status.HTTP_409_CONFLICT,
        )

    logger.info(
        "Workspace already exists workspace_id=%s library_uid=%s",
        wsid, found.uid,
    )
    body = WorkspaceStatusSerializer(_serialize_workspace(found)).data
    return Response(body, status=status.HTTP_200_OK)
|
||||
|
||||
|
||||
@api_view(["GET", "DELETE"])
@permission_classes([IsAuthenticated])
def workspace_detail_or_delete(request, workspace_id):
    """
    GET: return workspace status (item/chunk counts, metadata).
    Responds 404 when no Library has this ``workspace_id``.

    DELETE: delete the workspace Library and everything reachable AND unique
    to it (chunks, images + image embeddings, items, collections, then the
    Library node). Concept-safe: Concepts are only removed by a final
    orphan-only garbage-collection pass, so Concepts still referenced by
    other libraries (workspace or global) are preserved.

    DELETE is idempotent: a missing workspace returns 204, the same as a
    successful delete. The Library node is removed last, so a DELETE that
    failed part-way can be retried and will still find the workspace.

    S3 objects are not deleted here. The (item uid, s3_key) pairs of the
    deleted Items are written to the log so they can be cleaned up later.
    """
    from library.models import Library

    try:
        lib = Library.nodes.get(workspace_id=workspace_id)
    except Library.DoesNotExist:
        lib = None

    if request.method == "GET":
        if lib is None:
            return Response(
                {"detail": "Workspace not found."},
                status=status.HTTP_404_NOT_FOUND,
            )
        return Response(WorkspaceStatusSerializer(_serialize_workspace(lib)).data)

    # DELETE — idempotent: a missing workspace returns 204.
    if lib is None:
        return Response(status=status.HTTP_204_NO_CONTENT)

    # Capture identifying fields before the node is deleted.
    library_uid = lib.uid
    library_name = lib.name
    params = {"wsid": workspace_id}

    # Collect Item uids and s3_keys first: once the Items are deleted this
    # is the only remaining record of which S3 objects belonged to them.
    s3_rows, _ = db.cypher_query(
        "MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)"
        "-[:CONTAINS]->(i:Item) RETURN i.uid, i.s3_key",
        params,
    )
    # The match yields one row per Library->Collection->Item path, so key by
    # uid to count and report every Item exactly once.
    item_uids = {row[0] for row in s3_rows}
    s3_key_by_item = {row[0]: row[1] for row in s3_rows if row[1]}

    # Steps 1-4: delete leaf-first (chunks, images, items, collections) and
    # the Library itself last, so every statement can still anchor on the
    # Library node and a retry after a partial failure resumes cleanly.
    delete_steps = (
        """
        MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)
              -[:CONTAINS]->(i:Item)-[:HAS_CHUNK]->(c:Chunk)
        DETACH DELETE c
        """,
        """
        MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)
              -[:CONTAINS]->(i:Item)-[:HAS_IMAGE]->(img:Image)
        OPTIONAL MATCH (img)-[:HAS_EMBEDDING]->(emb:ImageEmbedding)
        DETACH DELETE img, emb
        """,
        """
        MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)
              -[:CONTAINS]->(i:Item)
        DETACH DELETE i
        """,
        """
        MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(col:Collection)
        DETACH DELETE col
        """,
        "MATCH (l:Library {workspace_id: $wsid}) DETACH DELETE l",
    )
    for step in delete_steps:
        db.cypher_query(step, params)

    # Record the S3 keys now that the graph side is gone. This is done
    # before the Concept GC on purpose: if the GC raised, a retried DELETE
    # would return 204 early (the Library no longer exists) and the keys
    # would never be logged.
    for item_uid, s3_key in s3_key_by_item.items():
        logger.info(
            "Workspace deleted, S3 object pending cleanup "
            "workspace_id=%s item_uid=%s s3_key=%s",
            workspace_id, item_uid, s3_key,
        )

    # Step 5: orphan Concept garbage collection. Only Concepts with no
    # incoming REFERENCES, MENTIONS or DEPICTS relationship are removed.
    orphan_result, _ = db.cypher_query(
        """
        MATCH (con:Concept)
        WHERE NOT (con)<-[:REFERENCES]-() AND NOT (con)<-[:MENTIONS]-()
              AND NOT (con)<-[:DEPICTS]-()
        WITH con
        DETACH DELETE con
        RETURN count(con) AS deleted
        """
    )
    orphans_deleted = orphan_result[0][0] if orphan_result else 0

    logger.info(
        "Workspace deleted workspace_id=%s library_uid=%s name=%s "
        "items=%d s3_keys=%d orphans_deleted=%d",
        workspace_id, library_uid, library_name,
        len(item_uids), len(s3_key_by_item), orphans_deleted,
    )

    return Response(status=status.HTTP_204_NO_CONTENT)
|
||||
Reference in New Issue
Block a user