Files
mnemosyne/mnemosyne/library/api/workspaces.py
Robert Helewka dd06f923cd
Some checks failed
CVE Scan & Docker Build / security-scan (pull_request) Successful in 3m49s
CVE Scan & Docker Build / build-and-push (pull_request) Has been cancelled
feat(workspaces): return 409 name_conflict instead of 500 on Library name clash
A recreate of a workspace whose Mnemosyne Library was orphaned (left behind
by a failed Daedalus delete-propagate) collides on the global Library.name
unique constraint. neomodel raised UniqueProperty unguarded, so workspace_create
500'd and ingest then 404'd forever — the queue froze silently.

Guard lib.save() and return a structured 409 with a machine code so Daedalus
can classify the failure without string-matching:
- name_conflict   — the new name-collision case
- owner_conflict, library_type_immutable — codes added to the two existing 409s

Cypher-touching paths stay covered by the manual end-to-end plan, per the
test module's stated convention.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 20:26:43 -04:00

208 lines
7.6 KiB
Python

"""
Workspace lifecycle endpoints for the Daedalus integration.
A "workspace" in Mnemosyne is a Library scoped to a Daedalus workspace UUID.
It uses the same Library node as a global library; the difference is that
`workspace_id` is set, and search must filter on it.
These endpoints are called by the Daedalus backend authenticated as the
Mnemosyne user the workspace belongs to (per-user ``UserToken``,
``Authorization: Bearer <plaintext>``, minted at ``/profile/tokens/``). The
workspace's owning user is recorded on the Library node as
``owner_username``; every read and mutation is scoped to that user.
Non-owners receive 404 so a workspace's existence isn't disclosed
across users.
"""
import logging
from neomodel import db
from neomodel.exceptions import UniqueProperty
from rest_framework import status
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from library.content_types import get_library_type_config
from library.services.library_delete import delete_library_cascade
from .serializers import WorkspaceCreateSerializer, WorkspaceStatusSerializer
logger = logging.getLogger(__name__)
def _serialize_workspace(lib):
    """Assemble the WorkspaceStatus payload for a Library node.

    Issues a single Cypher query keyed on ``lib.workspace_id`` that counts
    the distinct Items reachable via Library -> Collection -> Item and the
    distinct Chunks attached to those Items, then combines the two counts
    with fields read straight off the node.

    Returns a plain dict; both counts default to 0 when the query yields
    no rows, and a falsy ``description`` is normalised to ``""``.
    """
    count_query = (
        "MATCH (l:Library {workspace_id: $wsid}) "
        "OPTIONAL MATCH (l)-[:CONTAINS]->(:Collection)-[:CONTAINS]->(i:Item) "
        "OPTIONAL MATCH (i)-[:HAS_CHUNK]->(c:Chunk) "
        "RETURN count(DISTINCT i) AS item_count, count(DISTINCT c) AS chunk_count"
    )
    rows, _meta = db.cypher_query(count_query, {"wsid": lib.workspace_id})

    # DISTINCT matters: the two OPTIONAL MATCHes multiply rows, so a plain
    # count() would over-report items that have several chunks.
    if rows:
        item_count = rows[0][0]
        chunk_count = rows[0][1]
    else:
        item_count = 0
        chunk_count = 0

    payload = {
        "workspace_id": lib.workspace_id,
        "library_uid": lib.uid,
        "name": lib.name,
        "library_type": lib.library_type,
        "description": lib.description or "",
        "item_count": item_count,
        "chunk_count": chunk_count,
        "created_at": lib.created_at,
    }
    return payload
def _conflict_response(detail, code):
    """Return a 409 response with a human-readable detail and a machine code."""
    return Response(
        {
            "detail": detail,
            "code": code,
        },
        status=status.HTTP_409_CONFLICT,
    )


def _existing_workspace_response(existing, data, request):
    """Answer a create request whose ``workspace_id`` is already taken.

    Yields 409 ``owner_conflict`` when the node records a different owner,
    409 ``library_type_immutable`` when the requested type differs from the
    stored one, and otherwise 200 with the current workspace status.
    """
    recorded_owner = existing.owner_username
    # A node with no recorded owner skips the ownership check entirely.
    if recorded_owner and recorded_owner != request.user.username:
        # Same workspace_id under a different owner: keep the message
        # generic so the shape of the collision isn't disclosed.
        logger.warning(
            "workspace_create owner_conflict workspace_id=%s caller=%s",
            data["workspace_id"], request.user.username,
        )
        return _conflict_response(
            "Workspace id is already in use.",
            "owner_conflict",
        )

    if existing.library_type != data["library_type"]:
        return _conflict_response(
            "library_type is immutable for an existing workspace "
            f"(have '{existing.library_type}', "
            f"got '{data['library_type']}').",
            "library_type_immutable",
        )

    logger.info(
        "Workspace already exists workspace_id=%s library_uid=%s",
        data["workspace_id"], existing.uid,
    )
    body = WorkspaceStatusSerializer(_serialize_workspace(existing)).data
    return Response(body, status=status.HTTP_200_OK)


@api_view(["POST"])
@permission_classes([IsAuthenticated])
def workspace_create(request):
    """
    Create the Library backing a workspace, or return the one already there.

    The request body is validated with ``WorkspaceCreateSerializer``
    (``raise_exception=True``, so invalid payloads never reach the logic
    below).

    Responses:
      * 201 - a new Library was saved; body is the workspace status.
      * 200 - a Library with this ``workspace_id`` already exists and is
        compatible with the request; body is its current status.
      * 409 ``owner_conflict`` - the ``workspace_id`` is recorded under a
        different owner.
      * 409 ``library_type_immutable`` - the existing workspace has a
        different ``library_type``.
      * 409 ``name_conflict`` - saving raised ``UniqueProperty``.
    """
    # Function-scope import, as in the rest of this module.
    # NOTE(review): presumably avoids an import cycle - confirm.
    from library.models import Library

    serializer = WorkspaceCreateSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    data = serializer.validated_data

    # Idempotent path: hand off as soon as the workspace is found.
    try:
        existing = Library.nodes.get(workspace_id=data["workspace_id"])
    except Library.DoesNotExist:
        pass
    else:
        return _existing_workspace_response(existing, data, request)

    # Fresh workspace: seed the type-specific settings from the registry.
    type_defaults = get_library_type_config(data["library_type"])
    lib = Library(
        name=data["name"],
        library_type=data["library_type"],
        description=data.get("description", ""),
        workspace_id=data["workspace_id"],
        owner_username=request.user.username,
        chunking_config=type_defaults["chunking_config"],
        embedding_instruction=type_defaults["embedding_instruction"],
        reranker_instruction=type_defaults["reranker_instruction"],
        llm_context_prompt=type_defaults["llm_context_prompt"],
    )

    try:
        lib.save()
    except UniqueProperty:
        # Library.name is globally unique. A clash here almost always means
        # an orphaned Library outlived a failed Daedalus workspace delete and
        # still holds the name, so the recreate under a new workspace_id
        # collides. Report a structured 409 rather than a 500 so Daedalus can
        # record it; the operator removes the orphan (admin delete) or
        # renames the workspace.
        # NOTE(review): any unique constraint on Library would land here and
        # be reported as a name conflict - confirm name is the only one that
        # can fire on this path.
        logger.warning(
            "workspace_create name_conflict workspace_id=%s name=%s",
            data["workspace_id"], data["name"],
        )
        return _conflict_response(
            f"A library named '{data['name']}' already exists in Mnemosyne.",
            "name_conflict",
        )

    logger.info(
        "Workspace created workspace_id=%s library_uid=%s library_type=%s",
        data["workspace_id"], lib.uid, lib.library_type,
    )
    body = WorkspaceStatusSerializer(_serialize_workspace(lib)).data
    return Response(body, status=status.HTTP_201_CREATED)
@api_view(["GET", "DELETE"])
@permission_classes([IsAuthenticated])
def workspace_detail_or_delete(request, workspace_id):
    """
    Read or remove the workspace identified by ``workspace_id``.

    GET returns the workspace status (metadata plus item/chunk counts), or
    404 when the workspace is missing or not owned by the caller.

    DELETE hands the Library to ``delete_library_cascade`` and answers 204.
    It is idempotent: a missing or unowned workspace also answers 204
    without touching anything.

    In both cases a workspace owned by someone else is treated exactly like
    one that does not exist, so its existence is not disclosed across users.
    """
    # Function-scope import, as in the rest of this module.
    # NOTE(review): presumably avoids an import cycle - confirm.
    from library.models import Library

    try:
        target = Library.nodes.get(workspace_id=workspace_id)
    except Library.DoesNotExist:
        target = None

    # Only the recorded owner may see or act on the workspace.
    accessible = (
        target is not None
        and target.owner_username == request.user.username
    )

    if request.method == "GET":
        if accessible:
            return Response(
                WorkspaceStatusSerializer(_serialize_workspace(target)).data
            )
        return Response(
            {"detail": "Workspace not found."},
            status=status.HTTP_404_NOT_FOUND,
        )

    # DELETE from here on (the decorator admits only GET and DELETE).
    if not accessible:
        return Response(status=status.HTTP_204_NO_CONTENT)

    # Same cascade the admin/HTML delete path uses; the returned summary
    # is only consumed for the log line below.
    summary = delete_library_cascade(target)
    logger.info(
        "Workspace deleted workspace_id=%s library_uid=%s name=%s "
        "items=%d orphans_deleted=%d",
        workspace_id, summary["library_uid"], summary["name"],
        summary["item_count"], summary["orphans_deleted"],
    )
    return Response(status=status.HTTP_204_NO_CONTENT)