2 Commits

Author SHA1 Message Date
dd06f923cd feat(workspaces): return 409 name_conflict instead of 500 on Library name clash
Some checks failed
CVE Scan & Docker Build / security-scan (pull_request) Successful in 3m49s
CVE Scan & Docker Build / build-and-push (pull_request) Has been cancelled
A recreate of a workspace whose Mnemosyne Library was orphaned (left behind
by a failed Daedalus delete-propagate) collides on the global Library.name
unique constraint. neomodel raised UniqueProperty and nothing caught it, so
workspace_create returned a 500 and ingest then kept returning 404
indefinitely — the queue froze silently.

Guard lib.save() and return a structured 409 with a machine code so Daedalus
can classify the failure without string-matching:
- name_conflict   — the new name-collision case
- owner_conflict, library_type_immutable — codes added to the two existing 409s

Cypher-touching paths stay covered by the manual end-to-end plan, per the
test module's stated convention.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 20:26:43 -04:00
142e9675b5 feat(library): allow admin delete of Daedalus-managed library via shared cascade
Admin/HTML library delete previously hard-blocked workspace-scoped
(Daedalus-managed) libraries, leaving no way to clear an orphaned Library
node — e.g. one left behind when a Daedalus workspace delete failed to
propagate. A recreate of that workspace then collides on the global
Library.name unique constraint and 500s, freezing ingest.

Allow the delete behind the existing confirm warning (low risk: source
content lives in Daedalus and is recreated + re-embedded on next sync),
and route both the API and HTML delete paths through one shared cascade.

- Add library/services/library_delete.delete_library_cascade(lib), keyed on
  Library uid so it covers global and workspace-scoped libraries. It removes
  Chunks, Images/ImageEmbeddings, Items, Collections, the Library, then GCs
  orphan-only Concepts (verbatim from the API view, re-keyed workspace_id->uid).
- workspace_detail_or_delete (API) now calls the shared helper.
- library_delete (HTML) no longer blocks workspace_id libraries; it calls the
  cascade instead of a bare lib.delete() (which leaked child nodes — also a
  latent bug for global libraries with content).
- Confirm-delete template shows a caution banner for Daedalus-managed libraries.

No migration: Mnemosyne library data is in Neo4j (neomodel); no schema change.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 19:37:58 -04:00
9 changed files with 227 additions and 111 deletions

View File

@@ -17,12 +17,14 @@ across users.
import logging
from neomodel import db
from neomodel.exceptions import UniqueProperty
from rest_framework import status
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from library.content_types import get_library_type_config
from library.services.library_delete import delete_library_cascade
from .serializers import WorkspaceCreateSerializer, WorkspaceStatusSerializer
@@ -84,7 +86,10 @@ def workspace_create(request):
data["workspace_id"], request.user.username,
)
return Response(
{"detail": "Workspace id is already in use."},
{
"detail": "Workspace id is already in use.",
"code": "owner_conflict",
},
status=status.HTTP_409_CONFLICT,
)
if existing.library_type != data["library_type"]:
@@ -94,7 +99,8 @@ def workspace_create(request):
"library_type is immutable for an existing workspace "
f"(have '{existing.library_type}', "
f"got '{data['library_type']}')."
)
),
"code": "library_type_immutable",
},
status=status.HTTP_409_CONFLICT,
)
@@ -119,7 +125,29 @@ def workspace_create(request):
reranker_instruction=defaults["reranker_instruction"],
llm_context_prompt=defaults["llm_context_prompt"],
)
try:
lib.save()
except UniqueProperty:
# Library.name is globally unique. A name collision here almost always
# means an orphaned Library survived a failed Daedalus workspace delete
# (the old node kept the name), and the recreate under a new
# workspace_id now clashes. Surface a clean 409 instead of a 500 so
# Daedalus can record + report it; the operator clears the orphan
# (admin delete) or renames the workspace.
logger.warning(
"workspace_create name_conflict workspace_id=%s name=%s",
data["workspace_id"], data["name"],
)
return Response(
{
"detail": (
f"A library named '{data['name']}' already exists in "
"Mnemosyne."
),
"code": "name_conflict",
},
status=status.HTTP_409_CONFLICT,
)
logger.info(
"Workspace created workspace_id=%s library_uid=%s library_type=%s",
data["workspace_id"], lib.uid, lib.library_type,
@@ -165,74 +193,15 @@ def workspace_detail_or_delete(request, workspace_id):
if lib is None:
return Response(status=status.HTTP_204_NO_CONTENT)
library_uid = lib.uid
library_name = lib.name
# Step 1-4: delete chunks, items, collections, then the library itself.
# We collect Item s3_keys first so the caller can clean up S3
# asynchronously (a future enhancement — for now, the keys are logged).
s3_rows, _ = db.cypher_query(
"MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)"
"-[:CONTAINS]->(i:Item) RETURN i.uid, i.s3_key",
{"wsid": workspace_id},
)
item_s3_keys = [(r[0], r[1]) for r in s3_rows if r[1]]
db.cypher_query(
"""
MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)
-[:CONTAINS]->(i:Item)-[:HAS_CHUNK]->(c:Chunk)
DETACH DELETE c
""",
{"wsid": workspace_id},
)
db.cypher_query(
"""
MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)
-[:CONTAINS]->(i:Item)-[:HAS_IMAGE]->(img:Image)
OPTIONAL MATCH (img)-[:HAS_EMBEDDING]->(emb:ImageEmbedding)
DETACH DELETE img, emb
""",
{"wsid": workspace_id},
)
db.cypher_query(
"""
MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(:Collection)
-[:CONTAINS]->(i:Item)
DETACH DELETE i
""",
{"wsid": workspace_id},
)
db.cypher_query(
"""
MATCH (l:Library {workspace_id: $wsid})-[:CONTAINS]->(col:Collection)
DETACH DELETE col
""",
{"wsid": workspace_id},
)
db.cypher_query(
"MATCH (l:Library {workspace_id: $wsid}) DETACH DELETE l",
{"wsid": workspace_id},
)
# Step 5: orphan Concept garbage collection.
orphan_result, _ = db.cypher_query(
"""
MATCH (con:Concept)
WHERE NOT (con)<-[:REFERENCES]-() AND NOT (con)<-[:MENTIONS]-()
AND NOT (con)<-[:DEPICTS]-()
WITH con
DETACH DELETE con
RETURN count(con) AS deleted
"""
)
orphans_deleted = orphan_result[0][0] if orphan_result else 0
# Delete the Library and everything reachable + unique to it, plus
# orphan-Concept GC. Shared with the admin/HTML delete path.
result = delete_library_cascade(lib)
logger.info(
"Workspace deleted workspace_id=%s library_uid=%s name=%s "
"items=%d orphans_deleted=%d",
workspace_id, library_uid, library_name,
len(item_s3_keys), orphans_deleted,
workspace_id, result["library_uid"], result["name"],
result["item_count"], result["orphans_deleted"],
)
return Response(status=status.HTTP_204_NO_CONTENT)

View File

@@ -0,0 +1,108 @@
"""
Shared Library deletion cascade.
Deletes a Library node and everything reachable AND unique to it
(Collections, Items, Chunks, Images + ImageEmbeddings), then garbage-collects
Concepts that are no longer referenced by any remaining Item or Image.
Keyed on the Library ``uid`` so it works for *both* global libraries
(``workspace_id`` is null) and workspace-scoped libraries. This is the single
source of truth used by:
* the Daedalus integration API (``DELETE /library/api/workspaces/{id}/``), and
* the admin/HTML delete view (``library_delete``).
Concept-safe: orphan-only Concept GC happens at the end. Concepts still
referenced by another library (workspace or global) are preserved.
"""
import logging
from neomodel import db
logger = logging.getLogger(__name__)
def delete_library_cascade(lib) -> dict:
    """Delete ``lib`` and all content reachable and unique to it.

    The cascade is a fixed sequence of Cypher statements keyed on the
    Library ``uid``. Each statement is issued as its own
    ``db.cypher_query`` call; this function does not open a transaction
    itself. Order matters because every statement locates its targets by
    walking down from the Library: Chunks and Images are matched through
    their Item, Items through their Collection, and Collections through the
    Library, so children must go before their parents.

    :param lib: A ``library.models.Library`` node instance. Only ``uid`` and
        ``name`` are read from it.
    :returns: Dict with ``library_uid``, ``name``, ``item_count`` (every Item
        matched under the Library, with or without an S3 key),
        ``item_s3_keys`` (list of ``(uid, s3_key)`` for async S3 cleanup;
        Items without an ``s3_key`` are omitted), and ``orphans_deleted``
        (Concept GC count).
    """
    library_uid = lib.uid
    library_name = lib.name

    # Collect Item s3_keys first so the caller can clean up S3 asynchronously
    # (a future enhancement — for now, the keys are returned/logged).
    s3_rows, _ = db.cypher_query(
        "MATCH (l:Library {uid: $uid})-[:CONTAINS]->(:Collection)"
        "-[:CONTAINS]->(i:Item) RETURN i.uid, i.s3_key",
        {"uid": library_uid},
    )
    # Count every matched Item. Counting only the rows that carry an s3_key
    # would under-report Items that are deleted but have no stored object.
    item_count = len(s3_rows)
    item_s3_keys = [
        (item_uid, s3_key) for item_uid, s3_key in s3_rows if s3_key
    ]

    # Ordered leaf-to-root: each statement relies on the path from the
    # Library still existing, so parents are removed only after children.
    cascade_statements = (
        # Chunks hanging off the Library's Items.
        """
        MATCH (l:Library {uid: $uid})-[:CONTAINS]->(:Collection)
              -[:CONTAINS]->(i:Item)-[:HAS_CHUNK]->(c:Chunk)
        DETACH DELETE c
        """,
        # Images and, where present, their ImageEmbeddings.
        """
        MATCH (l:Library {uid: $uid})-[:CONTAINS]->(:Collection)
              -[:CONTAINS]->(i:Item)-[:HAS_IMAGE]->(img:Image)
        OPTIONAL MATCH (img)-[:HAS_EMBEDDING]->(emb:ImageEmbedding)
        DETACH DELETE img, emb
        """,
        # The Items themselves.
        """
        MATCH (l:Library {uid: $uid})-[:CONTAINS]->(:Collection)
              -[:CONTAINS]->(i:Item)
        DETACH DELETE i
        """,
        # The Collections.
        """
        MATCH (l:Library {uid: $uid})-[:CONTAINS]->(col:Collection)
        DETACH DELETE col
        """,
        # Finally the Library node.
        "MATCH (l:Library {uid: $uid}) DETACH DELETE l",
    )
    for statement in cascade_statements:
        # A fresh parameter dict per call, as in the original call sites.
        db.cypher_query(statement, {"uid": library_uid})

    # Orphan Concept garbage collection: drop Concepts no longer referenced
    # by any Item (REFERENCES/MENTIONS) or Image (DEPICTS). This query is not
    # scoped to the deleted Library; it sweeps every unreferenced Concept.
    orphan_result, _ = db.cypher_query(
        """
        MATCH (con:Concept)
        WHERE NOT (con)<-[:REFERENCES]-() AND NOT (con)<-[:MENTIONS]-()
              AND NOT (con)<-[:DEPICTS]-()
        WITH con
        DETACH DELETE con
        RETURN count(con) AS deleted
        """
    )
    orphans_deleted = orphan_result[0][0] if orphan_result else 0

    logger.info(
        "Library cascade-deleted library_uid=%s name=%s items=%d orphans_deleted=%d",
        library_uid, library_name, item_count, orphans_deleted,
    )
    return {
        "library_uid": library_uid,
        "name": library_name,
        "item_count": item_count,
        "item_s3_keys": item_s3_keys,
        "orphans_deleted": orphans_deleted,
    }

View File

@@ -12,6 +12,18 @@
<div class="alert alert-warning mb-6">
<span>Are you sure you want to delete <strong>{{ library.name }}</strong>? This action cannot be undone.</span>
</div>
{% if library.workspace_id %}
<div class="alert alert-error mb-6">
<span>
<strong>This Library is managed by Daedalus</strong>
(workspace <code>{{ library.workspace_id }}</code>).
Deleting it here removes its embedded content from Mnemosyne, but the
source files still live in Daedalus — it will be <strong>recreated and
re-embedded on the next Daedalus sync</strong>. Use this to clear an
orphaned Library that is blocking workspace re-registration.
</span>
</div>
{% endif %}
<form method="post">
{% csrf_token %}
<div class="flex gap-2">

View File

@@ -48,3 +48,30 @@ class ConceptExtractionParsingTests(TestCase):
result = self.extractor._parse_concept_response(response)
self.assertEqual(len(result), 1)
self.assertEqual(result[0]["name"], "valid")
class SampleIndexSelectionTests(TestCase):
    """Checks for ``ConceptExtractor._select_sample_indices``."""

    def setUp(self):
        # The extractor's single constructor argument is replaced by a mock;
        # presumably the selection helper never touches it — TODO confirm.
        self.extractor = ConceptExtractor(MagicMock())

    def test_small_total_returns_all(self):
        """A total below ``max_samples`` yields every index in order."""
        picked = self.extractor._select_sample_indices(5, max_samples=10)
        self.assertEqual(picked, list(range(5)))

    def test_equal_total_returns_all(self):
        """A total equal to ``max_samples`` also yields every index."""
        total = 10
        picked = self.extractor._select_sample_indices(total, max_samples=10)
        self.assertEqual(picked, list(range(total)))

    def test_large_total_returns_max_samples(self):
        """A total above ``max_samples`` is cut down to ``max_samples``."""
        picked = self.extractor._select_sample_indices(100, max_samples=10)
        self.assertEqual(len(picked), 10)
        # Should be evenly spaced: the first pick is 0 and the last is 90.
        self.assertEqual(picked[0], 0)
        self.assertEqual(picked[-1], 90)

    def test_returns_integers(self):
        """Every selected index is an ``int``."""
        for position in self.extractor._select_sample_indices(50, max_samples=7):
            self.assertIsInstance(position, int)

View File

@@ -48,7 +48,7 @@ class EmbeddingPipelineInitTests(TestCase):
class PipelineItemNotFoundTests(TestCase):
"""Tests for handling missing items."""
@patch("library.models.Item")
@patch("library.services.pipeline.Item")
def test_process_nonexistent_item_raises(self, mock_item_cls):
mock_item_cls.nodes.get.side_effect = Exception("Not found")
@@ -57,7 +57,7 @@ class PipelineItemNotFoundTests(TestCase):
pipeline.process_item("nonexistent-uid")
self.assertIn("Item not found", str(ctx.exception))
@patch("library.models.Item")
@patch("library.services.pipeline.Item")
def test_reprocess_nonexistent_item_raises(self, mock_item_cls):
mock_item_cls.nodes.get.side_effect = Exception("Not found")
@@ -69,9 +69,9 @@ class PipelineItemNotFoundTests(TestCase):
class PipelineNoEmbeddingModelTests(TestCase):
"""Tests for handling missing system embedding model."""
@patch("llm_manager.models.LLMModel")
@patch("library.services.pipeline.LLMModel")
@patch("library.services.pipeline.default_storage")
@patch("library.services.parsers.DocumentParser")
@patch("library.services.pipeline.DocumentParser")
def test_no_embedding_model_raises(self, mock_parser, mock_storage, mock_llm):
"""Pipeline raises ValueError if no system embedding model is configured."""
mock_llm.get_system_embedding_model.return_value = None
@@ -86,7 +86,7 @@ class PipelineNoEmbeddingModelTests(TestCase):
mock_item.chunks.all.return_value = []
mock_item.images.all.return_value = []
with patch("library.models.Item") as mock_item_cls:
with patch("library.services.pipeline.Item") as mock_item_cls:
mock_item_cls.nodes.get.return_value = mock_item
# Mock S3 read
@@ -166,11 +166,11 @@ class PipelineVisionStageTests(TestCase):
item.images.all.return_value = []
return item
@patch("library.services.concepts.ConceptExtractor")
@patch("library.services.embedding_client.EmbeddingClient")
@patch("library.services.chunker.ContentTypeChunker")
@patch("library.services.parsers.DocumentParser")
@patch("llm_manager.models.LLMModel")
@patch("library.services.pipeline.ConceptExtractor")
@patch("library.services.pipeline.EmbeddingClient")
@patch("library.services.pipeline.ContentTypeChunker")
@patch("library.services.pipeline.DocumentParser")
@patch("library.services.pipeline.LLMModel")
@patch("library.services.pipeline.default_storage")
def test_no_vision_model_marks_images_skipped(
self, mock_storage, mock_llm, mock_parser_cls,
@@ -227,12 +227,12 @@ class PipelineVisionStageTests(TestCase):
img_node.save.assert_called()
self.assertEqual(result["images_analyzed"], 0)
@patch("library.services.vision.VisionAnalyzer")
@patch("library.services.concepts.ConceptExtractor")
@patch("library.services.embedding_client.EmbeddingClient")
@patch("library.services.chunker.ContentTypeChunker")
@patch("library.services.parsers.DocumentParser")
@patch("llm_manager.models.LLMModel")
@patch("library.services.pipeline.VisionAnalyzer")
@patch("library.services.pipeline.ConceptExtractor")
@patch("library.services.pipeline.EmbeddingClient")
@patch("library.services.pipeline.ContentTypeChunker")
@patch("library.services.pipeline.DocumentParser")
@patch("library.services.pipeline.LLMModel")
@patch("library.services.pipeline.default_storage")
def test_vision_model_triggers_analysis(
self, mock_storage, mock_llm, mock_parser_cls,
@@ -287,7 +287,7 @@ class PipelineVisionStageTests(TestCase):
mock_vision_cls.assert_called_once_with(mock_vision_model, user=None)
mock_analyzer.analyze_images.assert_called_once()
@patch("llm_manager.models.LLMModel")
@patch("library.services.pipeline.LLMModel")
def test_no_images_skips_vision_entirely(self, mock_llm):
"""When there are no images, vision stage is a no-op regardless of model."""
mock_vision_model = MagicMock()
@@ -309,10 +309,10 @@ class PipelineVisionStageTests(TestCase):
patch.object(pipeline, "_store_chunks", return_value=[]), \
patch.object(pipeline, "_store_images", return_value=[]), \
patch.object(pipeline, "_associate_images_with_chunks"), \
patch("library.services.parsers.DocumentParser") as mock_parser_cls, \
patch("library.services.chunker.ContentTypeChunker") as mock_chunker_cls, \
patch("library.services.embedding_client.EmbeddingClient"), \
patch("library.services.vision.VisionAnalyzer") as mock_vision_cls:
patch("library.services.pipeline.DocumentParser") as mock_parser_cls, \
patch("library.services.pipeline.ContentTypeChunker") as mock_chunker_cls, \
patch("library.services.pipeline.EmbeddingClient"), \
patch("library.services.pipeline.VisionAnalyzer") as mock_vision_cls:
mock_parser = MagicMock()
mock_parser.parse_bytes.return_value = MagicMock(images=[], text_blocks=[])

View File

@@ -100,7 +100,7 @@ class SearchAPIResponseTest(TestCase):
self.client = APIClient()
self.client.force_authenticate(user=self.user)
@patch("library.services.search.SearchService")
@patch("library.api.views.SearchService")
def test_successful_search_response_format(self, MockService):
"""Successful search returns expected JSON structure."""
mock_response = SearchResponse(
@@ -159,7 +159,7 @@ class SearchAPIResponseTest(TestCase):
self.assertEqual(image["image_uid"], "img1")
self.assertEqual(image["image_type"], "diagram")
@patch("library.services.search.SearchService")
@patch("library.api.views.SearchService")
def test_vector_only_endpoint(self, MockService):
"""Vector-only endpoint sets correct search types."""
mock_response = SearchResponse(
@@ -184,7 +184,7 @@ class SearchAPIResponseTest(TestCase):
self.assertEqual(call_args.search_types, ["vector"])
self.assertFalse(call_args.rerank)
@patch("library.services.search.SearchService")
@patch("library.api.views.SearchService")
def test_fulltext_only_endpoint(self, MockService):
"""Fulltext-only endpoint sets correct search types."""
mock_response = SearchResponse(
@@ -208,7 +208,7 @@ class SearchAPIResponseTest(TestCase):
self.assertEqual(call_args.search_types, ["fulltext"])
self.assertFalse(call_args.rerank)
@patch("library.services.search.SearchService")
@patch("library.api.views.SearchService")
def test_reranker_skip_reason_surfaced_in_json(self, MockService):
"""``reranker_skip_reason`` propagates through the JSON API."""
mock_response = SearchResponse(

View File

@@ -48,7 +48,7 @@ class AllLibraryUidsHelperTests(TestCase):
def test_returns_empty_when_neo4j_unavailable(self):
"""Helper must not touch ``Library.nodes`` if Neo4j is down."""
with patch("library.utils.neo4j_available", return_value=False):
with patch("library.views.neo4j_available", return_value=False):
self.assertEqual(views._all_library_uids(), [])
def test_returns_every_library_uid(self):
@@ -62,7 +62,7 @@ class AllLibraryUidsHelperTests(TestCase):
fake_nodes.all.return_value = fake_libs
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
with patch("library.utils.neo4j_available", return_value=True), \
with patch("library.views.neo4j_available", return_value=True), \
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
result = views._all_library_uids()
@@ -83,7 +83,7 @@ class AllLibraryUidsHelperTests(TestCase):
fake_nodes.all.return_value = fake_libs
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
with patch("library.utils.neo4j_available", return_value=True), \
with patch("library.views.neo4j_available", return_value=True), \
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
result = views._all_library_uids()
@@ -95,7 +95,7 @@ class AllLibraryUidsHelperTests(TestCase):
fake_nodes.all.side_effect = RuntimeError("neo4j blew up")
fake_library_cls = SimpleNamespace(nodes=fake_nodes)
with patch("library.utils.neo4j_available", return_value=True), \
with patch("library.views.neo4j_available", return_value=True), \
patch.dict("sys.modules", {"library.models": SimpleNamespace(Library=fake_library_cls)}):
self.assertEqual(views._all_library_uids(), [])

View File

@@ -13,7 +13,7 @@ from django.test import TestCase, override_settings
class EmbedItemTaskTests(TestCase):
"""Tests for the embed_item task."""
@patch("library.services.pipeline.EmbeddingPipeline")
@patch("library.tasks.EmbeddingPipeline")
def test_embed_item_success(self, mock_pipeline_cls):
from library.tasks import embed_item
@@ -31,7 +31,7 @@ class EmbedItemTaskTests(TestCase):
self.assertEqual(result["item_uid"], "test-uid-123")
mock_pipeline.process_item.assert_called_once()
@patch("library.services.pipeline.EmbeddingPipeline")
@patch("library.tasks.EmbeddingPipeline")
def test_embed_item_failure(self, mock_pipeline_cls):
from library.tasks import embed_item
@@ -49,7 +49,7 @@ class EmbedItemTaskTests(TestCase):
class ReembedItemTaskTests(TestCase):
"""Tests for the reembed_item task."""
@patch("library.services.pipeline.EmbeddingPipeline")
@patch("library.tasks.EmbeddingPipeline")
def test_reembed_item_success(self, mock_pipeline_cls):
from library.tasks import reembed_item

View File

@@ -319,20 +319,20 @@ def library_delete(request, uid):
messages.error(request, f"Library not found: {e}")
return redirect("library:library-list")
# Daedalus owns the lifecycle of workspace-scoped libraries — they can
# only be deleted via DELETE /library/api/workspaces/{workspace_id}/.
# Block the human delete path so a stray click can't desync state.
if lib.workspace_id:
messages.error(
request,
f'"{lib.name}" is managed by Daedalus workspace '
f"{lib.workspace_id}. Delete it from Daedalus, not here.",
)
return redirect("library:library-detail", uid=uid)
# Daedalus owns the lifecycle of workspace-scoped libraries. Deleting one
# here is allowed but discouraged: the confirm page warns that Daedalus
# still holds the source content and will recreate + re-embed it on the
# next sync. The risk is low (no data loss — only re-embedding cost), and
# this is the supported escape hatch for clearing an orphaned Library that
# blocks workspace re-registration.
if request.method == "POST":
name = lib.name
lib.delete()
# Use the shared cascade so child nodes (Collections/Items/Chunks/
# Images) and orphan Concepts are removed too — a bare lib.delete()
# would leak them.
from .services.library_delete import delete_library_cascade
delete_library_cascade(lib)
messages.success(request, f'Library "{name}" deleted.')
return redirect("library:library-list")
return render(request, "library/library_confirm_delete.html", {"library": lib})