fix(library): normalize MIME types to file extensions in Daedalus ingest
All checks were successful
CVE Scan & Docker Build / security-scan (push) Successful in 1m9s
CVE Scan & Docker Build / build-and-push (push) Successful in 2m15s

Daedalus may send `file_type` as a MIME type (e.g. `text/markdown`) rather
than a bare extension. Add a `_normalize_file_type` helper with a MIME→ext
lookup table and sensible fallbacks so ingested items are stored with
proper extensions like `md` instead of `text/markdown`.
This commit is contained in:
2026-05-04 12:39:54 -04:00
parent 37bb38ee43
commit 56e977ffb5

View File

@@ -17,6 +17,41 @@ logger = logging.getLogger(__name__)
# Cache key pattern for task progress. The "{task_id}" placeholder is
# presumably substituted per task (e.g. via str.format) — confirm at call sites.
PROGRESS_KEY = "library:task:{task_id}:progress"
# MIME type → file extension, for when Daedalus sends content_type as file_type
_MIME_TO_EXT = {
"text/markdown": "md",
"text/plain": "txt",
"text/html": "html",
"text/csv": "csv",
"text/xml": "xml",
"application/pdf": "pdf",
"application/epub+zip": "epub",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
"application/json": "json",
"image/jpeg": "jpg",
"image/png": "png",
"image/gif": "gif",
"image/webp": "webp",
"image/tiff": "tiff",
}
def _normalize_file_type(raw: str) -> str:
"""Convert a MIME type or extension string to a bare extension."""
raw = (raw or "").strip().lower()
if "/" in raw:
# It's a MIME type — look up or derive from the subtype
ext = _MIME_TO_EXT.get(raw)
if ext:
return ext
# Fallback: use the part after the slash, strip vendor prefixes
subtype = raw.split("/", 1)[1]
subtype = subtype.split("+")[-1] # e.g. "epub+zip" → "zip"; "vnd.ms-excel" → keep
return subtype.lstrip(".") or "bin"
return raw.lstrip(".") or "bin"
def _update_progress(task, percent: int, message: str):
"""
@@ -373,7 +408,7 @@ def ingest_from_daedalus(self, job_id: str):
data = fetch_from_daedalus(job.s3_key)
# --- 4. Create Item node ---
ext = (job.file_type or "bin").lstrip(".").lower() or "bin"
ext = _normalize_file_type(job.file_type)
item = Item(
title=job.title,
file_type=ext,