fix(library): normalize MIME types to file extensions in Daedalus ingest
Daedalus may send `file_type` as a MIME type (e.g. `text/markdown`) rather than a bare extension. Add a `_normalize_file_type` helper with a MIME→ext lookup table and sensible fallbacks so ingested items are stored with proper extensions like `md` instead of `text/markdown`.
This commit is contained in:
@@ -17,6 +17,41 @@ logger = logging.getLogger(__name__)
|
||||
# Cache key pattern for task progress
|
||||
PROGRESS_KEY = "library:task:{task_id}:progress"  # template containing a {task_id} placeholder; presumably filled in per task — confirm at call sites
|
||||
|
||||
# MIME type → file extension, for when Daedalus sends content_type as file_type.
# Keys are lowercase and carry no parameters; _normalize_file_type() lowercases
# and strips parameters before looking a value up here.
_MIME_TO_EXT = {
    # Text
    "text/markdown": "md",
    "text/x-markdown": "md",
    "text/plain": "txt",
    "text/html": "html",
    "text/csv": "csv",
    "text/xml": "xml",
    # Documents
    "application/pdf": "pdf",
    "application/epub+zip": "epub",
    "application/msword": "doc",
    "application/vnd.ms-excel": "xls",
    "application/vnd.ms-powerpoint": "ppt",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
    "application/json": "json",
    # Images
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
    "image/webp": "webp",
    "image/tiff": "tiff",
    "image/svg+xml": "svg",
}


def _normalize_file_type(raw: str) -> str:
    """Convert a MIME type or extension string to a bare extension.

    Args:
        raw: Either a file extension (``"md"``, ``".PDF"``) or a MIME type,
            optionally with parameters (``"text/markdown; charset=utf-8"``).
            ``None`` and empty strings are accepted.

    Returns:
        A lowercase extension without a leading dot. Known MIME types are
        mapped through ``_MIME_TO_EXT``; unknown ones fall back to the
        subtype (or its structured-syntax suffix after the last ``+``).
        ``"bin"`` is returned when nothing usable remains.
    """
    value = (raw or "").strip().lower()

    # Anything without a slash is treated as an extension already.
    if "/" not in value:
        return value.lstrip(".") or "bin"

    # Drop MIME parameters such as "; charset=utf-8" so they neither defeat
    # the table lookup nor leak into the stored extension.
    mime = value.split(";", 1)[0].strip()

    known = _MIME_TO_EXT.get(mime)
    if known:
        return known

    # Fallback: derive from the subtype. A structured-syntax suffix wins
    # ("ld+json" → "json"); subtypes without "+" are kept verbatim, vendor
    # prefix included ("vnd.foo" stays "vnd.foo").
    subtype = mime.split("/", 1)[1]
    suffix = subtype.rsplit("+", 1)[-1]
    return suffix.lstrip(".") or "bin"
|
||||
|
||||
|
||||
def _update_progress(task, percent: int, message: str):
|
||||
"""
|
||||
@@ -373,7 +408,7 @@ def ingest_from_daedalus(self, job_id: str):
|
||||
data = fetch_from_daedalus(job.s3_key)
|
||||
|
||||
# --- 4. Create Item node ---
|
||||
ext = (job.file_type or "bin").lstrip(".").lower() or "bin"
|
||||
ext = _normalize_file_type(job.file_type)
|
||||
item = Item(
|
||||
title=job.title,
|
||||
file_type=ext,
|
||||
|
||||
Reference in New Issue
Block a user