Files
mnemosyne/mnemosyne/library/tests/test_text_utils.py
Robert Helewka 99bdb4ac92 Add Themis application with custom widgets, views, and utilities
- Implemented custom form widgets for date, time, and datetime fields with DaisyUI styling.
- Created utility functions for formatting dates, times, and numbers according to user preferences.
- Developed views for profile settings, API key management, and notifications, including health check endpoints.
- Added URL configurations for Themis tests and main application routes.
- Established test cases for custom widgets to ensure proper functionality and integration.
- Defined project metadata and dependencies in pyproject.toml for package management.
2026-03-21 02:00:18 +00:00

122 lines
3.8 KiB
Python

"""
Tests for text sanitization utilities.
"""
from django.test import TestCase
from library.services.text_utils import (
clean_pdf_artifacts,
remove_excessive_whitespace,
sanitize_text,
truncate_text,
)
class SanitizeTextTests(TestCase):
    """Tests for the sanitize_text function.

    Every call passes ``log_changes=False`` so the tests exercise only the
    returned value.
    """

    def test_empty_string(self):
        """An empty string comes back as an empty string."""
        self.assertEqual(sanitize_text("", log_changes=False), "")

    def test_none_input(self):
        """None is returned as None rather than raising or being stringified."""
        self.assertIsNone(sanitize_text(None, log_changes=False))

    def test_clean_text_unchanged(self):
        """Text with nothing to sanitize is returned verbatim."""
        text = "Hello, this is clean text."
        self.assertEqual(sanitize_text(text, log_changes=False), text)

    def test_removes_null_bytes(self):
        """NUL bytes are dropped without inserting a replacement."""
        text = "Hello\x00World"
        result = sanitize_text(text, log_changes=False)
        self.assertNotIn("\x00", result)
        self.assertEqual(result, "HelloWorld")

    def test_removes_control_characters(self):
        """Non-printing control characters (BEL, SO) are removed."""
        text = "Hello\x07World\x0eTest"
        result = sanitize_text(text, log_changes=False)
        self.assertNotIn("\x07", result)
        self.assertNotIn("\x0e", result)

    def test_preserves_newlines_and_tabs(self):
        """Newlines and tabs survive sanitization."""
        text = "Hello\nWorld\tTest\r\n"
        result = sanitize_text(text, log_changes=False)
        self.assertIn("\n", result)
        self.assertIn("\t", result)

    def test_removes_zero_width_characters(self):
        """Zero-width space (U+200B) is stripped."""
        text = "Hello\u200bWorld"
        result = sanitize_text(text, log_changes=False)
        self.assertNotIn("\u200b", result)

    def test_normalizes_unicode(self):
        """A combining sequence is composed into its precomposed form."""
        # é as combining characters vs. precomposed
        combining = "e\u0301"  # e + combining acute
        result = sanitize_text(combining, log_changes=False)
        self.assertEqual(result, "\u00e9")  # precomposed é

    def test_cleans_pdf_ligatures(self):
        """Typographic ligatures are expanded to their ASCII letter pairs."""
        # The input must contain the actual ligature code points; plain ASCII
        # "finding the flow of effort" already contains "fi"/"fl"/"ff" and
        # would pass even if no ligature handling existed. Escapes are used
        # so the characters cannot be silently normalized by editors or
        # copy/paste.
        ligatures = ("\ufb01", "\ufb02", "\ufb00")  # fi, fl, ff ligatures
        text = "\ufb01nding the \ufb02ow of e\ufb00ort"
        result = sanitize_text(text, log_changes=False)
        self.assertIn("fi", result)
        self.assertIn("fl", result)
        self.assertIn("ff", result)
        for ligature in ligatures:
            self.assertNotIn(ligature, result)
class CleanPdfArtifactsTests(TestCase):
    """Tests for clean_pdf_artifacts."""

    def test_replaces_smart_quotes(self):
        """Curly double and single quotes become their straight ASCII forms."""
        cleaned = clean_pdf_artifacts("\u201cHello\u201d \u2018World\u2019")
        self.assertEqual(cleaned, '"Hello" \'World\'')

    def test_replaces_dashes(self):
        """En dash and em dash are both replaced with a plain hyphen."""
        cleaned = clean_pdf_artifacts("word\u2013word\u2014end")
        self.assertEqual(cleaned, "word-word-end")

    def test_replaces_ellipsis(self):
        """The single-character ellipsis expands to three periods."""
        cleaned = clean_pdf_artifacts("wait\u2026")
        self.assertEqual(cleaned, "wait...")

    def test_replaces_nbsp(self):
        """A non-breaking space is replaced with a regular space."""
        cleaned = clean_pdf_artifacts("non\u00a0breaking")
        self.assertEqual(cleaned, "non breaking")
class RemoveExcessiveWhitespaceTests(TestCase):
    """Tests for remove_excessive_whitespace."""

    def test_collapses_spaces(self):
        """A run of spaces between words collapses to a single space."""
        # The input needs an actual run of spaces: "a b" -> "a b" would pass
        # even if the function returned its argument untouched.
        self.assertEqual(remove_excessive_whitespace("a    b"), "a b")

    def test_collapses_newlines(self):
        """Four consecutive newlines are reduced to two."""
        self.assertEqual(
            remove_excessive_whitespace("a\n\n\n\nb"), "a\n\nb"
        )

    def test_strips_line_whitespace(self):
        """Leading and trailing spaces are removed from every line."""
        self.assertEqual(
            remove_excessive_whitespace(" hello \n world "),
            "hello\nworld",
        )
class TruncateTextTests(TestCase):
    """Tests for truncate_text."""

    def test_short_text_unchanged(self):
        """Text shorter than the limit is returned as-is, with no suffix."""
        self.assertEqual(truncate_text("hello", 100), "hello")

    def test_truncates_at_word_boundary(self):
        """Over-long text ends with the default "..." and respects the limit."""
        text = "hello beautiful world"
        result = truncate_text(text, 15)
        self.assertTrue(result.endswith("..."))
        self.assertLessEqual(len(result), 15)

    def test_custom_suffix(self):
        """The ``suffix`` argument replaces the default "..." marker."""
        text = "hello beautiful world"

        # A non-empty custom suffix must be what the result ends with, and it
        # counts toward the length limit just like the default does.
        marked = truncate_text(text, 15, suffix=">>")
        self.assertTrue(marked.endswith(">>"))
        self.assertLessEqual(len(marked), 15)

        # An empty suffix means nothing is appended. ``endswith("")`` is true
        # for every string, so assert the observable effects instead: the
        # default marker is absent and the limit still holds.
        bare = truncate_text(text, 15, suffix="")
        self.assertFalse(bare.endswith("..."))
        self.assertLessEqual(len(bare), 15)