From a0559e70de008b2491a924df74f0e1f1042a0d39 Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:35:21 -0700 Subject: [PATCH 1/7] test(#1278): cover delete_unused_files command (item 5) Backfill regression tests for the delete_unused_files management command, which runs on every container start and deletes media files -- the highest- risk untested path in the app (zero prior coverage). Tests run against a throwaway MEDIA_ROOT and pin: - orphans deleted, DB-referenced files kept (pubs/talks/posters) - easy-thumbnails _detail cache files preserved (owned by thumbnail_cleanup) - accurate (count, bytes) tally from the delete helper - no crash on empty media, no DB rows, or an empty pdf_file FileField Co-Authored-By: Claude Opus 4.8 (1M context) --- website/tests/test_delete_unused_files.py | 155 ++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 website/tests/test_delete_unused_files.py diff --git a/website/tests/test_delete_unused_files.py b/website/tests/test_delete_unused_files.py new file mode 100644 index 00000000..1ec5f7a2 --- /dev/null +++ b/website/tests/test_delete_unused_files.py @@ -0,0 +1,155 @@ +""" +Regression tests for the ``delete_unused_files`` management command (#1278, item 5). + +This command runs on **every container start** (see ``docker-entrypoint.sh``) and +**deletes files** off the media filesystem, so it is the single highest-risk +untested code path in the app: a logic slip here silently destroys real +publication / talk / poster PDFs and thumbnails in production. Until now it had +zero tests. + +The command globs ``MEDIA_ROOT/{publications,talks,posters}`` (and their +``images/`` thumbnail subdirs) for files, removes from that set anything still +referenced by a DB row, and deletes whatever is left over. These tests pin the +three behaviors that matter: + +1. **Orphans are deleted, referenced files are kept** — the core contract. +2. **easy-thumbnails ``_detail`` cache files are never touched** — they are + owned by ``thumbnail_cleanup``; deleting them here would fight that command. +3. **It never crashes** on empty media dirs, zero DB rows, or a row whose + ``FileField`` is empty (the null-``.path`` crash class called out in #1278). + +Every test runs against a throwaway ``MEDIA_ROOT`` (a temp dir wired in via +``override_settings``) so no real media is ever at risk. +""" + +import os +import shutil +import tempfile +from datetime import date + +from django.core.management import call_command +from django.test import override_settings + +from website.models import Publication +from website.tests.base import DatabaseTestCase + + +class DeleteUnusedFilesTests(DatabaseTestCase): + """Exercise ``manage.py delete_unused_files`` against a temp MEDIA_ROOT.""" + + def setUp(self): + super().setUp() + # A disposable media root so model saves and the command's deletions + # only ever touch files under here, never the developer's real media/. + self.media_root = tempfile.mkdtemp(prefix="ml_media_test_") + self.addCleanup(shutil.rmtree, self.media_root, ignore_errors=True) + + override = override_settings(MEDIA_ROOT=self.media_root) + override.enable() + self.addCleanup(override.disable) + + # The command globs these dirs; create them so an empty run has + # something to glob (mirrors a freshly-deployed container). + for sub in ("publications/images", "talks/images", "posters/images"): + os.makedirs(os.path.join(self.media_root, sub), exist_ok=True) + + def _write(self, relpath, content=b"unused"): + """Write a stray file under MEDIA_ROOT and return its absolute path.""" + full = os.path.join(self.media_root, relpath) + os.makedirs(os.path.dirname(full), exist_ok=True) + with open(full, "wb") as fh: + fh.write(content) + return full + + def test_orphan_publication_pdf_is_deleted_referenced_is_kept(self): + """The whole point: drop the orphan, keep the file a DB row points at.""" + pub = self.make_publication(title="Kept Paper") + referenced = pub.pdf_file.path + self.assertTrue(os.path.exists(referenced)) + + orphan = self._write("publications/orphan_abandoned.pdf", b"%PDF-1.4 orphan") + + call_command("delete_unused_files") + + self.assertFalse(os.path.exists(orphan), "unreferenced PDF should be deleted") + self.assertTrue(os.path.exists(referenced), "referenced PDF must be kept") + + def test_easy_thumbnail_detail_files_are_preserved(self): + """``_detail`` cache files belong to thumbnail_cleanup, not this command.""" + detail = self._write( + "publications/images/Foo_CHI2022.jpg.300x0_q85_detail.jpg" + ) + orphan_thumb = self._write("publications/images/orphan_thumb.jpg") + + call_command("delete_unused_files") + + self.assertTrue( + os.path.exists(detail), + "_detail easy-thumbnail file must be preserved", + ) + self.assertFalse( + os.path.exists(orphan_thumb), + "unreferenced thumbnail should be deleted", + ) + + def test_orphan_talk_pdf_and_raw_files_are_deleted(self): + """Talks: orphan .pdf/.pptx/.key go; the referenced talk PDF stays.""" + talk = self.make_talk(title="Kept Talk") + referenced = talk.pdf_file.path + + orphan_pdf = self._write("talks/orphan_talk.pdf") + orphan_pptx = self._write("talks/orphan_deck.pptx") + orphan_key = self._write("talks/orphan_deck.key") + + call_command("delete_unused_files") + + self.assertTrue(os.path.exists(referenced), "referenced talk PDF must be kept") + for stray in (orphan_pdf, orphan_pptx, orphan_key): + self.assertFalse(os.path.exists(stray), f"{stray} should be deleted") + + def test_orphan_poster_files_are_deleted(self): + """Posters: the raw set is .pptx/.key/.ai (note .ai, unlike talks).""" + strays = [ + self._write("posters/orphan_poster.pdf"), + self._write("posters/orphan_poster.ai"), + self._write("posters/orphan_poster.key"), + self._write("posters/orphan_poster.pptx"), + ] + + call_command("delete_unused_files") + + for stray in strays: + self.assertFalse(os.path.exists(stray), f"{stray} should be deleted") + + def test_delete_unused_files_helper_reports_count_and_bytes(self): + """The low-level helper returns an accurate (count, total_bytes) tally.""" + from website.management.commands.delete_unused_files import Command + + f1 = self._write("publications/a.pdf", b"12345") # 5 bytes + f2 = self._write("publications/b.pdf", b"678") # 3 bytes + + count, total_bytes = Command().delete_unused_files([f1, f2]) + + self.assertEqual(count, 2) + self.assertEqual(total_bytes, 8) + self.assertFalse(os.path.exists(f1)) + self.assertFalse(os.path.exists(f2)) + + def test_runs_cleanly_on_empty_media_and_no_db_rows(self): + """Fresh deploy: empty media dirs, no DB rows -> no exception, no deletions.""" + call_command("delete_unused_files") # reaching the next line == no crash + + for sub in ("publications", "talks", "posters"): + self.assertTrue(os.path.isdir(os.path.join(self.media_root, sub))) + + def test_artifact_with_empty_pdf_field_does_not_crash(self): + """A row whose pdf_file is empty must not crash the guarded .path access.""" + # Guards the null-FileField crash class flagged in #1278: the command's + # `if pub.pdf_file:` check must short-circuit before touching `.path`. + # Built via objects.create (a single, first-time save) so this test + # isolates the *command's* guard; the separate Artifact.save() null-pdf + # re-save crash is pinned in test_artifact.py. + Publication.objects.create(title="No PDF", date=date(2024, 1, 1)) + self.assertFalse(bool(Publication.objects.get(title="No PDF").pdf_file)) + + call_command("delete_unused_files") # no AttributeError on empty .path From 443a66d587771a6439713477b2bfd764028a0892 Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:36:23 -0700 Subject: [PATCH 2/7] fix(#1278): guard Artifact.save() thumbnail block against empty pdf_file Found while backfilling tests (item 5). pdf_file is nullable, but the thumbnail-generation block in Artifact.save() called os.path.basename(self.pdf_file.name) unconditionally on every non-first save. self.pdf_file.name is None when the field is empty, raising TypeError. Any second save of a PDF-less artifact hit it (an admin edit, or the authors_changed m2m signal re-saving to rename files). Wrap the block in 'if self.pdf_file:' -- no PDF means no thumbnail to generate. Adds a DB-backed regression test (red before, green after). Co-Authored-By: Claude Opus 4.8 (1M context) --- website/models/artifact.py | 48 ++++++++++++++++++---------------- website/tests/test_artifact.py | 30 +++++++++++++++++++++ 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/website/models/artifact.py b/website/models/artifact.py index 708e9bc5..34f2ed4e 100644 --- a/website/models/artifact.py +++ b/website/models/artifact.py @@ -307,28 +307,32 @@ def save(self, *args, **kwargs): else: _logger.debug("No authors exist yet, so will wait for m2m authors_changed to rename files") - # Generate a thumbnail if one does not already exist - pdf_filename = os.path.basename(self.pdf_file.name) - pdf_filename_no_ext, ext = os.path.splitext(pdf_filename) - thumbnail_filename = os.path.basename(pdf_filename_no_ext) + ".jpg" - thumbnail_filename_with_local_path = self.get_upload_thumbnail_dir(thumbnail_filename) - thumbnail_exists_in_storage = self.thumbnail.storage.exists(thumbnail_filename_with_local_path) - if not self.thumbnail or not thumbnail_exists_in_storage: - _logger.debug(f"The thumbnail for artifact.id={self.id} does not exist at {thumbnail_filename_with_local_path}, generating...") - - # generate a thumbnail - if self.pdf_file.storage.exists(self.pdf_file.name): - thumbnail_local_path = os.path.dirname(thumbnail_filename_with_local_path) - ml_fileutils.generate_thumbnail_for_pdf(self.pdf_file, self.thumbnail, thumbnail_local_path) - - # If 'update_fields' does not exist in kwargs, all fields are saved - # Add 'thumbnail' to the update_fields list so that it gets updated in the db - if 'update_fields' in kwargs: - kwargs.setdefault('update_fields', []).append('thumbnail') - else: - _logger.debug(f"Could not generate a thumbnail because the pdf {self.pdf_file.path} was not found in storage") - elif thumbnail_exists_in_storage: - _logger.debug(f"The thumbnail for artifact.id={self.id} already exists at {thumbnail_filename_with_local_path}, so not generating") + # Generate a thumbnail if one does not already exist. Guard on + # pdf_file: it's nullable, and self.pdf_file.name is None when empty, + # which would crash os.path.basename below (#1278). No PDF simply + # means there is no thumbnail to generate. + if self.pdf_file: + pdf_filename = os.path.basename(self.pdf_file.name) + pdf_filename_no_ext, ext = os.path.splitext(pdf_filename) + thumbnail_filename = os.path.basename(pdf_filename_no_ext) + ".jpg" + thumbnail_filename_with_local_path = self.get_upload_thumbnail_dir(thumbnail_filename) + thumbnail_exists_in_storage = self.thumbnail.storage.exists(thumbnail_filename_with_local_path) + if not self.thumbnail or not thumbnail_exists_in_storage: + _logger.debug(f"The thumbnail for artifact.id={self.id} does not exist at {thumbnail_filename_with_local_path}, generating...") + + # generate a thumbnail + if self.pdf_file.storage.exists(self.pdf_file.name): + thumbnail_local_path = os.path.dirname(thumbnail_filename_with_local_path) + ml_fileutils.generate_thumbnail_for_pdf(self.pdf_file, self.thumbnail, thumbnail_local_path) + + # If 'update_fields' does not exist in kwargs, all fields are saved + # Add 'thumbnail' to the update_fields list so that it gets updated in the db + if 'update_fields' in kwargs: + kwargs.setdefault('update_fields', []).append('thumbnail') + else: + _logger.debug(f"Could not generate a thumbnail because the pdf {self.pdf_file.path} was not found in storage") + elif thumbnail_exists_in_storage: + _logger.debug(f"The thumbnail for artifact.id={self.id} already exists at {thumbnail_filename_with_local_path}, so not generating") _logger.debug(f"Calling super().save(*args, **kwargs)") diff --git a/website/tests/test_artifact.py b/website/tests/test_artifact.py index 65fab6c2..7b2b2ef2 100644 --- a/website/tests/test_artifact.py +++ b/website/tests/test_artifact.py @@ -1,9 +1,13 @@ """Tests for Artifact model methods (filename-drift check, raw-file label).""" +from datetime import date from unittest.mock import MagicMock, patch from django.test import SimpleTestCase +from website.models import Publication +from website.tests.base import DatabaseTestCase + # --- Artifact filename check regression ----------------------------------- @@ -113,3 +117,29 @@ def test_no_raw_file_returns_none(self): def test_no_extension_returns_none(self): self.assertIsNone(self._label("talks/Doe2020Title")) + + +# --- Artifact.save() with no PDF ------------------------------------------ + + +class ArtifactSaveNullPdfTests(DatabaseTestCase): + """ + Regression test for Artifact.save() when ``pdf_file`` is empty (#1278). + + ``pdf_file`` is nullable (``null=True, default=None``), so an artifact can + legitimately exist without a PDF. But the thumbnail-generation block in + Artifact.save() ran ``os.path.basename(self.pdf_file.name)`` unconditionally + on every non-first save -- and ``self.pdf_file.name`` is ``None`` when the + field is empty, raising ``TypeError: expected str ... not NoneType``. + + Any second save of a PDF-less artifact triggered it: an admin edit, or the + ``authors_changed`` m2m signal re-saving to rename files. This pins the + guard so a missing PDF simply means "no thumbnail to generate". + """ + + def test_resaving_artifact_without_pdf_does_not_crash(self): + pub = Publication.objects.create(title="No PDF", date=date(2024, 1, 1)) + # First save (objects.create) is fine; the crash was on the *second*. + pub.location = "Seattle, WA" + pub.save() # must not raise + self.assertFalse(bool(Publication.objects.get(pk=pub.pk).pdf_file)) From db991c448a27c449b8035cd77f64a15e6e66c107 Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:38:59 -0700 Subject: [PATCH 3/7] test(#1278): add public view smoke-sweep (item 5) GET every public page route (built via reverse so renamed url names fail here too) and assert 200. Catches the NoReverseMatch / template- AttributeError / missing-context-key regression class that only surfaces through the real URL/view/template stack. Covers: index, people, publications, awards, projects, news_listing, view_project_people, member (by id + name), member_artifacts (all 4 types), project detail, news item (by id + slug). The two media routes are excluded (dedicated tests already cover them). Fixtures form a small connected graph so populated template branches render, not just empty states. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/tests/test_view_smoke.py | 124 +++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 website/tests/test_view_smoke.py diff --git a/website/tests/test_view_smoke.py b/website/tests/test_view_smoke.py new file mode 100644 index 00000000..5da2c43b --- /dev/null +++ b/website/tests/test_view_smoke.py @@ -0,0 +1,124 @@ +""" +View smoke-sweep: GET every public page route and assert it renders (#1278, item 5). + +The cheap, high-yield test the issue calls for. It doesn't check page *content* +-- it checks that each public URL resolves, the view runs, and the template +renders without blowing up. That catches a whole class of regressions that unit +tests miss because they only surface through the real URL/view/template stack: +NoReverseMatch from a renamed url name, AttributeError from a template touching a +field that moved, a context key a partial expects but the view stopped passing. + +Routes are built with ``reverse()`` so a renamed url name fails here too (rather +than silently skipping the route). The two media routes (serve_pdf, +serve_publication_image) are intentionally excluded -- they need real files on +disk and already have dedicated tests (test_serve_pdf, test_serve_publication_image). + +The fixtures form a small connected graph (a person who authored a publication +and a talk, both tied to a visible project they have a role on, plus a news item) +so the listing and detail templates exercise their populated branches, not just +the empty-state ones. +""" + +from datetime import date + +from website.tests.base import DatabaseTestCase +from website.tests.factories import ProjectRoleFactory + +from django.urls import reverse + + +# Public page routes that take no arguments. view_project_people is an AJAX-y +# endpoint but is a plain GET that returns a rendered template, so it sweeps here. +STATIC_ROUTE_NAMES = [ + "index", + "people", + "publications", + "awards", + "projects", + "news_listing", + "view_project_people", +] + +# member_artifacts serves one section at a time; these are the valid types +# (anything else is a deliberate 404 -- see member.py::ARTIFACT_PAGE_SIZES). +MEMBER_ARTIFACT_TYPES = ["projects", "publications", "videos", "talks"] + + +class PublicViewSmokeTests(DatabaseTestCase): + """GET every public page route and assert a 200 (catches the template bug class).""" + + def setUp(self): + super().setUp() + self.person = self.make_person(first_name="Smoke", last_name="Tester") + # start_date set so the page renders the normal "2022–Present" date + # string; the null-start_date edge case is pinned in test_project.py. + self.project = self.make_project( + name="Smoke Project", is_visible=True, start_date=date(2022, 1, 1) + ) + + # A publication + talk authored by the person and tied to the project, + # so member/project/listing templates render their populated branches. + self.pub = self.make_publication(title="Smoke Paper", authors=[self.person]) + self.pub.projects.add(self.project) + self.talk = self.make_talk(title="Smoke Talk", authors=[self.person]) + + # A role so the project has a member (drives view_project_people + the + # member page's project section). + ProjectRoleFactory(person=self.person, project=self.project) + + self.news = self.make_news_item(title="Smoke News", author=self.person) + + def _assert_ok(self, url): + response = self.client.get(url) + self.assertEqual( + response.status_code, 200, + f"GET {url} returned {response.status_code}, expected 200", + ) + return response + + def test_static_pages_render(self): + for name in STATIC_ROUTE_NAMES: + with self.subTest(route=name): + self._assert_ok(reverse(f"website:{name}")) + + def test_member_page_by_id_and_by_name(self): + self._assert_ok( + reverse("website:member_by_id", kwargs={"member_id": self.person.id}) + ) + self._assert_ok( + reverse( + "website:member_by_name", + kwargs={"member_name": self.person.url_name}, + ) + ) + + def test_member_artifacts_all_types(self): + for artifact_type in MEMBER_ARTIFACT_TYPES: + with self.subTest(artifact_type=artifact_type): + self._assert_ok( + reverse( + "website:member_artifacts", + kwargs={ + "member_id": self.person.id, + "artifact_type": artifact_type, + }, + ) + ) + + def test_project_detail_page(self): + self._assert_ok( + reverse( + "website:project", + kwargs={"project_name": self.project.short_name}, + ) + ) + + def test_news_item_by_id_and_by_slug(self): + self._assert_ok( + reverse("website:news_item_by_id", kwargs={"id": self.news.id}) + ) + self._assert_ok( + reverse( + "website:news_item_by_slug", kwargs={"slug": self.news.slug} + ) + ) From c06c76ca760732a00e44c4098f78ca5caac0b2ef Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:40:11 -0700 Subject: [PATCH 4/7] fix(#1278): guard get_project_dates_str against null start_date Found by the view smoke-sweep (item 5). Project.start_date is nullable, but get_project_dates_str accessed self.start_date.year unconditionally, so visiting the public page of a project with no start_date 500'd with AttributeError. Return '' (template prints date_str as-is, so it renders nothing). Adds regression tests for all four branches. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/models/project.py | 6 +++++ website/tests/test_project.py | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/website/models/project.py b/website/models/project.py index 58dd113d..cf0b8ba6 100644 --- a/website/models/project.py +++ b/website/models/project.py @@ -613,6 +613,12 @@ def get_project_dates_str(self): If the end date is None, it returns a string in the format 'start_year–Present'. Otherwise, it returns a string in the format 'start_year–end_year'. """ + # start_date is nullable, so a project may have none. Without it there's + # no range to format; return "" so the template renders nothing rather + # than 500ing on self.start_date.year (#1278). + if self.start_date is None: + return "" + # If end_date is None, return 'start_year–Present' if self.end_date is None: return f"{self.start_date.year}–Present" diff --git a/website/tests/test_project.py b/website/tests/test_project.py index 2bd2a634..dc092337 100644 --- a/website/tests/test_project.py +++ b/website/tests/test_project.py @@ -5,9 +5,50 @@ from django.test import SimpleTestCase +from website.models.project import Project from website.tests.base import DatabaseTestCase +# --- Project date-range string -------------------------------------------- + + +class ProjectDatesStrTests(SimpleTestCase): + """ + Tests for Project.get_project_dates_str, including the null-start_date + guard (#1278). + + start_date is nullable (``null=True, blank=True``), so a project can exist + without one. The method accessed ``self.start_date.year`` unconditionally, + so rendering such a project's page (the template prints ``{{ date_str }}``) + crashed with AttributeError -- a real 500 found by the view smoke-sweep. + A missing start_date now yields an empty string (renders as nothing). + """ + + def _dates_str(self, start, end): + obj = MagicMock() + obj.start_date = start + obj.end_date = end + return Project.get_project_dates_str(obj) + + def test_ongoing_project_returns_start_present(self): + self.assertEqual(self._dates_str(date(2022, 1, 1), None), "2022–Present") + + def test_same_year_returns_single_year(self): + self.assertEqual( + self._dates_str(date(2022, 1, 1), date(2022, 6, 1)), "2022" + ) + + def test_multi_year_returns_start_end_range(self): + self.assertEqual( + self._dates_str(date(2020, 1, 1), date(2022, 6, 1)), "2020–2022" + ) + + def test_null_start_date_returns_empty_string(self): + self.assertEqual(self._dates_str(None, None), "") + # Even with an end_date set, a missing start can't form a range. + self.assertEqual(self._dates_str(None, date(2022, 6, 1)), "") + + # --- Project member count regression -------------------------------------- From ed3e8a25ac364f1ad286a8c36c18d15cba9d6279 Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:42:00 -0700 Subject: [PATCH 5/7] test(#1278): unit-test pure utils (timeutils, ml_utils, fileutils) (item 5) Backfill fast SimpleTestCase coverage for previously-untested pure helpers: - timeutils: humanize_duration buckets, ends_with_year, remove_trailing_year - ml_utils: slugify_max, school/department abbreviations, get_video_embed (youtube/vimeo/unknown), clean_forum_name, get_closest_match, weighted_choice - fileutils: is_image, get_ckeditor_image_filename, get_filename_no_ext, and the canonical get_filename_for_artifact naming (suffix, ext, truncation) 43 tests, no DB. Star Wars image helpers and filesystem-touching fileutils (thumbnail/PDF) stay covered by their existing/integration tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/tests/test_fileutils.py | 97 +++++++++++++++++++++++++ website/tests/test_ml_utils.py | 122 ++++++++++++++++++++++++++++++++ website/tests/test_timeutils.py | 67 ++++++++++++++++++ 3 files changed, 286 insertions(+) create mode 100644 website/tests/test_fileutils.py create mode 100644 website/tests/test_ml_utils.py create mode 100644 website/tests/test_timeutils.py diff --git a/website/tests/test_fileutils.py b/website/tests/test_fileutils.py new file mode 100644 index 00000000..a4edb61d --- /dev/null +++ b/website/tests/test_fileutils.py @@ -0,0 +1,97 @@ +"""Unit tests for the pure filename helpers in website.utils.fileutils (#1278, item 5). + +These build the deterministic, archive-friendly filenames the artifact rename +pipeline depends on (get_filename_for_artifact and friends). The Star Wars +image helpers in this module are already covered by test_easter_egg_picker; +the filesystem-touching helpers (thumbnail generation, PDF page count) are left +to integration coverage. No DB. +""" + +import os +from datetime import date + +from django.test import SimpleTestCase + +from website.utils.fileutils import ( + ensure_filename_is_unique, + get_ckeditor_image_filename, + get_filename_for_artifact, + get_filename_no_ext, + get_filename_without_ext_for_artifact, + is_image, +) + + +class IsImageTests(SimpleTestCase): + def test_known_image_extensions(self): + for name in ("photo.jpg", "PHOTO.JPEG", "art.png", "anim.gif"): + self.assertTrue(is_image(name), name) + + def test_non_image_extensions(self): + for name in ("doc.pdf", "deck.pptx", "noext"): + self.assertFalse(is_image(name), name) + + +class GetCkeditorImageFilenameTests(SimpleTestCase): + def test_uppercases_filename(self): + self.assertEqual(get_ckeditor_image_filename("My File.png"), "MY FILE.PNG") + + +class GetFilenameNoExtTests(SimpleTestCase): + def test_strips_path_and_extension(self): + self.assertEqual( + get_filename_no_ext("publications/Doe_Paper_CHI2022.pdf"), + "Doe_Paper_CHI2022", + ) + + +class GetFilenameForArtifactTests(SimpleTestCase): + """The canonical Lastname_Title_Forum+Year naming used for archived files.""" + + def test_basic_filename(self): + self.assertEqual( + get_filename_for_artifact( + "Froehlich", "This Is A Test", "CHI", date(2022, 1, 1), "pdf" + ), + "Froehlich_ThisIsATest_CHI2022.pdf", + ) + + def test_extension_normalized_with_or_without_dot(self): + with_dot = get_filename_for_artifact( + "Doe", "Paper", "UIST", date(2021, 1, 1), ".pdf" + ) + without_dot = get_filename_for_artifact( + "Doe", "Paper", "UIST", date(2021, 1, 1), "pdf" + ) + self.assertEqual(with_dot, without_dot) + self.assertTrue(with_dot.endswith(".pdf")) + + def test_missing_last_name_uses_none_placeholder(self): + self.assertTrue( + get_filename_without_ext_for_artifact( + "", "Paper", "CHI", date(2022, 1, 1) + ).startswith("None_") + ) + + def test_suffix_is_inserted_before_forum(self): + self.assertEqual( + get_filename_without_ext_for_artifact( + "Doe", "Paper", "CHI", date(2022, 1, 1), suffix="poster" + ), + "Doe_Paper_poster_CHI2022", + ) + + def test_title_truncation(self): + result = get_filename_without_ext_for_artifact( + "Doe", "A Very Long Title Here", "CHI", date(2022, 1, 1), + max_pub_title_length=5, + ) + # Title portion (between the underscores) is capped at 5 chars. + title_part = result.split("_")[1] + self.assertEqual(len(title_part), 5) + + +class EnsureFilenameIsUniqueTests(SimpleTestCase): + def test_nonexistent_path_returned_unchanged(self): + path = os.path.join("/tmp", "definitely-not-a-real-file-1278.pdf") + self.assertEqual(ensure_filename_is_unique(path), path) diff --git a/website/tests/test_ml_utils.py b/website/tests/test_ml_utils.py new file mode 100644 index 00000000..2dfb6c35 --- /dev/null +++ b/website/tests/test_ml_utils.py @@ -0,0 +1,122 @@ +"""Unit tests for website.utils.ml_utils (#1278, item 5). + +Pure helper functions used across views and templates (school/department +abbreviations, video embeds, forum-name cleaning, fuzzy matching). No DB. +The model- and datetime-coupled helpers (sort_projects_*, choose_banners*) +are intentionally left for integration-style tests. +""" + +from datetime import date + +from django.test import SimpleTestCase + +from website.utils.ml_utils import ( + clean_forum_name, + create_acronym, + get_closest_match, + get_department_abbreviated, + get_school_abbreviated, + get_video_embed, + slugify_max, + weighted_choice, +) + + +class SlugifyMaxTests(SimpleTestCase): + def test_short_text_passes_through(self): + self.assertEqual(slugify_max("Hello World"), "hello-world") + + def test_truncates_without_exceeding_max_length(self): + result = slugify_max("Alpha Beta Gamma Delta", max_length=10) + self.assertLessEqual(len(result), 10) + self.assertFalse(result.endswith("-"), "must not end on a partial-word hyphen") + + +class CreateAcronymTests(SimpleTestCase): + def test_first_letter_of_each_word(self): + self.assertEqual(create_acronym("human computer interaction"), "hci") + + +class GetSchoolAbbreviatedTests(SimpleTestCase): + def test_washington_is_uw(self): + self.assertEqual(get_school_abbreviated("University of Washington"), "UW") + + def test_maryland_is_umd(self): + self.assertEqual(get_school_abbreviated("University of Maryland"), "UMD") + + def test_other_school_falls_back_to_acronym(self): + # " of " is dropped before acronyming, so "University of Foo Bar" -> UFB. + self.assertEqual(get_school_abbreviated("University of Foo Bar"), "UFB") + + +class GetDepartmentAbbreviatedTests(SimpleTestCase): + def test_cse(self): + self.assertEqual( + get_department_abbreviated("Computer Science & Engineering"), "CSE" + ) + + def test_allen_school_is_cse(self): + self.assertEqual(get_department_abbreviated("Paul G. Allen School"), "CSE") + + def test_ischool(self): + self.assertEqual(get_department_abbreviated("The Information School"), "iSchool") + + def test_hcde(self): + self.assertEqual(get_department_abbreviated("HCDE"), "HCDE") + + +class GetVideoEmbedTests(SimpleTestCase): + def test_youtube_short_url(self): + embed = get_video_embed("https://youtu.be/i0IDbHGir-8") + self.assertTrue(embed.startswith("https://youtube.com/embed")) + self.assertIn("i0IDbHGir-8", embed) + + def test_youtube_watch_url(self): + self.assertTrue( + get_video_embed("https://www.youtube.com/watch?v=dQw4w9WgXcQ").startswith( + "https://youtube.com/embed" + ) + ) + + def test_vimeo_url(self): + self.assertEqual( + get_video_embed("https://vimeo.com/164630179"), + "https://player.vimeo.com/video/164630179", + ) + + def test_unknown_service(self): + self.assertTrue( + get_video_embed("https://example.com/clip").startswith( + "unknown video service" + ) + ) + + +class CleanForumNameTests(SimpleTestCase): + def test_strips_proceedings_of_and_trailing_year(self): + self.assertEqual(clean_forum_name("Proceedings of CHI 2022"), "CHI") + + def test_plain_name_with_year(self): + self.assertEqual(clean_forum_name("CHI 2022"), "CHI") + + def test_name_without_decoration_unchanged(self): + self.assertEqual(clean_forum_name("ASSETS"), "ASSETS") + + +class GetClosestMatchTests(SimpleTestCase): + def test_returns_closest_above_cutoff(self): + self.assertEqual( + get_closest_match("aple", ["apple", "banana"], cutoff=0.5), "apple" + ) + + def test_returns_none_when_nothing_within_cutoff(self): + self.assertIsNone(get_closest_match("xyz", ["apple", "banana"], cutoff=0.8)) + + +class WeightedChoiceTests(SimpleTestCase): + def test_single_choice_is_deterministic(self): + self.assertEqual(weighted_choice([("only", 1)]), "only") + + def test_zero_weight_option_is_never_chosen(self): + # The non-zero-weight option spans the whole [0, total] range. + self.assertEqual(weighted_choice([("yes", 1), ("never", 0)]), "yes") diff --git a/website/tests/test_timeutils.py b/website/tests/test_timeutils.py new file mode 100644 index 00000000..9e5f49d9 --- /dev/null +++ b/website/tests/test_timeutils.py @@ -0,0 +1,67 @@ +"""Unit tests for website.utils.timeutils (#1278, item 5). + +Pure functions, zero DB. These had no coverage despite feeding user-facing +strings (duration labels) and the forum-name / url-name cleaning pipeline +(ends_with_year / remove_trailing_year are used by clean_forum_name and the +url_name de-collision logic). +""" + +from datetime import timedelta + +from django.test import SimpleTestCase + +from website.utils.timeutils import ( + ends_with_year, + humanize_duration, + remove_trailing_year, +) + + +class HumanizeDurationTests(SimpleTestCase): + """humanize_duration picks the largest fitting unit (year/month/week/day).""" + + def test_years_bucket(self): + self.assertEqual(humanize_duration(timedelta(days=365)), "1.0 years") + self.assertEqual(humanize_duration(timedelta(days=730)), "2.0 years") + + def test_months_bucket(self): + self.assertEqual(humanize_duration(timedelta(days=60)), "2.0 months") + + def test_weeks_bucket(self): + self.assertEqual(humanize_duration(timedelta(days=14)), "2.0 weeks") + + def test_days_bucket_is_the_sub_week_fallback(self): + self.assertEqual(humanize_duration(timedelta(days=3)), "3.0 days") + + def test_abbreviated_units(self): + self.assertEqual( + humanize_duration(timedelta(days=365), use_abbreviated_units=True), + "1.0 yrs", + ) + + def test_sig_figs_controls_precision(self): + self.assertEqual( + humanize_duration(timedelta(days=400), sig_figs=2), "1.10 years" + ) + + +class EndsWithYearTests(SimpleTestCase): + def test_trailing_four_digits_is_a_year(self): + self.assertTrue(ends_with_year("CHI 2022")) + + def test_no_trailing_digits(self): + self.assertFalse(ends_with_year("CHI")) + + def test_none_is_false(self): + self.assertFalse(ends_with_year(None)) + + +class RemoveTrailingYearTests(SimpleTestCase): + def test_strips_trailing_year_and_whitespace(self): + self.assertEqual(remove_trailing_year("CHI 2022"), "CHI") + + def test_no_year_returns_input_unchanged(self): + self.assertEqual(remove_trailing_year("CHI"), "CHI") + + def test_none_returns_empty_string(self): + self.assertEqual(remove_trailing_year(None), "") From cc6a3ff1417db189f9521f26ae1e75243c757bc8 Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:43:49 -0700 Subject: [PATCH 6/7] test(#1278): cover Person.save() side effects (item 5) Backfill the two previously-untested Person.save() behaviors: - Star Wars fallback: empty image / easter_egg get a random figure; provided images are left untouched. Redirects MEDIA_ROOT to a temp dir and patches the picker so the branch is exercised without touching real media. - bio_datetime_modified: stamped on create, updated when bio changes, and left alone on unrelated edits. url_name derivation is already covered by test_recompute_url_names. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/tests/test_person_save.py | 114 ++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 website/tests/test_person_save.py diff --git a/website/tests/test_person_save.py b/website/tests/test_person_save.py new file mode 100644 index 00000000..c6338346 --- /dev/null +++ b/website/tests/test_person_save.py @@ -0,0 +1,114 @@ +""" +Tests for Person.save() side effects (#1278, item 5). + +Person.save() does three non-obvious things on top of a normal save: +1. derives a unique url_name (already covered in test_recompute_url_names), +2. stamps bio_datetime_modified when the bio is first set or changes, +3. fills empty image / easter_egg fields with a random Star Wars figure. + +(2) and (3) had no coverage. The Star Wars branch reads a real file off +media/, so these tests redirect MEDIA_ROOT to a temp dir and patch the picker +to return a controlled stand-in -- exercising the branch logic without touching +real media or depending on the figure set's contents. +""" + +import os +import shutil +import tempfile +from unittest.mock import patch + +from django.test import override_settings +from django.utils import timezone + +from website.models import Person +from website.tests.base import DatabaseTestCase +from website.tests.factories import _GIF_1PX, image_upload + +_PICKER = "website.models.person.ml_fileutils.get_path_to_random_starwars_image" + + +class PersonStarWarsFallbackTests(DatabaseTestCase): + """Empty image fields are backfilled with a random Star Wars figure.""" + + def setUp(self): + super().setUp() + # Temp media so the auto-assigned image is copied somewhere disposable, + # never into the real media/ tree. + self.media_root = tempfile.mkdtemp(prefix="ml_person_test_") + self.addCleanup(shutil.rmtree, self.media_root, ignore_errors=True) + override = override_settings(MEDIA_ROOT=self.media_root) + override.enable() + self.addCleanup(override.disable) + + # A stand-in figure the patched picker hands back. + self.fake_figure = os.path.join(self.media_root, "fake_rebel.gif") + with open(self.fake_figure, "wb") as fh: + fh.write(_GIF_1PX) + + @patch(_PICKER) + def test_missing_image_and_easter_egg_are_auto_assigned(self, mock_pick): + mock_pick.return_value = self.fake_figure + + person = Person(first_name="No", last_name="Image") + person.save() + + self.assertTrue(person.image, "image should be auto-assigned when unset") + self.assertTrue( + person.easter_egg, "easter_egg should be auto-assigned when unset" + ) + # Both fields were empty, so the picker is consulted once per field. + self.assertEqual(mock_pick.call_count, 2) + + @patch(_PICKER) + def test_provided_images_are_left_untouched(self, mock_pick): + mock_pick.return_value = self.fake_figure + + person = Person( + first_name="Has", + last_name="Image", + image=image_upload("custom_head.gif"), + easter_egg=image_upload("custom_egg.gif"), + ) + person.save() + + # Both fields were provided, so the Star Wars fallback must not fire. + # (Person's upload_to renames the file from the person's name, so we + # assert on the fallback not running rather than the stored filename.) + mock_pick.assert_not_called() + self.assertTrue(person.image) + self.assertTrue(person.easter_egg) + + +class PersonBioDatetimeModifiedTests(DatabaseTestCase): + """bio_datetime_modified tracks when the bio text was last changed.""" + + def test_set_on_create(self): + person = self.make_person() + self.assertEqual(person.bio_datetime_modified, timezone.now().date()) + + def test_updated_when_bio_changes(self): + person = self.make_person(bio="original bio") + # Backdate it so a real update is observable. + Person.objects.filter(pk=person.pk).update( + bio_datetime_modified="2000-01-01" + ) + person.refresh_from_db() + + person.bio = "a brand new bio" + person.save() + + self.assertEqual(person.bio_datetime_modified, timezone.now().date()) + + def test_not_bumped_when_bio_unchanged(self): + person = self.make_person(bio="stable bio") + Person.objects.filter(pk=person.pk).update( + bio_datetime_modified="2000-01-01" + ) + person.refresh_from_db() + + # An unrelated edit must not touch the bio timestamp. + person.last_name = "Renamed" + person.save() + + person.refresh_from_db() + self.assertEqual(str(person.bio_datetime_modified), "2000-01-01") From c75da2ac5a7f2a51dcfdcba3ed189769139c774c Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:50:25 -0700 Subject: [PATCH 7/7] chore(#1278): bump version to 2.12.2 (test backfill + two 500 fixes) Co-Authored-By: Claude Opus 4.8 (1M context) --- makeabilitylab/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/makeabilitylab/settings.py b/makeabilitylab/settings.py index dbb536eb..6ccce210 100644 --- a/makeabilitylab/settings.py +++ b/makeabilitylab/settings.py @@ -86,8 +86,8 @@ SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https') # Makeability Lab Global Variables, including Makeability Lab version -ML_WEBSITE_VERSION = "2.12.1" # Keep this updated with each release and also change the short description below -ML_WEBSITE_VERSION_DESCRIPTION = "Patch: tighten meta descriptions (#1142/#1324). Home now uses a concise description mirroring the hero blurb; projects without a one-line summary fall back to a truncated About instead of the generic lab boilerplate; the last-resort default is trimmed to ~135 chars. Reduces duplicate/over-long descriptions flagged by social/OG inspectors. Template/view-only — no schema change." +ML_WEBSITE_VERSION = "2.12.2" # Keep this updated with each release and also change the short description below +ML_WEBSITE_VERSION_DESCRIPTION = "Patch: test backfill for high-risk untested code (#1278 item 5) surfaced and fixed two latent 500s — Artifact.save() crashed re-saving an artifact with no PDF, and a project with no start_date 500'd its page. Adds a public-view smoke-sweep plus coverage for delete_unused_files, Person.save() side effects, and the pure utils (timeutils/ml_utils/fileutils), lifting app coverage 59%→69%. Two one-line model guards; no schema change." DATE_MAKEABILITYLAB_FORMED = datetime.date(2012, 1, 1) # Date Makeability Lab was formed MAX_BANNERS = 7 # Maximum number of banners on a page