Skip to content
4 changes: 2 additions & 2 deletions makeabilitylab/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@
SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')

# Makeability Lab Global Variables, including Makeability Lab version
ML_WEBSITE_VERSION = "2.12.1" # Keep this updated with each release and also change the short description below
ML_WEBSITE_VERSION_DESCRIPTION = "Patch: tighten meta descriptions (#1142/#1324). Home now uses a concise description mirroring the hero blurb; projects without a one-line summary fall back to a truncated About instead of the generic lab boilerplate; the last-resort default is trimmed to ~135 chars. Reduces duplicate/over-long descriptions flagged by social/OG inspectors. Template/view-only — no schema change."
ML_WEBSITE_VERSION = "2.12.2" # Keep this updated with each release and also change the short description below
ML_WEBSITE_VERSION_DESCRIPTION = "Patch: test backfill for high-risk untested code (#1278 item 5) surfaced and fixed two latent 500s — Artifact.save() crashed re-saving an artifact with no PDF, and a project with no start_date 500'd its page. Adds a public-view smoke-sweep plus coverage for delete_unused_files, Person.save() side effects, and the pure utils (timeutils/ml_utils/fileutils), lifting app coverage 59%→69%. Two one-line model guards; no schema change."
DATE_MAKEABILITYLAB_FORMED = datetime.date(2012, 1, 1) # Date Makeability Lab was formed
MAX_BANNERS = 7 # Maximum number of banners on a page

Expand Down
48 changes: 26 additions & 22 deletions website/models/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,28 +307,32 @@ def save(self, *args, **kwargs):
else:
_logger.debug("No authors exist yet, so will wait for m2m authors_changed to rename files")

# Generate a thumbnail if one does not already exist
pdf_filename = os.path.basename(self.pdf_file.name)
pdf_filename_no_ext, ext = os.path.splitext(pdf_filename)
thumbnail_filename = os.path.basename(pdf_filename_no_ext) + ".jpg"
thumbnail_filename_with_local_path = self.get_upload_thumbnail_dir(thumbnail_filename)
thumbnail_exists_in_storage = self.thumbnail.storage.exists(thumbnail_filename_with_local_path)
if not self.thumbnail or not thumbnail_exists_in_storage:
_logger.debug(f"The thumbnail for artifact.id={self.id} does not exist at {thumbnail_filename_with_local_path}, generating...")

# generate a thumbnail
if self.pdf_file.storage.exists(self.pdf_file.name):
thumbnail_local_path = os.path.dirname(thumbnail_filename_with_local_path)
ml_fileutils.generate_thumbnail_for_pdf(self.pdf_file, self.thumbnail, thumbnail_local_path)

# If 'update_fields' does not exist in kwargs, all fields are saved
# Add 'thumbnail' to the update_fields list so that it gets updated in the db
if 'update_fields' in kwargs:
kwargs.setdefault('update_fields', []).append('thumbnail')
else:
_logger.debug(f"Could not generate a thumbnail because the pdf {self.pdf_file.path} was not found in storage")
elif thumbnail_exists_in_storage:
_logger.debug(f"The thumbnail for artifact.id={self.id} already exists at {thumbnail_filename_with_local_path}, so not generating")
# Generate a thumbnail if one does not already exist. Guard on
# pdf_file: it's nullable, and self.pdf_file.name is None when empty,
# which would crash os.path.basename below (#1278). No PDF simply
# means there is no thumbnail to generate.
if self.pdf_file:
pdf_filename = os.path.basename(self.pdf_file.name)
pdf_filename_no_ext, ext = os.path.splitext(pdf_filename)
thumbnail_filename = os.path.basename(pdf_filename_no_ext) + ".jpg"
thumbnail_filename_with_local_path = self.get_upload_thumbnail_dir(thumbnail_filename)
thumbnail_exists_in_storage = self.thumbnail.storage.exists(thumbnail_filename_with_local_path)
if not self.thumbnail or not thumbnail_exists_in_storage:
_logger.debug(f"The thumbnail for artifact.id={self.id} does not exist at {thumbnail_filename_with_local_path}, generating...")

# generate a thumbnail
if self.pdf_file.storage.exists(self.pdf_file.name):
thumbnail_local_path = os.path.dirname(thumbnail_filename_with_local_path)
ml_fileutils.generate_thumbnail_for_pdf(self.pdf_file, self.thumbnail, thumbnail_local_path)

# If 'update_fields' does not exist in kwargs, all fields are saved
# Add 'thumbnail' to the update_fields list so that it gets updated in the db
if 'update_fields' in kwargs:
kwargs.setdefault('update_fields', []).append('thumbnail')
else:
_logger.debug(f"Could not generate a thumbnail because the pdf {self.pdf_file.path} was not found in storage")
elif thumbnail_exists_in_storage:
_logger.debug(f"The thumbnail for artifact.id={self.id} already exists at {thumbnail_filename_with_local_path}, so not generating")

_logger.debug(f"Calling super().save(*args, **kwargs)")

Expand Down
6 changes: 6 additions & 0 deletions website/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,12 @@ def get_project_dates_str(self):
If the end date is None, it returns a string in the format 'start_year–Present'.
Otherwise, it returns a string in the format 'start_year–end_year'.
"""
# start_date is nullable, so a project may have none. Without it there's
# no range to format; return "" so the template renders nothing rather
# than 500ing on self.start_date.year (#1278).
if self.start_date is None:
return ""

# If end_date is None, return 'start_year–Present'
if self.end_date is None:
return f"{self.start_date.year}–Present"
Expand Down
30 changes: 30 additions & 0 deletions website/tests/test_artifact.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Tests for Artifact model methods (filename-drift check, raw-file label)."""

from datetime import date
from unittest.mock import MagicMock, patch

from django.test import SimpleTestCase

from website.models import Publication
from website.tests.base import DatabaseTestCase


# --- Artifact filename check regression -----------------------------------

Expand Down Expand Up @@ -113,3 +117,29 @@ def test_no_raw_file_returns_none(self):

def test_no_extension_returns_none(self):
self.assertIsNone(self._label("talks/Doe2020Title"))


# --- Artifact.save() with no PDF ------------------------------------------


class ArtifactSaveNullPdfTests(DatabaseTestCase):
"""
Regression test for Artifact.save() when ``pdf_file`` is empty (#1278).

``pdf_file`` is nullable (``null=True, default=None``), so an artifact can
legitimately exist without a PDF. But the thumbnail-generation block in
Artifact.save() ran ``os.path.basename(self.pdf_file.name)`` unconditionally
on every non-first save -- and ``self.pdf_file.name`` is ``None`` when the
field is empty, raising ``TypeError: expected str ... not NoneType``.

Any second save of a PDF-less artifact triggered it: an admin edit, or the
``authors_changed`` m2m signal re-saving to rename files. This pins the
guard so a missing PDF simply means "no thumbnail to generate".
"""

def test_resaving_artifact_without_pdf_does_not_crash(self):
pub = Publication.objects.create(title="No PDF", date=date(2024, 1, 1))
# First save (objects.create) is fine; the crash was on the *second*.
pub.location = "Seattle, WA"
pub.save() # must not raise
self.assertFalse(bool(Publication.objects.get(pk=pub.pk).pdf_file))
155 changes: 155 additions & 0 deletions website/tests/test_delete_unused_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""
Regression tests for the ``delete_unused_files`` management command (#1278, item 5).

This command runs on **every container start** (see ``docker-entrypoint.sh``) and
**deletes files** off the media filesystem, so it is the single highest-risk
untested code path in the app: a logic slip here silently destroys real
publication / talk / poster PDFs and thumbnails in production. Until now it had
zero tests.

The command globs ``MEDIA_ROOT/{publications,talks,posters}`` (and their
``images/`` thumbnail subdirs) for files, removes from that set anything still
referenced by a DB row, and deletes whatever is left over. These tests pin the
three behaviors that matter:

1. **Orphans are deleted, referenced files are kept** — the core contract.
2. **easy-thumbnails ``_detail`` cache files are never touched** — they are
owned by ``thumbnail_cleanup``; deleting them here would fight that command.
3. **It never crashes** on empty media dirs, zero DB rows, or a row whose
``FileField`` is empty (the null-``.path`` crash class called out in #1278).

Every test runs against a throwaway ``MEDIA_ROOT`` (a temp dir wired in via
``override_settings``) so no real media is ever at risk.
"""

import os
import shutil
import tempfile
from datetime import date

from django.core.management import call_command
from django.test import override_settings

from website.models import Publication
from website.tests.base import DatabaseTestCase


class DeleteUnusedFilesTests(DatabaseTestCase):
"""Exercise ``manage.py delete_unused_files`` against a temp MEDIA_ROOT."""

def setUp(self):
super().setUp()
# A disposable media root so model saves and the command's deletions
# only ever touch files under here, never the developer's real media/.
self.media_root = tempfile.mkdtemp(prefix="ml_media_test_")
self.addCleanup(shutil.rmtree, self.media_root, ignore_errors=True)

override = override_settings(MEDIA_ROOT=self.media_root)
override.enable()
self.addCleanup(override.disable)

# The command globs these dirs; create them so an empty run has
# something to glob (mirrors a freshly-deployed container).
for sub in ("publications/images", "talks/images", "posters/images"):
os.makedirs(os.path.join(self.media_root, sub), exist_ok=True)

def _write(self, relpath, content=b"unused"):
"""Write a stray file under MEDIA_ROOT and return its absolute path."""
full = os.path.join(self.media_root, relpath)
os.makedirs(os.path.dirname(full), exist_ok=True)
with open(full, "wb") as fh:
fh.write(content)
return full

def test_orphan_publication_pdf_is_deleted_referenced_is_kept(self):
"""The whole point: drop the orphan, keep the file a DB row points at."""
pub = self.make_publication(title="Kept Paper")
referenced = pub.pdf_file.path
self.assertTrue(os.path.exists(referenced))

orphan = self._write("publications/orphan_abandoned.pdf", b"%PDF-1.4 orphan")

call_command("delete_unused_files")

self.assertFalse(os.path.exists(orphan), "unreferenced PDF should be deleted")
self.assertTrue(os.path.exists(referenced), "referenced PDF must be kept")

def test_easy_thumbnail_detail_files_are_preserved(self):
"""``_detail`` cache files belong to thumbnail_cleanup, not this command."""
detail = self._write(
"publications/images/Foo_CHI2022.jpg.300x0_q85_detail.jpg"
)
orphan_thumb = self._write("publications/images/orphan_thumb.jpg")

call_command("delete_unused_files")

self.assertTrue(
os.path.exists(detail),
"_detail easy-thumbnail file must be preserved",
)
self.assertFalse(
os.path.exists(orphan_thumb),
"unreferenced thumbnail should be deleted",
)

def test_orphan_talk_pdf_and_raw_files_are_deleted(self):
"""Talks: orphan .pdf/.pptx/.key go; the referenced talk PDF stays."""
talk = self.make_talk(title="Kept Talk")
referenced = talk.pdf_file.path

orphan_pdf = self._write("talks/orphan_talk.pdf")
orphan_pptx = self._write("talks/orphan_deck.pptx")
orphan_key = self._write("talks/orphan_deck.key")

call_command("delete_unused_files")

self.assertTrue(os.path.exists(referenced), "referenced talk PDF must be kept")
for stray in (orphan_pdf, orphan_pptx, orphan_key):
self.assertFalse(os.path.exists(stray), f"{stray} should be deleted")

def test_orphan_poster_files_are_deleted(self):
"""Posters: the raw set is .pptx/.key/.ai (note .ai, unlike talks)."""
strays = [
self._write("posters/orphan_poster.pdf"),
self._write("posters/orphan_poster.ai"),
self._write("posters/orphan_poster.key"),
self._write("posters/orphan_poster.pptx"),
]

call_command("delete_unused_files")

for stray in strays:
self.assertFalse(os.path.exists(stray), f"{stray} should be deleted")

def test_delete_unused_files_helper_reports_count_and_bytes(self):
"""The low-level helper returns an accurate (count, total_bytes) tally."""
from website.management.commands.delete_unused_files import Command

f1 = self._write("publications/a.pdf", b"12345") # 5 bytes
f2 = self._write("publications/b.pdf", b"678") # 3 bytes

count, total_bytes = Command().delete_unused_files([f1, f2])

self.assertEqual(count, 2)
self.assertEqual(total_bytes, 8)
self.assertFalse(os.path.exists(f1))
self.assertFalse(os.path.exists(f2))

def test_runs_cleanly_on_empty_media_and_no_db_rows(self):
"""Fresh deploy: empty media dirs, no DB rows -> no exception, no deletions."""
call_command("delete_unused_files") # reaching the next line == no crash

for sub in ("publications", "talks", "posters"):
self.assertTrue(os.path.isdir(os.path.join(self.media_root, sub)))

def test_artifact_with_empty_pdf_field_does_not_crash(self):
"""A row whose pdf_file is empty must not crash the guarded .path access."""
# Guards the null-FileField crash class flagged in #1278: the command's
# `if pub.pdf_file:` check must short-circuit before touching `.path`.
# Built via objects.create (a single, first-time save) so this test
# isolates the *command's* guard; the separate Artifact.save() null-pdf
# re-save crash is pinned in test_artifact.py.
Publication.objects.create(title="No PDF", date=date(2024, 1, 1))
self.assertFalse(bool(Publication.objects.get(title="No PDF").pdf_file))

call_command("delete_unused_files") # no AttributeError on empty .path
97 changes: 97 additions & 0 deletions website/tests/test_fileutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Unit tests for the pure filename helpers in website.utils.fileutils (#1278, item 5).

These build the deterministic, archive-friendly filenames the artifact rename
pipeline depends on (get_filename_for_artifact and friends). The Star Wars
image helpers in this module are already covered by test_easter_egg_picker;
the filesystem-touching helpers (thumbnail generation, PDF page count) are left
to integration coverage. No DB.
"""

import os
from datetime import date

from django.test import SimpleTestCase

from website.utils.fileutils import (
ensure_filename_is_unique,
get_ckeditor_image_filename,
get_filename_for_artifact,
get_filename_no_ext,
get_filename_without_ext_for_artifact,
is_image,
)


class IsImageTests(SimpleTestCase):
def test_known_image_extensions(self):
for name in ("photo.jpg", "PHOTO.JPEG", "art.png", "anim.gif"):
self.assertTrue(is_image(name), name)

def test_non_image_extensions(self):
for name in ("doc.pdf", "deck.pptx", "noext"):
self.assertFalse(is_image(name), name)


class GetCkeditorImageFilenameTests(SimpleTestCase):
def test_uppercases_filename(self):
self.assertEqual(get_ckeditor_image_filename("My File.png"), "MY FILE.PNG")


class GetFilenameNoExtTests(SimpleTestCase):
def test_strips_path_and_extension(self):
self.assertEqual(
get_filename_no_ext("publications/Doe_Paper_CHI2022.pdf"),
"Doe_Paper_CHI2022",
)


class GetFilenameForArtifactTests(SimpleTestCase):
"""The canonical Lastname_Title_Forum+Year naming used for archived files."""

def test_basic_filename(self):
self.assertEqual(
get_filename_for_artifact(
"Froehlich", "This Is A Test", "CHI", date(2022, 1, 1), "pdf"
),
"Froehlich_ThisIsATest_CHI2022.pdf",
)

def test_extension_normalized_with_or_without_dot(self):
with_dot = get_filename_for_artifact(
"Doe", "Paper", "UIST", date(2021, 1, 1), ".pdf"
)
without_dot = get_filename_for_artifact(
"Doe", "Paper", "UIST", date(2021, 1, 1), "pdf"
)
self.assertEqual(with_dot, without_dot)
self.assertTrue(with_dot.endswith(".pdf"))

def test_missing_last_name_uses_none_placeholder(self):
self.assertTrue(
get_filename_without_ext_for_artifact(
"", "Paper", "CHI", date(2022, 1, 1)
).startswith("None_")
)

def test_suffix_is_inserted_before_forum(self):
self.assertEqual(
get_filename_without_ext_for_artifact(
"Doe", "Paper", "CHI", date(2022, 1, 1), suffix="poster"
),
"Doe_Paper_poster_CHI2022",
)

def test_title_truncation(self):
result = get_filename_without_ext_for_artifact(
"Doe", "A Very Long Title Here", "CHI", date(2022, 1, 1),
max_pub_title_length=5,
)
# Title portion (between the underscores) is capped at 5 chars.
title_part = result.split("_")[1]
self.assertEqual(len(title_part), 5)


class EnsureFilenameIsUniqueTests(SimpleTestCase):
def test_nonexistent_path_returned_unchanged(self):
path = os.path.join("/tmp", "definitely-not-a-real-file-1278.pdf")
self.assertEqual(ensure_filename_is_unique(path), path)
Loading
Loading