diff --git a/website/sitemaps.py b/website/sitemaps.py
index 58a893f5..6ba055ce 100644
--- a/website/sitemaps.py
+++ b/website/sitemaps.py
@@ -24,7 +24,55 @@
 from django.contrib.sitemaps import Sitemap
 from django.urls import reverse
 
-from website.models import Project, Person, News
+from website.models import Project, Person, News, Publication, Award
+
+
+def _latest(model, field):
+    """Return the most recent non-null value of ``field`` across ``model``, or None."""
+    return (
+        model.objects.filter(**{f"{field}__isnull": False})
+        .order_by(f"-{field}")
+        .values_list(field, flat=True)
+        .first()
+    )
+
+
+def _as_date(value):
+    """
+    Collapse a ``datetime`` to a plain ``date``; pass ``date``/``None`` through.
+
+    Python refuses to order a ``date`` against a ``datetime`` (``TypeError``),
+    so every ``lastmod`` this module reports is normalized to one type first.
+    Aware datetimes are converted to the current time zone before truncating.
+    """
+    # Imported locally so this helper neither depends on nor clashes with
+    # whatever the module's top-level import block already binds.
+    import datetime
+
+    from django.utils import timezone
+
+    if not isinstance(value, datetime.datetime):
+        return value
+    if timezone.is_aware(value):
+        value = timezone.localtime(value)
+    return value.date()
+
+
+# Listing-page URL name -> (model, field) pairs whose newest value dates it.
+_LISTING_LASTMOD_SOURCES = {
+    "website:people": ((Person, "bio_datetime_modified"),),
+    "website:publications": ((Publication, "date"),),
+    "website:projects": ((Project, "updated"),),
+    "website:awards": ((Award, "date"),),
+    "website:news_listing": ((News, "date"),),
+    # The home page surfaces recent content across sections, so it takes the
+    # most recent of news, publications, and project updates.
+    "website:index": (
+        (News, "date"),
+        (Publication, "date"),
+        (Project, "updated"),
+    ),
+}
 
 
 class _HttpsSitemap(Sitemap):
@@ -62,6 +110,23 @@ def items(self):
     def location(self, item):
         return reverse(item)
 
+    def lastmod(self, item):
+        """
+        Most-recent content date for each listing page, so the sitemap signals
+        when a section last changed (helps crawlers prioritize re-crawls).
+
+        Returns ``None`` for an empty section or an unknown ``item``; the
+        framework then simply omits ``<lastmod>`` for that URL.
+
+        Every value is reduced to a plain ``date``: the sections mix ``date``
+        and ``datetime`` fields, and ordering one against the other raises
+        ``TypeError`` -- both in the ``max()`` below and when Django compares
+        the per-URL values to track the sitemap-wide latest lastmod.
+        """
+        sources = _LISTING_LASTMOD_SOURCES.get(item, ())
+        dates = [_as_date(_latest(model, field)) for model, field in sources]
+        return max((d for d in dates if d is not None), default=None)
+
 
 class ProjectSitemap(_HttpsSitemap):
     """Public project pages: /project/<slug>/."""
diff --git a/website/tests/test_sitemap.py b/website/tests/test_sitemap.py
index b1949f47..4acc6fd7 100644
--- a/website/tests/test_sitemap.py
+++ b/website/tests/test_sitemap.py
@@ -60,6 +60,19 @@ def test_sitemap_includes_news_item(self):
         body = self.client.get("/sitemap.xml").content.decode()
         self.assertIn(f"/news/{news.slug}/", body)
 
+    def test_static_listing_pages_have_lastmod(self):
+        # The listing pages should advertise a <lastmod> sourced from their
+        # most-recent content, not be the only entries with none. Create a news
+        # item so the news/home/listing sections are non-empty.
+        self.make_news_item(title="Dated News")
+        body = self.client.get("/sitemap.xml").content.decode()
+        # Pull the <url> block for the /news/ listing and assert it carries a
+        # <lastmod>. (Detail-page news URLs look like /news/<slug>/.)
+        url_blocks = re.findall(r"<url>(.*?)</url>", body, re.DOTALL)
+        listing = [b for b in url_blocks if re.search(r"<loc>[^<]*/news/</loc>", b)]
+        self.assertTrue(listing, "expected a /news/ listing entry in the sitemap")
+        self.assertIn("<lastmod>", listing[0])
+
     def test_sitemap_uses_https_scheme(self):
         # Apache proxies to Django over plain HTTP, so without a pinned
         # protocol the URLs would be http:// and only 302-redirect to