From 7d845373e10b7f53ada02aa79ea9eef11c1b9f0a Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:32:02 -0700 Subject: [PATCH] refactor(seo): drop sitemap https pin; rely on request.scheme (#1329) Now that SECURE_PROXY_SSL_HEADER lets Django see the real https scheme behind UW CSE's Apache proxy (#1329), the sitemap no longer needs to pin protocol="https". Remove the _HttpsSitemap base so every sitemap inherits Django's default behavior, where URLs use request.scheme. This doubles as the on-server verification for #1329: /sitemap.xml on -test now reflects request.scheme directly, so if its URLs stay https the proxy header is being honored end-to-end (the meta-tag site_scheme workaround masks this everywhere else). Tests: replace the old "always https" assertion with one that drives the scheme via an X-Forwarded-Proto header under SECURE_PROXY_SSL_HEADER (the real #1329 path), plus a check that a plain request now reflects the request scheme rather than a hardcoded https. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/sitemaps.py | 30 +++++++++++------------------- website/tests/test_sitemap.py | 34 ++++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/website/sitemaps.py b/website/sitemaps.py index 6ba055ce..5fc7d903 100644 --- a/website/sitemaps.py +++ b/website/sitemaps.py @@ -11,6 +11,13 @@ ``makeabilitylab.cs.washington.edu`` in prod, ``makeabilitylab-test...`` on the test server, and ``localhost`` in dev — no per-environment configuration. +Scheme handling: the ```` protocol comes from ``request.scheme``. Behind +UW CSE's TLS-terminating Apache proxy that is now ``https`` because +``SECURE_PROXY_SSL_HEADER`` trusts the proxy's ``X-Forwarded-Proto`` header +(#1329); in local dev it is ``http``. We previously pinned ``protocol="https"`` +to paper over Django seeing ``http`` behind the proxy — that workaround is gone +now that the scheme is correct at the framework level. + We map only the pages that have real, indexable URLs: - static listing pages (home, people, publications, projects, awards, news) - one entry per visible Project -> /project// @@ -37,22 +44,7 @@ def _latest(model, field): ) -class _HttpsSitemap(Sitemap): - """ - Base sitemap that pins generated URLs to the https scheme. - - Apache terminates TLS and proxies to Django over plain HTTP, so the - request scheme Django sees is ``http``. Without this, RequestSite would - emit ``http://`` URLs that only 302-redirect to https — making the - sitemap advertise non-canonical URLs with an extra hop. Pinning the - protocol here makes every sitemap list the canonical https URLs directly. - (Cosmetic only in local dev, where the site is served over http.) - """ - - protocol = "https" - - -class StaticViewSitemap(_HttpsSitemap): +class StaticViewSitemap(Sitemap): """Top-level listing pages that aren't tied to a single model instance.""" changefreq = "weekly" @@ -106,7 +98,7 @@ def lastmod(self, item): return None -class ProjectSitemap(_HttpsSitemap): +class ProjectSitemap(Sitemap): """Public project pages: /project//.""" changefreq = "weekly" @@ -127,7 +119,7 @@ def lastmod(self, obj): return obj.updated -class PersonSitemap(_HttpsSitemap): +class PersonSitemap(Sitemap): """Public people pages: /member//.""" changefreq = "monthly" @@ -153,7 +145,7 @@ def lastmod(self, obj): return obj.bio_datetime_modified -class NewsSitemap(_HttpsSitemap): +class NewsSitemap(Sitemap): """News item pages: /news//.""" changefreq = "monthly" diff --git a/website/tests/test_sitemap.py b/website/tests/test_sitemap.py index 4acc6fd7..c557c7f2 100644 --- a/website/tests/test_sitemap.py +++ b/website/tests/test_sitemap.py @@ -11,6 +11,8 @@ import re from datetime import date +from django.test import override_settings + from website.tests.base import DatabaseTestCase @@ -73,16 +75,36 @@ def test_static_listing_pages_have_lastmod(self): self.assertTrue(listing, "expected a /news/ listing entry in the sitemap") self.assertIn("", listing[0]) - def test_sitemap_uses_https_scheme(self): - # Apache proxies to Django over plain HTTP, so without a pinned - # protocol the URLs would be http:// and only 302-redirect to - # https. Every must be canonical https. See _HttpsSitemap. + @override_settings(SECURE_PROXY_SSL_HEADER=("HTTP_X_FORWARDED_PROTO", "https")) + def test_sitemap_honors_forwarded_proto_header(self): + # The sitemap scheme follows request.scheme (we no longer pin + # protocol="https"). Behind UW CSE's TLS-terminating proxy, Django + # reaches https via SECURE_PROXY_SSL_HEADER trusting X-Forwarded-Proto + # (#1329). Simulate that header here and confirm every is https. self.make_project(name="Scheme Proj", short_name="schemeproj", is_visible=True) - body = self.client.get("/sitemap.xml").content.decode() + body = self.client.get( + "/sitemap.xml", HTTP_X_FORWARDED_PROTO="https" + ).content.decode() locs = re.findall(r"(.*?)", body) self.assertTrue(locs) # guard against an empty sitemap passing vacuously self.assertFalse( [loc for loc in locs if not loc.startswith("https://")], - "all sitemap URLs should use the https scheme", + "with X-Forwarded-Proto=https the sitemap URLs should be https", + ) + + def test_sitemap_scheme_follows_request(self): + # With the protocol pin removed, a plain request (no forwarded-proto, + # no SECURE_PROXY_SSL_HEADER) reflects the request scheme — http here. + # This is the local-dev / direct-request case; the proxy supplies https + # in the deployed environments (see the test above and #1329). + self.make_project(name="Plain Proj", short_name="plainproj", + is_visible=True) + body = self.client.get("/sitemap.xml").content.decode() + locs = re.findall(r"(.*?)", body) + self.assertTrue(locs) + self.assertTrue( + all(loc.startswith("http://") for loc in locs), + "without a forwarded-proto header the sitemap should reflect the " + "request scheme (http) rather than a pinned https", )