From 8bbf4ea74031569e9353f1405551867bb259ef33 Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 13:22:06 -0700 Subject: [PATCH] refactor(seo): remove site_scheme workaround; use request.scheme (#1329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With SECURE_PROXY_SSL_HEADER enabled (#1336) and verified end-to-end via the sitemap on -test (#1338), request.scheme is now correct everywhere behind UW CSE's TLS proxy. Remove the in-app site_scheme workaround that pinned https off DJANGO_ENV (#1236) and rely on the framework: - Delete the site_scheme context processor (+ its TEMPLATES registration) and the site_scheme helper in website/utils/metadata.py. - absolute_url() (view-built JSON-LD URLs) now uses request.scheme. - Templates (base/member/news_item/project) build canonical/OG/Twitter URLs from {{ request.scheme }} instead of {{ site_scheme }}. Tests: rework the scheme assertions to exercise the real mechanism — an X-Forwarded-Proto: https request under SECURE_PROXY_SSL_HEADER emits https; a secure request emits https; a plain http request reflects the request scheme. Full suite green (344 tests). Closes #1329. Co-Authored-By: Claude Opus 4.8 (1M context) --- makeabilitylab/settings.py | 1 - website/context_processors.py | 14 ----- website/templates/website/base.html | 13 ++--- website/templates/website/member.html | 4 +- website/templates/website/news_item.html | 8 +-- website/templates/website/project.html | 4 +- website/tests/test_page_metadata.py | 68 ++++++++++++++---------- website/utils/metadata.py | 28 +++------- 8 files changed, 63 insertions(+), 77 deletions(-) diff --git a/makeabilitylab/settings.py b/makeabilitylab/settings.py index 6ccce210..39ea0479 100644 --- a/makeabilitylab/settings.py +++ b/makeabilitylab/settings.py @@ -281,7 +281,6 @@ 'django.contrib.messages.context_processors.messages', 'website.context_processors.recent_news', 'website.context_processors.admin_version_info', - 'website.context_processors.site_scheme', ], }, }, diff --git a/website/context_processors.py b/website/context_processors.py index 90053933..64a26216 100644 --- a/website/context_processors.py +++ b/website/context_processors.py @@ -9,7 +9,6 @@ from .models import News from django.conf import settings -from website.utils.metadata import site_scheme as metadata_site_scheme def recent_news(request): """ context processors returning recent news """ @@ -18,19 +17,6 @@ def recent_news(request): return { 'recent_news': news_items, } -def site_scheme(request): - """ - Expose the canonical URL scheme (``http`` / ``https``) to every template, so - templates can build absolute URLs as - ``{{ site_scheme }}://{{ request.get_host }}{{ path }}`` that aren't ``http://`` - behind UW CSE's TLS-terminating proxy (issue #1236). - - Delegates to :func:`website.utils.metadata.site_scheme` (the single source of - truth, keyed on ``DJANGO_ENV``) so the scheme used by view-built JSON-LD URLs - and template-built canonical/OG URLs can't drift. - """ - return {'site_scheme': metadata_site_scheme(request)} - def admin_version_info(request): """ Make version and debug info available to all templates. diff --git a/website/templates/website/base.html b/website/templates/website/base.html index ba874025..7d5bbbdc 100644 --- a/website/templates/website/base.html +++ b/website/templates/website/base.html @@ -69,8 +69,9 @@ by , og:description, and twitter:description (no duplication). - Absolute URLs use {{ site_scheme }} (https on the servers, http in local dev) - so they don't advertise http:// behind the TLS proxy — see #1236. + Absolute URLs use request.scheme (https on the servers, http in local dev). + It's correct behind the TLS proxy because SECURE_PROXY_SSL_HEADER trusts the + proxy's X-Forwarded-Proto header — see #1329 (which replaced the #1236 fix). Detail templates customize via: {% block social_image %} — og:image + twitter:image (default: lab logo) @@ -81,7 +82,7 @@ {% with meta_title=page_meta.title|default:"Makeability Lab" og_type=page_meta.og_type|default:"website" canonical_path=page_meta.canonical_path|default:request.path %} {% firstof page_meta.description "The Makeability Lab is an advanced research lab in Human-AI directed by Prof. Jon E. Froehlich at UW's Allen School of Computer Science." as meta_description %} - + @@ -89,11 +90,11 @@ - + {% block social_image %} - + - + {% endblock social_image %} {% block meta_extra %}{% endblock %} diff --git a/website/templates/website/member.html b/website/templates/website/member.html index bc06eb94..b0f84304 100644 --- a/website/templates/website/member.html +++ b/website/templates/website/member.html @@ -56,11 +56,11 @@ {% block social_image %} {% thumbnail person.image '1200x630' box=person.cropping crop=True upscale=True as og_img %} {% if og_img %} - + - + {% else %} {{ block.super }} {% endif %} diff --git a/website/templates/website/news_item.html b/website/templates/website/news_item.html index 17173c7d..d4522bc4 100644 --- a/website/templates/website/news_item.html +++ b/website/templates/website/news_item.html @@ -48,15 +48,15 @@ {% block social_image %} {% if news_item.image %}{% thumbnail news_item.image '1200x630' box=news_item.cropping crop=True upscale=True as og_img %}{% endif %} {% if og_img %} - + - + {% else %} - + - + {% endif %} {% endblock social_image %} diff --git a/website/templates/website/project.html b/website/templates/website/project.html index 9c3c7099..1f2872e5 100644 --- a/website/templates/website/project.html +++ b/website/templates/website/project.html @@ -70,11 +70,11 @@ {% block social_image %} {% thumbnail project.gallery_image 1200x630 box=project.cropping crop=True upscale=True as og_img %} {% if og_img %} - + - + {% else %} {{ block.super }} {% endif %} diff --git a/website/tests/test_page_metadata.py b/website/tests/test_page_metadata.py index dd840bd8..02d0dc16 100644 --- a/website/tests/test_page_metadata.py +++ b/website/tests/test_page_metadata.py @@ -7,13 +7,13 @@ it) is caught. They pin three things: * canonical + Open Graph + Twitter Card tags are present on every page type; - * absolute URLs use ``https`` behind the proxy (the #1236 fix), driven by the - ``site_scheme`` context processor — verified by toggling ``DEBUG``; + * absolute URLs follow ``request.scheme`` — https behind the proxy because + ``SECURE_PROXY_SSL_HEADER`` trusts the proxy's ``X-Forwarded-Proto`` header + (#1329, which replaced the #1236 ``site_scheme`` workaround); * detail pages emit distinct, per-page titles/descriptions/types rather than the single generic site description. -See website/templates/website/base.html, website/context_processors.py, and -website/utils/metadata.py. +See website/templates/website/base.html and website/utils/metadata.py. """ import json @@ -46,15 +46,17 @@ def _position(person, title=None): ) -# On the servers (DJANGO_ENV TEST/PROD) site_scheme pins https — assert against -# that (the test client's host is "testserver", auto-added to ALLOWED_HOSTS). -# NB: DJANGO_ENV, not DEBUG — the test server runs DEBUG=True behind the proxy -# (see PageMetadataSchemeTests for the regression that motivated this). -@override_settings(DJANGO_ENV='PROD') +# Absolute URLs follow request.scheme. We issue secure (https) requests to mirror +# the deployed servers, where SECURE_PROXY_SSL_HEADER makes request.scheme https +# behind the TLS proxy (see PageMetadataSchemeTests for the header-driven path). +# The test client's host is "testserver", auto-added to ALLOWED_HOSTS. class PageMetadataHttpsTests(DatabaseTestCase): + # Requests are issued with secure=True so request.scheme == "https", mirroring + # the deployed servers where SECURE_PROXY_SSL_HEADER makes the scheme https + # behind the TLS proxy. def test_home_has_core_metadata(self): - resp = self.client.get(reverse("website:index")) + resp = self.client.get(reverse("website:index"), secure=True) self.assertEqual(resp.status_code, 200) # Canonical present and https. self.assertContains(resp, '') @@ -67,7 +69,7 @@ def test_home_has_core_metadata(self): def test_no_http_scheme_in_social_urls(self): """#1236: og:url / og:image / canonical must never advertise http://.""" - resp = self.client.get(reverse("website:index")) + resp = self.client.get(reverse("website:index"), secure=True) self.assertNotContains(resp, 'property="og:url" content="http://') self.assertNotContains(resp, 'property="og:image" content="http://') self.assertNotContains(resp, 'rel="canonical" href="http://') @@ -78,7 +80,7 @@ def test_project_detail_metadata(self): start_date=date(2020, 1, 1), summary="SoundWatch is a smartwatch system for sound awareness.", ) - resp = self.client.get(reverse("website:project", args=[project.short_name])) + resp = self.client.get(reverse("website:project", args=[project.short_name]), secure=True) self.assertEqual(resp.status_code, 200) # Canonical matches the sitemap's reverse()-built URL exactly. canonical = "https://testserver" + reverse("website:project", args=[project.short_name]) @@ -94,7 +96,8 @@ def test_member_detail_metadata(self): bio="Ada researches accessible computing.") _position(person) resp = self.client.get( - reverse("website:member_by_name", kwargs={"member_name": person.url_name}) + reverse("website:member_by_name", kwargs={"member_name": person.url_name}), + secure=True, ) self.assertEqual(resp.status_code, 200) self.assertContains(resp, '') @@ -113,7 +116,8 @@ def test_news_detail_metadata(self): content="The Makeability Lab won a best paper award at CHI.", ) resp = self.client.get( - reverse("website:news_item_by_id", kwargs={"id": item.id}) + reverse("website:news_item_by_id", kwargs={"id": item.id}), + secure=True, ) self.assertEqual(resp.status_code, 200) self.assertContains(resp, '') @@ -245,25 +249,35 @@ def test_project_without_summary_or_about_uses_trimmed_default(self): class PageMetadataSchemeTests(DatabaseTestCase): - """site_scheme keys off DJANGO_ENV, not DEBUG. The test server runs DEBUG=True - behind the same TLS proxy as prod, so a DEBUG-based check would emit http:// - there (regression for the #1236 follow-up fix).""" - - @override_settings(DJANGO_ENV='TEST', DEBUG=True) - def test_test_server_uses_https_even_with_debug_true(self): - """The crux: DJANGO_ENV=TEST + DEBUG=True must still emit https.""" - resp = self.client.get(reverse("website:index")) + """Absolute URLs follow ``request.scheme``. Behind UW CSE's TLS-terminating + proxy that is https because ``SECURE_PROXY_SSL_HEADER`` trusts the proxy's + ``X-Forwarded-Proto`` header (#1329, which replaced the DJANGO_ENV-keyed + ``site_scheme`` workaround from #1236).""" + + @override_settings(SECURE_PROXY_SSL_HEADER=("HTTP_X_FORWARDED_PROTO", "https")) + def test_forwarded_proto_header_drives_https(self): + """The deployed path: an http request carrying X-Forwarded-Proto: https + is treated as secure, so absolute URLs emit https — exactly what Apache + forwards to the container (#1329). This is the crux that the old + DEBUG-based check got wrong on the test server (#1236).""" + # Send a clean Host header too (as Apache does), so get_host() doesn't + # append the test client's :80 port to the now-https URL. + resp = self.client.get( + reverse("website:index"), + HTTP_X_FORWARDED_PROTO="https", + HTTP_HOST="testserver", + ) self.assertContains(resp, '') self.assertContains(resp, '') - @override_settings(DJANGO_ENV='PROD', DEBUG=False) - def test_prod_uses_https(self): - resp = self.client.get(reverse("website:index")) + def test_secure_request_uses_https(self): + """A direct TLS request (request.scheme == https) emits https URLs.""" + resp = self.client.get(reverse("website:index"), secure=True) + self.assertContains(resp, '') self.assertContains(resp, '') - @override_settings(DJANGO_ENV='DEBUG', DEBUG=True) def test_local_dev_uses_request_scheme(self): - """Local dev (DJANGO_ENV=DEBUG/unset over http) follows request.scheme.""" + """Local dev over http (no proxy header) follows request.scheme — http.""" resp = self.client.get(reverse("website:index")) self.assertContains(resp, '') self.assertContains(resp, '') diff --git a/website/utils/metadata.py b/website/utils/metadata.py index e1189bc3..a6fe1ab6 100644 --- a/website/utils/metadata.py +++ b/website/utils/metadata.py @@ -11,7 +11,6 @@ import json -from django.conf import settings from django.utils.html import strip_tags from django.utils.safestring import mark_safe from django.utils.text import Truncator @@ -43,30 +42,17 @@ def meta_description(html, max_chars=META_DESCRIPTION_MAX_CHARS): return Truncator(text).chars(max_chars) -def site_scheme(request): - """ - The canonical scheme for absolute URLs (https on the test/prod servers, - ``request.scheme`` in local dev). Single source of truth — the ``site_scheme`` - context processor delegates here, so view-built JSON-LD URLs and template-built - canonical/OG URLs always agree. - - We key off ``DJANGO_ENV``, NOT ``DEBUG``: the **test** server runs with - ``DEBUG=True`` (config-test.ini) yet sits behind the same TLS-terminating Apache - proxy as prod, so a DEBUG-based check emits ``http://`` on test — the exact #1236 - bug. ``DJANGO_ENV`` is ``'TEST'``/``'PROD'`` on the servers (set by - rebuildanddeploy.sh) and ``'DEBUG'``/unset locally, which is the signal we want. +def absolute_url(request, path): + """Build an absolute, scheme-correct URL from a root-relative path. - NOTE: in-repo workaround. #1329 (IT enabling SECURE_PROXY_SSL_HEADER) would make - request.scheme correct everywhere and let this fall back to it. + Uses ``request.scheme``, which is correct in every environment now that + ``SECURE_PROXY_SSL_HEADER`` trusts the proxy's ``X-Forwarded-Proto`` header + behind UW CSE's TLS-terminating Apache (#1329) — https on test/prod, http in + local dev. This replaced the old ``site_scheme`` DJANGO_ENV workaround (#1236). """ - return 'https' if settings.DJANGO_ENV in ('PROD', 'TEST') else request.scheme - - -def absolute_url(request, path): - """Build an absolute, scheme-correct URL from a root-relative path.""" if not path: return None - return f"{site_scheme(request)}://{request.get_host()}{path}" + return f"{request.scheme}://{request.get_host()}{path}" def render_jsonld(data):