Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions website/sitemaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
``makeabilitylab.cs.washington.edu`` in prod, ``makeabilitylab-test...`` on the
test server, and ``localhost`` in dev — no per-environment configuration.

Scheme handling: the ``<loc>`` protocol comes from ``request.scheme``. Behind
UW CSE's TLS-terminating Apache proxy that is now ``https`` because
``SECURE_PROXY_SSL_HEADER`` trusts the proxy's ``X-Forwarded-Proto`` header
(#1329); in local dev it is ``http``. We previously pinned ``protocol="https"``
to paper over Django seeing ``http`` behind the proxy — that workaround is gone
now that the scheme is correct at the framework level.

We map only the pages that have real, indexable URLs:
- static listing pages (home, people, publications, projects, awards, news)
- one entry per visible Project -> /project/<short_name>/
Expand All @@ -37,22 +44,7 @@ def _latest(model, field):
)


class _HttpsSitemap(Sitemap):
"""
Base sitemap that pins generated URLs to the https scheme.

Apache terminates TLS and proxies to Django over plain HTTP, so the
request scheme Django sees is ``http``. Without this, RequestSite would
emit ``http://`` <loc> URLs that only 302-redirect to https — making the
sitemap advertise non-canonical URLs with an extra hop. Pinning the
protocol here makes every sitemap list the canonical https URLs directly.
(Cosmetic only in local dev, where the site is served over http.)
"""

protocol = "https"


class StaticViewSitemap(_HttpsSitemap):
class StaticViewSitemap(Sitemap):
"""Top-level listing pages that aren't tied to a single model instance."""

changefreq = "weekly"
Expand Down Expand Up @@ -106,7 +98,7 @@ def lastmod(self, item):
return None


class ProjectSitemap(_HttpsSitemap):
class ProjectSitemap(Sitemap):
"""Public project pages: /project/<short_name>/."""

changefreq = "weekly"
Expand All @@ -127,7 +119,7 @@ def lastmod(self, obj):
return obj.updated


class PersonSitemap(_HttpsSitemap):
class PersonSitemap(Sitemap):
"""Public people pages: /member/<url_name>/."""

changefreq = "monthly"
Expand All @@ -153,7 +145,7 @@ def lastmod(self, obj):
return obj.bio_datetime_modified


class NewsSitemap(_HttpsSitemap):
class NewsSitemap(Sitemap):
"""News item pages: /news/<slug>/."""

changefreq = "monthly"
Expand Down
34 changes: 28 additions & 6 deletions website/tests/test_sitemap.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import re
from datetime import date

from django.test import override_settings

from website.tests.base import DatabaseTestCase


Expand Down Expand Up @@ -73,16 +75,36 @@ def test_static_listing_pages_have_lastmod(self):
self.assertTrue(listing, "expected a /news/ listing entry in the sitemap")
self.assertIn("<lastmod>", listing[0])

def test_sitemap_uses_https_scheme(self):
# Apache proxies to Django over plain HTTP, so without a pinned
# protocol the <loc> URLs would be http:// and only 302-redirect to
# https. Every <loc> must be canonical https. See _HttpsSitemap.
@override_settings(SECURE_PROXY_SSL_HEADER=("HTTP_X_FORWARDED_PROTO", "https"))
def test_sitemap_honors_forwarded_proto_header(self):
# The sitemap scheme follows request.scheme (we no longer pin
# protocol="https"). Behind UW CSE's TLS-terminating proxy, Django
# reaches https via SECURE_PROXY_SSL_HEADER trusting X-Forwarded-Proto
# (#1329). Simulate that header here and confirm every <loc> is https.
self.make_project(name="Scheme Proj", short_name="schemeproj",
is_visible=True)
body = self.client.get("/sitemap.xml").content.decode()
body = self.client.get(
"/sitemap.xml", HTTP_X_FORWARDED_PROTO="https"
).content.decode()
locs = re.findall(r"<loc>(.*?)</loc>", body)
self.assertTrue(locs) # guard against an empty sitemap passing vacuously
self.assertFalse(
[loc for loc in locs if not loc.startswith("https://")],
"all sitemap <loc> URLs should use the https scheme",
"with X-Forwarded-Proto=https the sitemap <loc> URLs should be https",
)

def test_sitemap_scheme_follows_request(self):
# With the protocol pin removed, a plain request (no forwarded-proto,
# no SECURE_PROXY_SSL_HEADER) reflects the request scheme — http here.
# This is the local-dev / direct-request case; the proxy supplies https
# in the deployed environments (see the test above and #1329).
self.make_project(name="Plain Proj", short_name="plainproj",
is_visible=True)
body = self.client.get("/sitemap.xml").content.decode()
locs = re.findall(r"<loc>(.*?)</loc>", body)
self.assertTrue(locs)
self.assertTrue(
all(loc.startswith("http://") for loc in locs),
"without a forwarded-proto header the sitemap should reflect the "
"request scheme (http) rather than a pinned https",
)
Loading