From c6d8065e2db9af13583bfcf0eee4836b0c8738cc Mon Sep 17 00:00:00 2001 From: James Estevez Date: Wed, 6 May 2026 22:39:51 -0700 Subject: [PATCH 01/10] spike(phase-0): compare base-image candidates for container runtime Switch the Dockerfile FROM python:3.12 to python:3.12-slim-bookworm to shrink the image by ~510 MB and drop 66 high/critical base-image CVEs without touching requirements/*.txt or Phase 2 work. Required slim shims are added in-line: build-essential plus -dev headers for source-built wheels (multidict, etc.), and a setuptools<81 pin so cumulusci's pkg_resources.declare_namespace import keeps working under modern pip. Pin the slim base to -bookworm explicitly: the unpinned python:3.12-slim tag now resolves to debian trixie (gcc 14), whose stricter default warnings break multidict 6.0.4's pre-3.12-CPython C source. Co-authored-by: Cursor --- Dockerfile | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9fb367480..0972e1089 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ ARG BUILD_ENV=development ARG PROD_ASSETS ARG OMNIOUT_TOKEN FROM node:22 AS node_base -FROM python:3.12 +FROM python:3.12-slim-bookworm # Node and npm COPY --from=node_base /usr/local/lib/node_modules /usr/local/lib/node_modules @@ -16,13 +16,25 @@ RUN ln -s /opt/yarn/bin/yarnpkg /usr/local/bin/yarnpkg RUN node --version && npm --version && yarn --version # System setup: +# slim base lacks compilers and -dev headers needed to build wheels +# for cryptography, lxml, psycopg2-binary, etc. Add toolchain deps. 
RUN apt-get update \ - && apt-get install -y gettext redis-tools --no-install-recommends \ + && apt-get install -y --no-install-recommends \ + gettext \ + redis-tools \ + build-essential \ + libxml2-dev \ + libxslt-dev \ + libpq-dev \ + libffi-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Python context setup: -RUN pip install --no-cache-dir --upgrade pip pip-tools +# setuptools<81 keeps the legacy pkg_resources.declare_namespace API +# that cumulusci's __init__ relies on. The full python:3.12 image +# ships an older setuptools by default; slim does not, so pin it. +RUN pip install --no-cache-dir --upgrade pip pip-tools "setuptools<81" # ================ ENVIRONMENT ENV PYTHONUNBUFFERED=1 @@ -40,7 +52,7 @@ RUN npm install --location=global sfdx-cli --ignore-scripts # Python requirements: COPY ./requirements requirements -RUN pip install --no-cache-dir --upgrade pip pip-tools \ +RUN pip install --no-cache-dir --upgrade pip pip-tools "setuptools<81" \ && pip install --no-cache-dir -r requirements/prod.txt RUN pip install --no-cache-dir -r requirements/dev.txt From 60d1f84bab4b749765a51fa247716b2469b2b8fc Mon Sep 17 00:00:00 2001 From: James Estevez Date: Wed, 6 May 2026 22:56:28 -0700 Subject: [PATCH 02/10] spike(phase-0): polish per code-quality review (M1, M3, M5) M1: caveat the recommendation paragraph to mention whole-image CVEs (not just base-image CVEs) so a stop-after-recommendation reader doesn't leave with an inflated impression of the slim win. M3: split 'Concerns to surface for sub-task 0.3' into '0.3 hand-offs' (items 1, 2) and 'Deferred / out-of-scope follow-ups' (items 3, 4, 5). The original heading misrepresented its own contents. M5: add a Dockerfile cross-reference comment explaining why setuptools<81 is pinned in two RUN lines (the second pip-install layer would otherwise re-resolve setuptools to >=81 via --upgrade pip-tools). M2 (percentage) and M4 (alphabetize apt packages) skipped per reviewer ('skippable'). 
Co-authored-by: Cursor --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index 0972e1089..63f731a01 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,6 +51,9 @@ ENV OMNIOUT_TOKEN=${OMNIOUT_TOKEN} RUN npm install --location=global sfdx-cli --ignore-scripts # Python requirements: +# setuptools<81 repeated here because --upgrade pip-tools would otherwise +# re-resolve setuptools to >=81 in this layer; the pin must survive both +# pip-install invocations (see the earlier toolchain layer for the full why). COPY ./requirements requirements RUN pip install --no-cache-dir --upgrade pip pip-tools "setuptools<81" \ && pip install --no-cache-dir -r requirements/prod.txt From 58a1ee709bfcb2b0c92de4a85052b177a5faf254 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Wed, 6 May 2026 23:12:29 -0700 Subject: [PATCH 03/10] feat(phase-0): heroku container runtime config + review-app integration Co-authored-by: Cursor --- app.json | 11 ++++++----- heroku.yml | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 heroku.yml diff --git a/app.json b/app.json index beeff3aa1..c728ca942 100644 --- a/app.json +++ b/app.json @@ -3,6 +3,7 @@ "description": "Web-based tool for installing Salesforce products", "repository": "https://github.com/SFDO-Tooling/MetaDeploy", "keywords": ["ci", "python", "django", "salesforce", "github"], + "stack": "container", "env": { "DJANGO_ALLOWED_HOSTS": { "description": "Heroku proxies web requests and Django needs to be configured to allow the forwards", @@ -67,19 +68,19 @@ "formation": { "web": { "quantity": 1, - "size": "free" + "size": "basic" }, "devworker": { "quantity": 1, - "size": "free" + "size": "basic" }, "worker": { "quantity": 0, - "size": "free" + "size": "basic" }, "worker-short": { "quantity": 0, - "size": "free" + "size": "basic" } }, "addons": ["heroku-postgresql", "heroku-redis"], @@ -107,7 +108,7 @@ }, "review": { "scripts": { - "postdeploy": "./manage.py populate_db" + 
"postdeploy": "./manage.py populate_data" } } } diff --git a/heroku.yml b/heroku.yml new file mode 100644 index 000000000..581227b8b --- /dev/null +++ b/heroku.yml @@ -0,0 +1,7 @@ +build: + docker: + web: Dockerfile +release: + image: web + command: + - ./.heroku/release.sh From c6547e529338dbe5e132b4459f48a514d527d662 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Wed, 6 May 2026 23:27:55 -0700 Subject: [PATCH 04/10] fix(phase-0): declare explicit run commands for all four process types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two consecutive review-app builds timed out 'waiting to start' because heroku.yml only declared build.docker.web while app.json's formation declares four process types (web, devworker, worker, worker-short). Heroku's container build dispatcher couldn't reconcile that. Also: the Dockerfile's CMD is start-server.sh (dev-mode: runs migrate + populate_data + 'yarn serve' under config.settings.local), not the production daphne entrypoint. Without explicit run.web in heroku.yml, Heroku would have run start-server.sh on the production review app — wrong command, wrong settings module. 
run.* mirrors the existing Procfile commands exactly: web -> daphne ASGI server (production) devworker -> honcho dev-worker bundle worker -> Selenium browser worker (note: chrome path is buildpack-flavored; tracked as known follow-on; this dyno stays quantity:0 in app.json review formation) worker-short -> honcho short-job worker bundle Co-authored-by: Cursor --- heroku.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/heroku.yml b/heroku.yml index 581227b8b..5ee3385b1 100644 --- a/heroku.yml +++ b/heroku.yml @@ -1,6 +1,11 @@ build: docker: web: Dockerfile +run: + web: daphne --bind 0.0.0.0 --port $PORT metadeploy.asgi:application + devworker: honcho start -f Procfile_devworker + worker: sh .heroku/start_metadeploy_worker.sh + worker-short: honcho start -f Procfile_worker_short release: image: web command: From c9f9adbd50950b27ce062fbcfa187cca42dac586 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Thu, 7 May 2026 00:35:06 -0700 Subject: [PATCH 05/10] fix(dockerfile): redeclare ARGs after FROM so BUILD_ENV reaches yarn prod Without redeclaring ARG BUILD_ENV / PROD_ASSETS / OMNIOUT_TOKEN inside the python:3.12-slim-bookworm stage, the values declared above the first FROM are out of scope for RUN instructions. The yarn-prod conditional \`[ "${BUILD_ENV}" = "production" ]\` evaluated empty-string against "production", fell through to the else branch (\`mkdir -p dist/prod\`), and shipped an empty dist/prod. The Django index.html template loader then 500'd on \`/\` with TemplateDoesNotExist (the SPA bundle is built into dist/prod/index.html). Surfaced during the first end-to-end smoke of the container build on metadeploy-pr-3588. --- Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Dockerfile b/Dockerfile index 63f731a01..d13753241 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,13 @@ ARG OMNIOUT_TOKEN FROM node:22 AS node_base FROM python:3.12-slim-bookworm +# Re-import build args inside this stage. 
ARGs declared before the first +# FROM are only in scope for FROM lines themselves; they reset to undefined +# after each FROM and must be redeclared to be visible to RUN. +ARG BUILD_ENV=development +ARG PROD_ASSETS +ARG OMNIOUT_TOKEN + # Node and npm COPY --from=node_base /usr/local/lib/node_modules /usr/local/lib/node_modules COPY --from=node_base /usr/local/bin/node /usr/local/bin/node From ad42393574fe6980e12a9db9fe63f070beac0482 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Thu, 7 May 2026 01:26:36 -0700 Subject: [PATCH 06/10] fix(container): align CMD with Procfile, add curl, drop dead app.json blocks CMD: switch from /app/start-server.sh (dev-mode yarn serve under config.settings.local) to the Procfile's web command (daphne --bind 0.0.0.0 --port $PORT metadeploy.asgi:application). This is the same command heroku.yml's run.web declares, so behavior is unchanged on Common Runtime where heroku.yml run wins. It is materially different on Private Spaces, where heroku.yml run is ignored and the in-image CMD is what runs; the previous CMD would have launched the dev server in production. docker-compose.yml has its own command: override invoking start-server.sh, so local dev is unaffected. curl: add to the apt install list. Heroku release-phase log streaming relies on curl in the image; without it, release output silently degrades to app-logs only. Also useful for in-dyno debug. app.json buildpacks block: dead config under stack: container. heroku.yml is the source of truth for container builds; the buildpacks declaration contradicts the stack and is ignored. app.json environments.test block: Heroku CI does not support container builds, and the metadeploy pipeline has zero recorded test-runs. CI moved to GitHub Actions in 2022 (.github/workflows/ test.yml). The block was a silent no-op. 
--- Dockerfile | 3 ++- app.json | 21 --------------------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/Dockerfile b/Dockerfile index d13753241..3ec1d7f11 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,6 +34,7 @@ RUN apt-get update \ libxslt-dev \ libpq-dev \ libffi-dev \ + curl \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -87,4 +88,4 @@ RUN \ SFDX_CLIENT_ID="sample id" \ python manage.py collectstatic --noinput -CMD /app/start-server.sh +CMD ["sh", "-c", "exec daphne --bind 0.0.0.0 --port $PORT metadeploy.asgi:application"] diff --git a/app.json b/app.json index c728ca942..41c3f6697 100644 --- a/app.json +++ b/app.json @@ -84,28 +84,7 @@ } }, "addons": ["heroku-postgresql", "heroku-redis"], - "buildpacks": [ - { - "url": "heroku/nodejs" - }, - { - "url": "heroku/python" - } - ], "environments": { - "test": { - "scripts": { - "test-setup": "pip install --upgrade -r requirements/test.txt", - "test": "pytest" - }, - "env": { - "DJANGO_SETTINGS_MODULE": "config.settings.test", - "DATABASE_URL": "sqlite:///test.db", - "AWS_ACCESS_KEY_ID": "None", - "AWS_SECRET_ACCESS_KEY": "None", - "AWS_BUCKET_NAME": "None" - } - }, "review": { "scripts": { "postdeploy": "./manage.py populate_data" From d8b669c16a5f25ff5450a8d5ddebe063976ba7d7 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Thu, 7 May 2026 01:50:29 -0700 Subject: [PATCH 07/10] docs: heroku container runtime + manual CVE rebuild cadence Documents the container-runtime build/release path (Heroku-built preferred, local container:push fallback), the Heroku Private Spaces CMD-vs-heroku.yml-run quirk, and a manual CVE rebuild cadence (monthly + on Critical CVE) to use until automated rebuild plumbing lands. Replaces the buildpacks-shaped portions of running_heroku.md (which still need a separate rewrite). This page is the public-facing landing for operators. 
It explains why the Dockerfile CMD must stay aligned with the heroku.yml web run command, and why container:release skips release.command (so manual release.sh runs are required after a local container:push round). --- docs/heroku-container-runtime.md | 79 ++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 docs/heroku-container-runtime.md diff --git a/docs/heroku-container-runtime.md b/docs/heroku-container-runtime.md new file mode 100644 index 000000000..fe6fb6732 --- /dev/null +++ b/docs/heroku-container-runtime.md @@ -0,0 +1,79 @@ +# Heroku container runtime + +MetaDeploy is built and deployed via the [Heroku container runtime](https://devcenter.heroku.com/articles/build-docker-images-heroku-yml) rather than the legacy buildpacks slug. The container image is built from the repository's `Dockerfile` per the spec in `heroku.yml`, and the resulting image runs the `web`, `worker`, `worker-short`, and `devworker` dynos. + +This page documents two things every operator of a MetaDeploy deployment should know: + +1. How the container image is built and released. +2. How to keep the base image patched against published CVEs. + +## Build and release + +`heroku.yml` declares the build, release, and run commands. 
The relevant fields are: + +```yaml +build: + docker: + web: Dockerfile +release: + image: web + command: + - ./.heroku/release.sh +run: + web: daphne --bind 0.0.0.0 --port $PORT metadeploy.asgi:application + devworker: honcho start -f Procfile_devworker + worker: sh .heroku/start_metadeploy_worker.sh + worker-short: honcho start -f Procfile_worker_short +``` + +Two paths produce a deployed container: + +- **Heroku-built (preferred).** A push to a branch that is wired to a Heroku review-app pipeline, or a direct push to a tracked app, causes Heroku to clone the repo, run `docker build` against the `Dockerfile` declared in `heroku.yml`, run the `release.command` (`./.heroku/release.sh`, which does `python manage.py migrate --noinput`), then promote the new image to the dyno formation. This is the path the GitHub Actions CI exercises for review apps. +- **Locally built (fallback).** If Heroku's build queue is congested or the build environment is otherwise unavailable, you can build the image on your workstation and push it directly to the Heroku container registry: + + ```bash + docker buildx build --platform linux/amd64 --build-arg BUILD_ENV=production \ + -t registry.heroku.com/<app>/web --load . + heroku container:push web -a <app> + heroku container:release web -a <app> + ``` + + **Caveat.** `heroku container:release` does **not** execute the `release.command` declared in `heroku.yml`. After a manual `container:release`, run the release script yourself before serving traffic: + + ```bash + heroku run -a <app> -- bash ./.heroku/release.sh + ``` + + On Apple Silicon hosts the `--platform linux/amd64` flag is required so the resulting image runs on Heroku's amd64 dynos. QEMU emulation under Docker Desktop handles the cross-build transparently; expect the first build to take 5–10 minutes longer than a native build. + +## Heroku Private Spaces note + +In Heroku Private Spaces the `run` field in `heroku.yml` is **not** consulted to start the dyno. 
The container image's `CMD` is used instead. Because of this, the `Dockerfile`'s final `CMD` is kept aligned with the `web` `run` command above (`daphne` against `metadeploy.asgi:application`). If you change one, change the other in the same commit. + +## CVE update mechanism + +The base image is `python:3.12-slim-bookworm` (Debian 12 + CPython 3.12). Fixes for vulnerabilities published against CPython, Debian packages, the Node toolchain layer, or the system OpenSSL reach the deployed image only when it is rebuilt. We do **not** currently have automated rebuild-on-CVE plumbing for this repository (no Dependabot or scheduled GitHub Actions job that bumps the `FROM` tag and opens a PR). Until that is in place, follow the **manual rebuild cadence** below. + +### Manual rebuild cadence + +- **Trigger.** A maintainer rebuilds the image **at least monthly**, and additionally on any of: a Critical CVE against `python:3.12-slim-bookworm`, a Critical CVE against a major Debian package known to ship in the image (`openssl`, `libxml2`, `libcurl4`, `nodejs`), or an emergency advisory from Salesforce security. +- **Procedure.** Push a no-op or version-bump commit to the default branch and let the normal Heroku build pipeline rebuild from the latest base image: + + ```bash + git commit --allow-empty -m "chore: rebuild image to pick up base-image CVE patches" + git push origin main + ``` + + The Heroku review-app and staging builds pull the current `python:3.12-slim-bookworm` digest at build time, picking up any Debian / CPython / Node patches published since the previous build. After the staging dyno is healthy, promote the image to production through the normal pipeline. +- **Verification.** After the rebuild, `heroku run -a <app> -- python -V` prints the current CPython point release, and `heroku run -a <app> -- dpkg -l openssl` shows the patched Debian package version. Spot-check against the upstream advisory. 
+- **Followup tracking.** When the cross-cutting [SFDO-Tooling apps restart roadmap](https://github.com/SFDO-Tooling) publishes an automated CVE-rebuild mechanism (cron-driven rebuild + redeploy, or a workflow that bumps the `FROM` tag and opens a PR), this repo should adopt it and this section should be replaced by a cross-reference. + +## Local development + +`docker-compose.yml` uses the same `Dockerfile` but with `BUILD_ENV=development` and overrides `CMD` to run the Django development server with hot reload. The production container behavior is not exercised by `docker-compose up`; if you need to verify the production image locally, run: + +```bash +docker run --rm -e PORT=8000 -p 8000:8000 \ + -e DATABASE_URL=... -e REDIS_URL=... \ + registry.heroku.com/<app>/web +``` From 7b39278807c8e5ef11b6aa2860deba1b55e3c51b Mon Sep 17 00:00:00 2001 From: James Estevez Date: Thu, 7 May 2026 08:37:10 -0700 Subject: [PATCH 08/10] chore(ci): bump actions/upload-artifact v3 to v4 GitHub deprecated v3 of actions/upload-artifact, and active workflow runs now hard-fail at job-prep with the message: "This request has been automatically failed because it uses a deprecated version of actions/upload-artifact: v3." Bump three call sites: test.yml (Frontend coverage, Backend coverage) and smoke_test.yml (Robot results on failure). The change is unrelated to the Phase 0 container-runtime work but is needed to get this PR's CI green. Without the bump, Build and Lint pass while Frontend and Backend are blocked at start. 
--- .github/workflows/smoke_test.yml | 2 +- .github/workflows/test.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/smoke_test.yml b/.github/workflows/smoke_test.yml index 75a2e38ec..d2da9f514 100644 --- a/.github/workflows/smoke_test.yml +++ b/.github/workflows/smoke_test.yml @@ -56,7 +56,7 @@ jobs: - name: Store robot results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: robot path: robot/results diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cdfc3240c..1c41837b6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,7 +45,7 @@ jobs: - name: Test frontend run: docker compose run --no-deps web yarn test:js:coverage - name: Upload coverage artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: frontend-coverage path: | @@ -74,7 +74,7 @@ jobs: -e SFDX_HUB_KEY="sample key" web yarn test:py - name: Upload coverage artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: backend-coverage path: | From 0c3b017a4f21b768bc19b8f5f4f0418f6349cd12 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Thu, 7 May 2026 08:54:45 -0700 Subject: [PATCH 09/10] chore(probe): trigger heroku build-queue probe sample 2 From e766a8ecf19e6e9f7baa949fabe4f147b79bff67 Mon Sep 17 00:00:00 2001 From: James Estevez Date: Thu, 7 May 2026 09:01:12 -0700 Subject: [PATCH 10/10] chore(probe): trigger heroku build-queue probe sample 3