Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
2068183
fix: jupyterlab, notebook, ssh vulnerabilities
mchekm May 18, 2026
3f2d769
fix: hadolint findings
mchekm May 18, 2026
a70220f
fix: autoresearch hadolint
mchekm May 18, 2026
69641b2
fix: trivy vulnerabilities; bake push: true
mchekm May 18, 2026
e562b5c
fix: pytorch max-parallelism: 4
mchekm May 18, 2026
de920b4
fix: pytorch max-parallelism: 3
mchekm May 18, 2026
d3a7e1e
fix: autoresearch linter
mchekm May 19, 2026
72ae1a4
fix: hadolint findings and RUNPODCTL_VERSION=v2.3.0
mchekm May 19, 2026
d6049ac
fix: hadolint findings
mchekm May 19, 2026
74e86de
fix: base build
mchekm May 19, 2026
629e6c5
fix: hadolint findings
mchekm May 19, 2026
91dcbad
fix: rocm vulnerabilities
mchekm May 19, 2026
d368a69
fix: script to scrub stale metadata
mchekm May 19, 2026
eafb843
align with base branch
mchekm May 13, 2026
cc797ec
feat: upgrade github actions versions and increase pytorch timeout
mchekm May 20, 2026
e04f71a
fix: docker/setup-qemu-action
mchekm May 20, 2026
d8da79a
fix: nvidia-pythorch vulnerabilities
mchekm May 20, 2026
1feffe9
fix: base workflow
mchekm May 20, 2026
fe0cf15
fix: ignore nvidia-pytorch trvy findings with certs
mchekm May 20, 2026
8e54e0f
Merge branch 'feat/image-security-scanner' into fix/image-vulnerabili…
mchekm May 20, 2026
dfdcc8f
fix: rocm vulnerabilities
mchekm May 21, 2026
14fccdc
check if filebrowser generates vulneralities
mchekm May 21, 2026
6936050
fix: nic_sampler vulnerabilities
mchekm May 21, 2026
417b296
chore: added comment
mchekm May 22, 2026
d7a76cc
fix: ignore some of hadolint findings on push
mchekm May 22, 2026
94f453f
fix: relocate scrub-stale-metadata.py
mchekm May 25, 2026
318ee0a
fix: relocate scrub-stale-metadata.py
mchekm May 25, 2026
7e46af8
fix: scrab_stale_metadata
mchekm May 25, 2026
be84342
fix: scrub-stale-metadata.py
mchekm May 27, 2026
5731245
feat: bump version
mchekm May 27, 2026
93447a7
Merge branch 'feat/image-security-scanner' into fix/image-vulnerabili…
mchekm May 27, 2026
0441083
fix: do not run trivy if no changes
mchekm May 29, 2026
0fc2cb4
fix: comments
mchekm Jun 8, 2026
0c95b22
Merge branch 'main' into fix/image-vulnerabilities
mchekm Jun 9, 2026
357ec59
feat: increased runners and bake-action doesn't push images
mchekm Jun 9, 2026
be335b0
fix: vulnerabilities in pip packages
mchekm Jun 9, 2026
75d308e
fix: nvidia requirements
mchekm Jun 9, 2026
19d5d77
feat: push after grype
mchekm Jun 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/actions/docker-push/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Composite action that pushes locally-loaded images to their registry refs.
# Each ref in `image-refs` is pushed in order; because the script runs with
# `set -e`, the step stops at the first `docker push` that fails.
name: Docker Push
description: "Push locally-loaded images to their registry refs"

inputs:
  image-refs:
    description: "JSON array of image references to push"
    required: true

runs:
  using: composite
  steps:
    - name: Push
      shell: bash
      env:
        # Handed to the script through the environment instead of being
        # interpolated into it, so the JSON is never evaluated as shell code.
        IMAGE_REFS: ${{ inputs.image-refs }}
      run: |
        set -euo pipefail

        # Validate the input before reading it. The refs are read through a
        # process substitution below, and a jq failure inside `< <(...)` is
        # not seen by `set -e`/`pipefail`; without this check malformed JSON
        # would surface only as the misleading "No image refs to push".
        # Requiring an array also rejects a JSON object, whose values `.[]`
        # would otherwise iterate and push as if they were refs.
        if ! jq -e 'type == "array"' <<< "${IMAGE_REFS}" > /dev/null; then
          echo "::error::image-refs must be a JSON array of image references"
          exit 1
        fi

        # One array element per output line -> one bash array entry.
        mapfile -t refs < <(jq -r '.[]' <<< "${IMAGE_REFS}")
        if [ ${#refs[@]} -eq 0 ]; then
          echo "No image refs to push"
          exit 1
        fi

        for ref in "${refs[@]}"; do
          # Fold each push into its own collapsible section of the job log.
          echo "::group::docker push — ${ref}"
          docker push "${ref}"
          echo "::endgroup::"
        done
33 changes: 32 additions & 1 deletion .github/actions/grype/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@ description: "Scan Docker images with Grype. For now it's report-only and doesn'

inputs:
image-refs:
description: "JSON array of image references to scan"
description: "JSON array of image refs to scan"
required: true
skip-files:
description: |
Optional newline-separated list of glob patterns to pass to Trivy as
--skip-files. Use this to silence known-benign findings (e.g. demo
cert/key fixtures shipped by vendored upstream libraries in NGC base
images) without affecting the rest of the scan.
required: false
default: ""

runs:
using: composite
Expand All @@ -22,6 +30,7 @@ runs:
shell: bash
env:
IMAGE_REFS: ${{ inputs.image-refs }}
SKIP_FILES: ${{ inputs.skip-files }}
run: |
set -uo pipefail
mapfile -t refs < <(echo "${IMAGE_REFS}" | jq -r '.[]')
Expand All @@ -30,8 +39,30 @@ runs:
exit 1
fi

trivy_cmd=(
trivy image
--timeout 30m
--severity CRITICAL,HIGH
--exit-code 1
--ignore-unfixed
--pkg-types os,library
--format table
--no-progress
)

skip_patterns=()
while IFS= read -r pattern; do
[ -z "$pattern" ] && continue
trivy_cmd+=(--skip-files "$pattern")
skip_patterns+=("$pattern")
done <<< "${SKIP_FILES}"

echo "Scanning ${#refs[@]} image(s):"
printf ' - %s\n' "${refs[@]}"
if [ ${#skip_patterns[@]} -gt 0 ]; then
echo "Skipping file patterns:"
printf ' - %s\n' "${skip_patterns[@]}"
fi

failed=()
for ref in "${refs[@]}"; do
Expand Down
32 changes: 26 additions & 6 deletions .github/workflows/base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ permissions:

jobs:
build-base:
runs-on: blacksmith-8vcpu-ubuntu-2204
runs-on: blacksmith-16vcpu-ubuntu-2204
steps:
- name: Checkout
uses: actions/checkout@v6
Expand All @@ -46,7 +46,8 @@ jobs:
files: |
official-templates/shared/versions.hcl
official-templates/base/docker-bake.hcl
push: true
load: true
push: false

- name: Extract image refs
id: refs
Expand All @@ -59,10 +60,15 @@ jobs:
with:
image-refs: ${{ steps.refs.outputs.refs }}

- name: Push images
uses: ./.github/actions/docker-push
with:
image-refs: ${{ steps.refs.outputs.refs }}

build-autoresearch:
needs: build-base
if: always() && (needs.build-base.result == 'success' || needs.build-base.result == 'skipped')
runs-on: blacksmith-8vcpu-ubuntu-2204
runs-on: blacksmith-16vcpu-ubuntu-2204
steps:
- name: Checkout
uses: actions/checkout@v6
Expand Down Expand Up @@ -99,7 +105,8 @@ jobs:
files: |
official-templates/shared/versions.hcl
official-templates/autoresearch/docker-bake.hcl
push: true
load: true
push: false

- name: Extract image refs
id: refs
Expand All @@ -114,11 +121,17 @@ jobs:
with:
image-refs: ${{ steps.refs.outputs.refs }}

- name: Push images
if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.autoresearch_any_changed == 'true'
uses: ./.github/actions/docker-push
with:
image-refs: ${{ steps.refs.outputs.refs }}

build-pytorch:
needs: build-base
# always() lets this job be evaluated even when its dependency (build-base) was skipped; the result check below still prevents it from running when build-base failed.
if: always() && (needs.build-base.result == 'success' || needs.build-base.result == 'skipped')
runs-on: blacksmith-16vcpu-ubuntu-2404
runs-on: blacksmith-32vcpu-ubuntu-2404
steps:
- name: Checkout
uses: actions/checkout@v6
Expand Down Expand Up @@ -156,7 +169,8 @@ jobs:
files: |
official-templates/shared/versions.hcl
official-templates/pytorch/docker-bake.hcl
push: true
load: true
push: false

- name: Extract image refs
id: refs
Expand All @@ -168,5 +182,11 @@ jobs:
- name: Grype scan
if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.pytorch_any_changed == 'true'
uses: ./.github/actions/grype
with:
image-refs: ${{ steps.refs.outputs.refs }}

- name: Push images
if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.pytorch_any_changed == 'true'
uses: ./.github/actions/docker-push
with:
image-refs: ${{ steps.refs.outputs.refs }}
18 changes: 11 additions & 7 deletions .github/workflows/hadolint-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ jobs:
strategy:
fail-fast: false
matrix:
# Add rows when new top-level Dockerfiles appear (e.g. nvidia-pytorch).
dockerfile:
- official-templates/base/Dockerfile
- official-templates/pytorch/Dockerfile
- official-templates/autoresearch/Dockerfile
- helper-templates/verify-nccl/Dockerfile
include:
- dockerfile: official-templates/base/Dockerfile
ignore: DL3006,DL3008,DL3013,DL3022
- dockerfile: official-templates/pytorch/Dockerfile
ignore: "DL3013,DL3006"
- dockerfile: official-templates/autoresearch/Dockerfile
ignore: "DL3006"
- dockerfile: helper-templates/verify-nccl/Dockerfile
ignore: "DL3008"
steps:
- name: Checkout
uses: actions/checkout@v6
Expand All @@ -31,4 +34,5 @@ jobs:
dockerfile: ${{ matrix.dockerfile }}
failure-threshold: warning
format: tty
output-file: /dev/stdout
output-file: /dev/stdout
ignore: ${{ matrix.ignore }}
15 changes: 9 additions & 6 deletions .github/workflows/hadolint-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ jobs:
strategy:
fail-fast: false
matrix:
# Add rows when new top-level Dockerfiles appear
dockerfile:
- official-templates/base/Dockerfile
- official-templates/pytorch/Dockerfile
- official-templates/autoresearch/Dockerfile
- helper-templates/verify-nccl/Dockerfile
include:
- dockerfile: official-templates/base/Dockerfile
ignore: DL3006,DL3008,DL3013,DL3022
- dockerfile: official-templates/pytorch/Dockerfile
ignore: "DL3013,DL3006"
- dockerfile: official-templates/autoresearch/Dockerfile
ignore: "DL3006"
- dockerfile: helper-templates/verify-nccl/Dockerfile
ignore: "DL3008"
steps:
- name: Checkout
uses: actions/checkout@v6
Expand Down
15 changes: 13 additions & 2 deletions .github/workflows/nvidia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ permissions:

jobs:
build-nvidia:
runs-on: blacksmith-16vcpu-ubuntu-2404
runs-on: blacksmith-32vcpu-ubuntu-2404
steps:
- name: Checkout
uses: actions/checkout@v6
Expand All @@ -40,7 +40,8 @@ jobs:
files: |
official-templates/shared/versions.hcl
official-templates/nvidia-pytorch/docker-bake.hcl
push: true
load: true
push: false

- name: Extract image refs
id: refs
Expand All @@ -52,3 +53,13 @@ jobs:
uses: ./.github/actions/grype
with:
image-refs: ${{ steps.refs.outputs.refs }}
skip-files: |
**/civetweb/resources/cert/*
**/civetweb/resources/ssl_cert.pem
**/civetweb/resources/server.pem
**/civetweb/resources/server_bkup.pem

- name: Push images
uses: ./.github/actions/docker-push
with:
image-refs: ${{ steps.refs.outputs.refs }}
15 changes: 13 additions & 2 deletions .github/workflows/rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ permissions:

jobs:
build-rocm:
runs-on: blacksmith-16vcpu-ubuntu-2404
runs-on: blacksmith-32vcpu-ubuntu-2404
steps:
- name: Checkout
uses: actions/checkout@v6
Expand All @@ -40,7 +40,8 @@ jobs:
files: |
official-templates/shared/versions.hcl
official-templates/rocm/docker-bake.hcl
push: true
load: true
push: false

- name: Extract image refs
id: refs
Expand All @@ -50,5 +51,15 @@ jobs:

- name: Grype scan
uses: ./.github/actions/grype
with:
image-refs: ${{ steps.refs.outputs.refs }}
skip-files: |
**/civetweb/resources/cert/*
**/civetweb/resources/ssl_cert.pem
**/civetweb/resources/server.pem
**/civetweb/resources/server_bkup.pem

- name: Push images
uses: ./.github/actions/docker-push
with:
image-refs: ${{ steps.refs.outputs.refs }}
10 changes: 6 additions & 4 deletions helper-templates/verify-nccl/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
pciutils \
&& rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/NVIDIA/cuda-samples.git && \
cd cuda-samples/Samples/0_Introduction/simpleP2P && \
make
RUN git clone https://github.com/NVIDIA/cuda-samples.git

WORKDIR /verify-nccl/cuda-samples/Samples/0_Introduction/simpleP2P

RUN make

COPY --chmod=755 check_nccl.sh .

# Start Container
CMD tail -f /dev/null
CMD ["tail", "-f", "/dev/null"]
10 changes: 5 additions & 5 deletions official-templates/autoresearch/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
ARG BASE_IMAGE=non-existing
FROM ${BASE_IMAGE}

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install runpodctl for pod management (scaling up GPUs)
ARG RUNPODCTL_VERSION=v2.1.6
ARG RUNPODCTL_VERSION=v2.3.0
RUN wget -qO- https://github.com/runpod/runpodctl/releases/download/${RUNPODCTL_VERSION}/runpodctl-linux-amd64.tar.gz | \
tar -xz -C /usr/local/bin runpodctl

Expand All @@ -14,10 +16,8 @@ RUN git clone --branch ${AUTORESEARCH_REF} --depth 1 \
WORKDIR /opt/autoresearch

# Install Python dependencies
RUN uv sync

# Download data and train tokenizer (~2 min)
RUN uv run prepare.py
RUN uv sync && \
uv run prepare.py # Download data and train tokenizer (~2 min)

# On first boot: copy source files to /workspace (lightweight, persists edits)
# and symlink .venv back to /opt (12GB, stays on fast container layer)
Expand Down
Loading
Loading