diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..f72bbff2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.github +.gitignore +__pycache__ +*.pyc +*.pyo +*.egg-info +.env +.venv +docs/ +tests/ +*.md +!README.md diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 9f2206d5..6287e429 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -26,7 +26,7 @@ jobs: file: docker/Dockerfile platforms: linux/amd64,linux/arm64 push: true - tags: ${{vars.DOCKERHUB_USERNAME}}/${{vars.DOCKERHUB_CONTAINER}}:latest,${{vars.DOCKERHUB_USERNAME}}/${{vars.DOCKERHUB_CONTAINER}}:${{github.ref_name}} + tags: ${{vars.DOCKERHUB_USERNAME}}/${{vars.DOCKERHUB_CONTAINER}}:latest,${{vars.DOCKERHUB_USERNAME}}/${{vars.DOCKERHUB_CONTAINER}}:unstable cache-from: type=gha cache-to: type=gha,mode=max diff --git a/VERSION.txt b/VERSION.txt index 029696b1..86edc68d 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -4.0.18 +4.0.20 diff --git a/debian/changelog b/debian/changelog index 45dd61ba..d07e4a60 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,58 @@ +patchman (4.0.20-1) stable; urgency=medium + + * django 5.2 compatibility updates + * fix zstd decompression for python 3.13+ stdlib compression.zstd + * skip decompression when content is already text + * change erratum synopsis from CharField(255) to TextField + * rename functions to match codebase style + * handle non-http(s) mirror urls gracefully + * escape filterlist values + * auto-commit to update version skip-checks: true + + -- Marcus Furlong Thu, 07 May 2026 01:05:41 +0000 + +patchman (4.0.19-1) stable; urgency=medium + + [ dependabot[bot] ] + * Bump requests from 2.32.4 to 2.33.0 + + [ Marcus Furlong ] + * use sets instead of lists for update tracking + * merge duplicate update-finding methods into find_repo_updates + * remove underscore prefix from kernel helper methods + * don't rename repos from client reports + * filter deb kernel updates by major.minor series + * add package updates list view with table, filters, and nav entry + * add sortable columns to package list and name detail views + + [ dependabot[bot] ] + * Bump django from 4.2.29 to 4.2.30 + * add celery worker resilience for database connection timeouts + * auto-enable wal mode for sqlite backend + * fix duplicate verbose_name_plural in report model meta + * sanitize filter_params in bulk action views + * add null guard for missing references element in updateinfo xml + * return early on yaml parse error in extract_module_metadata + * fix null url handling in osv.dev cve references + * send info messages to stdout instead of stderr + * move function-level import to top-level + * bulk db optimizations for errata processing + * add concurrent processing with deadlock workaround and progress bar fixes + * add fetch_concurrently helper function + + [ Aman Maharjan ] + * fix: deb kernel meta-packages bypass series check causing false HWE updates + * fix: skip meta-packages in deb kernel series check instead of falling back to running kernel + + [ Marcus Furlong ] + * add ubuntu 26.04 resolute codename + + [ dependabot[bot] ] + * Bump gitpython from 3.1.44 to 3.1.47 + * auto-commit to update version skip-checks: true + + -- Marcus Furlong Wed, 29 Apr 2026 01:25:17 +0000 + patchman (4.0.18-1) stable; urgency=medium * handle malformed repos better diff --git a/docker/Dockerfile b/docker/Dockerfile index 1ed337aa..d1c2d56d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,26 +1,85 @@ -FROM debian:bookworm-slim +FROM debian:trixie-slim AS builder -RUN apt -y update && apt -y upgrade -RUN apt install -y apache2 git libapache2-mod-wsgi-py3 mariadb-client python-celery-common python3-celery python3-debian python3-defusedxml python3-lxml python3-mysqldb python3-pip python3-progressbar python3-psycopg2 python3-redis python3-rpm sendmail sharutils uuid-runtime vim weasyprint +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATCHMAN_HOME=/srv/patchman -WORKDIR /srv/patchman +RUN apt-get -y update && apt-get -y upgrade && \ + apt-get install -y --no-install-recommends \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* -COPY . /srv/patchman/ -COPY ./etc/patchman/apache.conf.example /etc/apache2/sites-available/patchman.conf +WORKDIR ${PATCHMAN_HOME} -RUN /srv/patchman/setup.py install +COPY requirements.txt . +RUN pip install --no-cache-dir --break-system-packages setuptools -r requirements.txt -COPY ./email/patchman-email /usr/bin/patchman-email -COPY ./etc/patchman/patchman-email.conf /etc/patchman/patchman-email.conf -RUN chmod u+x /usr/bin/patchman-email +COPY . ${PATCHMAN_HOME}/ + +RUN ${PATCHMAN_HOME}/setup.py install --no-compile && \ + rm -rf build/ dist/ *.egg-info/ .eggs/ /root/.cache/ + + +FROM debian:trixie-slim AS runtime + +LABEL maintainer="4950815+RicardoJeronimo@users.noreply.github.com" \ + org.opencontainers.image.title="Patchman" \ + org.opencontainers.image.base.name="debian:trixie-slim" \ + org.opencontainers.image.source="https://github.com/RicardoJeronimo/patchman" + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATCHMAN_HOME=/srv/patchman -RUN a2enmod wsgi -RUN a2ensite patchman +RUN apt-get -y update && apt-get -y upgrade && \ + apt-get install -y --no-install-recommends \ + apache2 \ + curl \ + git \ + gosu \ + libapache2-mod-wsgi-py3 \ + libmagic1 \ + mariadb-client \ + python3-debian \ + python3-defusedxml \ + python3-lxml \ + python3-mysqldb \ + python3-packaging \ + python3-psycopg2 \ + sendmail \ + sharutils \ + uuid-runtime \ + weasyprint \ + && rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/lib/patchman/db -RUN chown :www-data /var/lib/patchman/db && chmod 2770 /var/lib/patchman/db +WORKDIR ${PATCHMAN_HOME} + +COPY --from=builder /usr/local/lib/ /usr/local/lib/ +COPY --from=builder /usr/local/bin/patchman* /usr/local/bin/ +COPY --from=builder /usr/local/bin/celery /usr/local/bin/ +COPY --from=builder /etc/patchman/ /etc/patchman/ +COPY --from=builder ${PATCHMAN_HOME}/ ${PATCHMAN_HOME}/ + +RUN cp ${PATCHMAN_HOME}/etc/patchman/apache.conf.example /etc/apache2/sites-available/patchman.conf && \ + echo "ServerName localhost" >> /etc/apache2/apache2.conf && \ + a2enmod wsgi && \ + a2ensite patchman && \ + a2dissite 000-default && \ + groupadd --system patchman && \ + useradd --system --gid patchman --no-create-home patchman && \ + mkdir -p /var/lib/patchman/db && \ + chown patchman:www-data /var/lib/patchman/db && \ + chmod 2770 /var/lib/patchman/db + +COPY ./docker/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh +COPY ./email/patchman-email /usr/bin/patchman-email +COPY ./etc/patchman/patchman-email.conf /etc/patchman/patchman-email.conf +RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \ + chmod +x /usr/bin/patchman-email EXPOSE 80 -COPY ./docker/docker-entrypoint.sh docker-entrypoint.sh -ENTRYPOINT ["./docker-entrypoint.sh"] +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -f http://localhost/patchman/ || exit 1 + +ENTRYPOINT ["docker-entrypoint.sh"] diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh old mode 100755 new mode 100644 index 340020a8..fe992c21 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -1,87 +1,104 @@ #!/bin/bash +set -euo pipefail + +log() { echo "[INFO] $(date -u '+%Y-%m-%dT%H:%M:%SZ') $*"; } +warn() { echo "[WARN] $(date -u '+%Y-%m-%dT%H:%M:%SZ') $*" >&2; } +die() { echo "[ERROR] $(date -u '+%Y-%m-%dT%H:%M:%SZ') $*" >&2; exit 1; } conf="/etc/patchman/local_settings.py" +[ -f "$conf" ] || die "Configuration file not found: $conf." # Configure DEBUG -if "${DEBUG}"; then +if [ "${DEBUG:-false}" = true ]; then + log "DEBUG mode enabled." sed -i '3 {s/False/True/}' "$conf" fi # Configure ADMINS -if [ -n "${ADMIN_NAME}" ]; then +if [ -n "${ADMIN_NAME:-}" ]; then sed -i '6 {s/Your Name/'"${ADMIN_NAME}"'/}' "$conf" fi -if [ -n "${ADMIN_EMAIL}" ]; then +if [ -n "${ADMIN_EMAIL:-}" ]; then sed -i '6 {s/you@example.com/'"${ADMIN_EMAIL}"'/}' "$conf" fi # Configure DATABASES -if [ -n "${DB_ENGINE}" ]; then +if [ -n "${DB_ENGINE:-}" ]; then sed -i '9,18 {/^#/ ! s/\(.*\)/#\1/}' "$conf" if [[ $(grep -v "#" "$conf" | grep -c "ENGINE") -lt 2 ]]; then - if [ "${DB_ENGINE}" == "MySQL" ]; then - if [ -n "${DB_PORT}" ]; then - dbPort="${DB_PORT}" - else - dbPort="3306" - fi - - cat <<-EOF >> "$conf" - - DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.mysql', - 'NAME': '${DB_DATABASE}', - 'USER': '${DB_USER}', - 'PASSWORD': '${DB_PASSWORD}', - 'HOST': '${DB_HOST}', - 'PORT': '$dbPort', - 'STORAGE_ENGINE': 'INNODB', - 'CHARSET' : 'utf8' - } - } - EOF - - elif [ "${DB_ENGINE}" == "PostgreSQL" ]; then - if [ -n "${DB_PORT}" ]; then - dbPort="${DB_PORT}" - else - dbPort="5432" - fi - - cat <<-EOF >> "$conf" - - DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', - 'NAME': '${DB_DATABASE}', - 'USER': '${DB_USER}', - 'PASSWORD': '${DB_PASSWORD}', - 'HOST': '${DB_HOST}', - 'PORT': '$dbPort', - 'CHARSET' : 'utf8' - } - } - EOF - fi + case "${DB_ENGINE}" in + SQLite) + log "Using SQLite database." + ;; + MySQL) + dbPort="${DB_PORT:-3306}" + [ -n "${DB_DATABASE:-}" ] || die "DB_DATABASE is required for MySQL." + [ -n "${DB_USER:-}" ] || die "DB_USER is required for MySQL." + [ -n "${DB_HOST:-}" ] || die "DB_HOST is required for MySQL." + log "Configuring MySQL database at ${DB_HOST}:${dbPort}." + + cat <<-EOF >> "$conf" + + DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.mysql', + 'NAME': '${DB_DATABASE}', + 'USER': '${DB_USER}', + 'PASSWORD': '${DB_PASSWORD}', + 'HOST': '${DB_HOST}', + 'PORT': '$dbPort', + 'STORAGE_ENGINE': 'INNODB', + 'CHARSET' : 'utf8' + } + } + EOF + ;; + + PostgreSQL) + dbPort="${DB_PORT:-5432}" + [ -n "${DB_DATABASE:-}" ] || die "DB_DATABASE is required for PostgreSQL." + [ -n "${DB_USER:-}" ] || die "DB_USER is required for PostgreSQL." + [ -n "${DB_HOST:-}" ] || die "DB_HOST is required for PostgreSQL." + log "Configuring PostgreSQL database at ${DB_HOST}:${dbPort}." + + cat <<-EOF >> "$conf" + + DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.postgresql_psycopg2', + 'NAME': '${DB_DATABASE}', + 'USER': '${DB_USER}', + 'PASSWORD': '${DB_PASSWORD}', + 'HOST': '${DB_HOST}', + 'PORT': '$dbPort', + 'CHARSET' : 'utf8' + } + } + EOF + ;; + + *) + die "Invalid DB_ENGINE: '${DB_ENGINE}'." + ;; + esac fi fi # Configure TIME_ZONE -if [ -n "${TIMEZONE}" ]; then +if [ -n "${TIMEZONE:-}" ]; then sed -i '22 {s/America\/New_York/'"${TIMEZONE/\//\\/}"'/}' "$conf" fi # Configure LANGUAGE_CODE -if [ -n "${LANGUAGE_CODE}" ]; then +if [ -n "${LANGUAGE_CODE:-}" ]; then sed -i '26 {s/en-us/'"${LANGUAGE_CODE}"'/}' "$conf" fi # Configure SECRET_KEY if [ -z "$(grep "SECRET_KEY" "$conf" | cut -d " " -f 3 | tr -d "'")" ]; then - if [ -n "${SECRET_KEY}" ]; then + if [ -n "${SECRET_KEY:-}" ]; then sed -i "29 {s/SECRET_KEY = ''/SECRET_KEY = '${SECRET_KEY}'/}" "$conf" else patchman-set-secret-key @@ -89,102 +106,86 @@ if [ -z "$(grep "SECRET_KEY" "$conf" | cut -d " " -f 3 | tr -d "'")" ]; then fi # Configure MAX_MIRRORS -if [ -n "${MAX_MIRRORS}" ]; then +if [ -n "${MAX_MIRRORS:-}" ]; then sed -i '36 {s/2/'"${MAX_MIRRORS}"'/}' "$conf" fi # Configure MAX_MIRROR_FAILURES -if [ -n "${MAX_MIRROR_FAILURES}" ]; then +if [ -n "${MAX_MIRROR_FAILURES:-}" ]; then sed -i '39 {s/14/'"${MAX_MIRROR_FAILURES}"'/}' "$conf" fi # Configure DAYS_WITHOUT_REPORT -if [ -n "${DAYS_WITHOUT_REPORT}" ]; then +if [ -n "${DAYS_WITHOUT_REPORT:-}" ]; then sed -i '42 {s/14/'"${DAYS_WITHOUT_REPORT}"'/}' "$conf" fi # Configure ERRATA_OS_UPDATES -if [ -n "${ERRATA_OS_UPDATES}" ]; then +if [ -n "${ERRATA_OS_UPDATES:-}" ]; then errataOSUpdates="${ERRATA_OS_UPDATES// /}" sed -i '45 {s/\[.*\]/['"'${errataOSUpdates//,/\', \'}'"']/}' "$conf" fi # Configure ALMA_RELEASES -if [ -n "${ALMA_RELEASES}" ]; then +if [ -n "${ALMA_RELEASES:-}" ]; then sed -i '48 {s/\[.*\]/['"${ALMA_RELEASES}"']/}' "$conf" fi # Configure DEBIAN_CODENAMES -if [ -n "${DEBIAN_CODENAMES}" ]; then +if [ -n "${DEBIAN_CODENAMES:-}" ]; then debianCodenames="${DEBIAN_CODENAMES// /}" sed -i '51 {s/\[.*\]/['"'${debianCodenames//,/\', \'}'"']/}' "$conf" fi # Configure UBUNTU_CODENAMES -if [ -n "${UBUNTU_CODENAMES}" ]; then +if [ -n "${UBUNTU_CODENAMES:-}" ]; then ubuntuCodenames="${UBUNTU_CODENAMES// /}" sed -i '54 {s/\[.*\]/['"'${ubuntuCodenames//,/\', \'}'"']/}' "$conf" fi # Configure CACHES -if "${USE_CACHE}"; then - if [ -n "${REDIS_HOST}" ]; then - redisHost="${REDIS_HOST}" - else - redisHost="127.0.0.1" - fi +redisHost="${REDIS_HOST:-127.0.0.1}" +redisPort="${REDIS_PORT:-6379}" - if [ -n "${REDIS_PORT}" ]; then - redisPort="${REDIS_PORT}" - else - redisPort="6379" - fi - - # Change RedisCache LOCATION +if [ "${USE_CACHE:-false}" = true ]; then + log "Configuring Redis cache at ${redisHost}:${redisPort}." sed -i "62 {s/127.0.0.1:6379/$redisHost:$redisPort/}" "$conf" - if [ -n "${CACHE_TIMEOUT}" ]; then + if [ -n "${CACHE_TIMEOUT:-}" ]; then sed -i "67 {s/0/${CACHE_TIMEOUT}/}" "$conf" fi else - # Change RedisCache to DummyCache to avoid ConnectionError and comment LOCATION + log "Cache disabled, using DummyCache." sed -i '61 {s/redis.RedisCache/dummy.DummyCache/}' "$conf" sed -i '62 {/^#/ ! s/\(.*\)/#\1/}' "$conf" fi # Set sendmail destination -if [ -n "${MTA_HOST}" ]; then +if [ -n "${MTA_HOST:-}" ]; then echo "define(\`SMART_HOST', \`[$MTA_HOST]')dnl" >> /etc/mail/submit.mc m4 /etc/mail/submit.mc > /etc/mail/submit.cf fi # Sync database on container first start if [ ! -f /var/lib/patchman/.firstrun ]; then + log "First run detected, initialising database..." + log "Running makemigrations..." patchman-manage makemigrations + log "Running migrate..." patchman-manage migrate --run-syncdb --fake-initial - patchman-manage collectstatic + log "Running collectstatic..." + patchman-manage collectstatic --noinput - # If SQLite is being used, allow httpd to write - if [ -z "${DB_ENGINE}" ]; then + if [ -z "${DB_ENGINE:-}" ]; then chmod 660 /var/lib/patchman/db/patchman.db fi touch /var/lib/patchman/.firstrun + log "Initialisation complete." fi -# Starts Celery for for realtime processing of reports from clients -if "${USE_CELERY}"; then - if [ -n "${REDIS_HOST}" ]; then - redisHost="${REDIS_HOST}" - else - redisHost="127.0.0.1" - fi - - if [ -n "${REDIS_PORT}" ]; then - redisPort="${REDIS_PORT}" - else - redisPort=6379 - fi +if [ "${USE_CELERY:-false}" = true ]; then + log "Starting Celery worker..." if [ -z "$(grep "USE_ASYNC_PROCESSING" "$conf" | cut -d " " -f 3 | tr -d "'")" ]; then echo "" >> "$conf" @@ -195,14 +196,12 @@ if "${USE_CELERY}"; then echo "CELERY_BROKER_URL = 'redis://$redisHost:$redisPort/0'" >> "$conf" fi - C_FORCE_ROOT=1 celery -b redis://"$redisHost":"$redisPort"/0 -A patchman worker -l INFO -E & + gosu www-data celery \ + -b redis://"$redisHost":"$redisPort"/0 \ + -A patchman worker \ + -l INFO -E & fi # Starts Apache httpd process -/usr/sbin/apache2ctl -DFOREGROUND - -# Wait for any process to exit -wait -n - -# Exit with status of process that exited first -exit $? +log "Starting Apache..." +exec /usr/sbin/apache2ctl -DFOREGROUND diff --git a/errata/migrations/0010_synopsis_textfield.py b/errata/migrations/0010_synopsis_textfield.py new file mode 100644 index 00000000..eae13347 --- /dev/null +++ b/errata/migrations/0010_synopsis_textfield.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.28 on 2026-05-02 03:49 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('errata', '0009_backfill_cached_counts'), + ] + + operations = [ + migrations.AlterField( + model_name='erratum', + name='synopsis', + field=models.TextField(), + ), + ] diff --git a/errata/models.py b/errata/models.py index 8f6a6f74..035a2456 100644 --- a/errata/models.py +++ b/errata/models.py @@ -33,7 +33,7 @@ class Erratum(models.Model): name = models.CharField(max_length=255, unique=True) e_type = models.CharField(max_length=255) issue_date = models.DateTimeField() - synopsis = models.CharField(max_length=255) + synopsis = models.TextField() affected_packages = models.ManyToManyField(Package, blank=True, related_name='affected_by_erratum') fixed_packages = models.ManyToManyField(Package, blank=True, related_name='provides_fix_in_erratum') from operatingsystems.models import OSRelease @@ -91,15 +91,16 @@ def _mark_updates_security(self, updates): error_message(text=e) update.delete() - def fetch_osv_dev_data(self): + def fetch_osv_dev_data(self, session=None): + """ Fetch osv.dev JSON for this erratum. Returns parsed JSON or None. + """ osv_dev_url = f'https://api.osv.dev/v1/vulns/{self.name}' - res = get_url(osv_dev_url) + res = get_url(osv_dev_url, session=session) + if res is None: + return None if res.status_code == 404: - error_message(text=f'404 - Skipping {self.name} - {osv_dev_url}') - return - data = res.content - osv_dev_json = json.loads(data) - self.parse_osv_dev_data(osv_dev_json) + return None + return json.loads(res.content) def parse_osv_dev_data(self, osv_dev_json): from django.db.models import Q diff --git a/errata/sources/distros/alma.py b/errata/sources/distros/alma.py index d9e1dbf9..8b7b3163 100644 --- a/errata/sources/distros/alma.py +++ b/errata/sources/distros/alma.py @@ -14,19 +14,21 @@ # You should have received a copy of the GNU General Public License # along with Patchman. If not, see -import concurrent.futures import json -from django.db import connections - -from operatingsystems.utils import get_or_create_osrelease +from errata.utils import get_or_create_erratum +from modules.utils import get_matching_modules +from operatingsystems.utils import ( + get_or_create_osrelease, normalize_el_osrelease, +) from packages.models import Package from packages.utils import get_or_create_package, parse_package_string from patchman.signals import pbar_start, pbar_update -from util import fetch_content, get_setting_of_type, get_url +from util import fetch_content, get_setting_of_type, get_url, run_concurrently +from util.logging import clear_forked_pbar -def update_alma_errata(concurrent_processing=True): +def update_alma_errata(concurrent_processing=True, max_workers=25): """ Update Alma Linux advisories from errata.almalinux.org: https://errata.almalinux.org/8/errata.full.json https://errata.almalinux.org/9/errata.full.json @@ -40,7 +42,7 @@ def update_alma_errata(concurrent_processing=True): ) for release in alma_releases: advisories = fetch_alma_advisories(release) - process_alma_errata(release, advisories, concurrent_processing) + process_alma_errata(release, advisories, concurrent_processing, max_workers) def fetch_alma_advisories(release): @@ -54,11 +56,11 @@ def fetch_alma_advisories(release): return advisories -def process_alma_errata(release, advisories, concurrent_processing): +def process_alma_errata(release, advisories, concurrent_processing, max_workers=25): """ Process Alma Linux Errata """ if concurrent_processing: - process_alma_errata_concurrently(release, advisories) + process_alma_errata_concurrently(release, advisories, max_workers) else: process_alma_errata_serially(release, advisories) @@ -73,24 +75,24 @@ def process_alma_errata_serially(release, advisories): pbar_update.send(sender=None, index=i + 1) -def process_alma_errata_concurrently(release, advisories): +def process_alma_errata_concurrently(release, advisories, max_workers=25): """ Process Alma Linux Errata concurrently """ - connections.close_all() elen = len(advisories) pbar_start.send(sender=None, ptext=f'Processing {elen} Alma {release} Errata', plen=elen) - i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: - futures = [executor.submit(process_alma_erratum, release, advisory) for advisory in advisories] - for future in concurrent.futures.as_completed(futures): - i += 1 - pbar_update.send(sender=None, index=i + 1) + args = [(release, advisory) for advisory in advisories] + for i, _ in enumerate(run_concurrently(process_alma_erratum_wrapper, args, max_workers)): + pbar_update.send(sender=None, index=i + 1) + + +def process_alma_erratum_wrapper(args): + clear_forked_pbar() + return process_alma_erratum(*args) def process_alma_erratum(release, advisory): """ Process a single Alma Linux Erratum """ - from errata.utils import get_or_create_erratum erratum_name = advisory.get('id') issue_date = advisory.get('issued_date') synopsis = advisory.get('title') @@ -110,7 +112,6 @@ def process_alma_erratum(release, advisory): def add_alma_erratum_osreleases(e, release): """ Update OS Release for Alma Linux errata """ - from operatingsystems.utils import normalize_el_osrelease osrelease_name = normalize_el_osrelease(f'Alma Linux {release}') osrelease = get_or_create_osrelease(name=osrelease_name) e.osreleases.add(osrelease) @@ -150,7 +151,6 @@ def add_alma_erratum_packages(e, advisory): def add_alma_erratum_modules(e, advisory): """ Parse and add modules for Alma Linux errata """ - from modules.utils import get_matching_modules fixed_packages = set() modules = advisory.get('modules') for module in modules: @@ -160,8 +160,7 @@ def add_alma_erratum_modules(e, advisory): stream = module.get('stream') version = module.get('version') matching_modules = get_matching_modules(name, stream, version, context, arch) - for match in matching_modules: + for match in matching_modules.prefetch_related('packages'): for fixed_package in match.packages.all(): - match.packages.add(fixed_package) fixed_packages.add(fixed_package) e.add_fixed_packages(fixed_packages) diff --git a/errata/sources/distros/arch.py b/errata/sources/distros/arch.py index e22de403..696c9efb 100644 --- a/errata/sources/distros/arch.py +++ b/errata/sources/distros/arch.py @@ -17,8 +17,7 @@ import concurrent.futures import json -from django.db import connections - +from errata.utils import get_or_create_erratum from operatingsystems.utils import get_or_create_osrelease from packages.models import Package from packages.utils import ( @@ -26,16 +25,17 @@ ) from patchman.signals import pbar_start, pbar_update from util import fetch_content, get_url -from util.logging import error_message +from util.logging import clear_forked_pbar, error_message -def update_arch_errata(concurrent_processing=False): +def update_arch_errata(concurrent_processing=False, max_workers=25): """ Update Arch Linux Errata from the following sources: https://security.archlinux.org/advisories.json """ add_arch_linux_osrelease() advisories = fetch_arch_errata() - parse_arch_errata(advisories, concurrent_processing) + if advisories: + parse_arch_errata(advisories, concurrent_processing, max_workers) def fetch_arch_errata(): @@ -44,14 +44,16 @@ def fetch_arch_errata(): """ res = get_url('https://security.archlinux.org/advisories.json') advisories = fetch_content(res, 'Fetching Arch Advisories') + if advisories is None: + return None return json.loads(advisories) -def parse_arch_errata(advisories, concurrent_processing): +def parse_arch_errata(advisories, concurrent_processing, max_workers=25): """ Parse Arch Linux Errata Advisories """ if concurrent_processing: - parse_arch_errata_concurrently(advisories) + parse_arch_errata_concurrently(advisories, max_workers) else: parse_arch_errata_serially(advisories) @@ -67,15 +69,14 @@ def parse_arch_errata_serially(advisories): pbar_update.send(sender=None, index=i + 1) -def parse_arch_errata_concurrently(advisories): +def parse_arch_errata_concurrently(advisories, max_workers=25): """ Parse Arch Linux Errata Advisories concurrently """ osrelease = get_or_create_osrelease(name='Arch Linux') - connections.close_all() elen = len(advisories) pbar_start.send(sender=None, ptext=f'Processing {elen} Arch Advisories', plen=elen) i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [executor.submit(process_arch_erratum, advisory, osrelease) for advisory in advisories] for future in concurrent.futures.as_completed(futures): i += 1 @@ -85,7 +86,7 @@ def parse_arch_errata_concurrently(advisories): def process_arch_erratum(advisory, osrelease): """ Process a single Arch Linux Erratum """ - from errata.utils import get_or_create_erratum + clear_forked_pbar() try: name = advisory.get('name') issue_date = advisory.get('date') @@ -121,6 +122,8 @@ def add_arch_erratum_references(e, advisory): e.add_reference('ASA', url) raw_url = f'{url}/raw' res = get_url(raw_url) + if res is None: + return data = res.content parse_arch_erratum_raw(e, data.decode()) @@ -152,6 +155,8 @@ def add_arch_erratum_packages(e, advisory): group_id = advisory.get('group') group_url = f'https://security.archlinux.org/group/{group_id}.json' res = get_url(group_url) + if res is None: + return data = res.content group = json.loads(data) packages = group.get('packages') diff --git a/errata/sources/distros/debian.py b/errata/sources/distros/debian.py index 8025b1bf..f14e3799 100644 --- a/errata/sources/distros/debian.py +++ b/errata/sources/distros/debian.py @@ -14,27 +14,29 @@ # You should have received a copy of the GNU General Public License # along with Patchman. If not, see -import concurrent.futures import csv import re from datetime import datetime from io import StringIO from debian.deb822 import Dsc -from django.db import connections +from errata.utils import get_or_create_erratum from operatingsystems.models import OSRelease from operatingsystems.utils import get_or_create_osrelease from packages.models import Package from packages.utils import find_evr, get_or_create_package from patchman.signals import pbar_start, pbar_update -from util import extract, fetch_content, get_setting_of_type, get_url -from util.logging import error_message, warning_message +from util import ( + extract, fetch_concurrently, fetch_content, get_setting_of_type, get_url, + run_concurrently, +) +from util.logging import clear_forked_pbar, error_message, warning_message DSCs = {} -def update_debian_errata(concurrent_processing=True): +def update_debian_errata(concurrent_processing=True, max_workers=25): """ Update Debian errata using: https://salsa.debian.org/security-tracker-team/security-tracker/raw/master/data/DSA/list https://salsa.debian.org/security-tracker-team/security-tracker/raw/master/data/DSA/list @@ -46,8 +48,8 @@ def update_debian_errata(concurrent_processing=True): advisories = dsas + dlas fetch_dscs_from_debian_package_file_maps() accepted_codenames = get_accepted_debian_codenames() - errata = parse_debian_errata(advisories, accepted_codenames) - create_debian_errata(errata, accepted_codenames, concurrent_processing) + errata = parse_debian_errata(advisories, accepted_codenames, concurrent_processing, max_workers) + create_debian_errata(errata, accepted_codenames, concurrent_processing, max_workers) def fetch_debian_dsa_advisories(): @@ -108,12 +110,14 @@ def parse_debian_package_file_map(data, repo): parsing_dsc = False -def parse_debian_errata(advisories, accepted_codenames): +def parse_debian_errata(advisories, accepted_codenames, concurrent_processing=True, max_workers=25): """ Parse Debian DSA/DLA files for security advisories """ distro_pattern = re.compile(r'^\t\[(.+?)\] - .*') title_pattern = re.compile(r'^\[(.+?)\] (.+?) (.+?)[ ]+[-]+ (.*)') + distro_package_pattern = re.compile(r'^\t\[(.+?)\] - (.+?) (.*)') errata = [] + dsc_fetches = [] e = {'packages': {}, 'cve_ids': [], 'releases': []} for line in advisories.splitlines(): if line.startswith('['): @@ -132,9 +136,17 @@ def parse_debian_errata(advisories, accepted_codenames): e['releases'].append(release) if not e.get('packages').get(release): e['packages'][release] = [] - e['packages'][release].append(parse_debian_erratum_package(line, accepted_codenames)) + pkg_match = re.match(distro_package_pattern, line) + if pkg_match and pkg_match.group(1) in accepted_codenames: + source_package = pkg_match.group(2) + source_version = pkg_match.group(3) + dsc_fetches.append((source_package, source_version)) + e['packages'][release].append((source_package, source_version)) + else: + e['packages'][release].append(None) # add the last one errata = add_errata_by_codename(errata, e, accepted_codenames) + fetch_debian_dsc_package_lists(dsc_fetches, concurrent_processing, max_workers) return errata @@ -162,11 +174,11 @@ def parse_debian_erratum_advisory(e, match): return e -def create_debian_errata(errata, accepted_codenames, concurrent_processing): +def create_debian_errata(errata, accepted_codenames, concurrent_processing, max_workers=25): """ Create Debian Errata """ if concurrent_processing: - create_debian_errata_concurrently(errata, accepted_codenames) + create_debian_errata_concurrently(errata, accepted_codenames, max_workers) else: create_debian_errata_serially(errata, accepted_codenames) @@ -181,25 +193,25 @@ def create_debian_errata_serially(errata, accepted_codenames): pbar_update.send(sender=None, index=i + 1) -def create_debian_errata_concurrently(errata, accepted_codenames): +def create_debian_errata_concurrently(errata, accepted_codenames, max_workers=25): """ Create Debian Errata concurrently """ - connections.close_all() elen = len(errata) pbar_start.send(sender=None, ptext=f'Processing {elen} Debian Errata', plen=elen) - i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: - futures = [executor.submit(process_debian_erratum, erratum, accepted_codenames) for erratum in errata] - for future in concurrent.futures.as_completed(futures): - i += 1 - pbar_update.send(sender=None, index=i + 1) + args = [(erratum, accepted_codenames) for erratum in errata] + for i, _ in enumerate(run_concurrently(process_debian_erratum_wrapper, args, max_workers)): + pbar_update.send(sender=None, index=i + 1) + + +def process_debian_erratum_wrapper(args): + clear_forked_pbar() + return process_debian_erratum(*args) def process_debian_erratum(erratum, accepted_codenames): """ Process a single Debian Erratum """ try: - from errata.utils import get_or_create_erratum erratum_name = erratum.get('name') e, created = get_or_create_erratum( name=erratum_name, @@ -221,19 +233,23 @@ def process_debian_erratum(erratum, accepted_codenames): error_message(text=exc) -def parse_debian_erratum_package(line, accepted_codenames): - """ Parse the codename and source package from a DSA/DLA file - Returns the source package and source version +def fetch_debian_dsc_package_lists(dsc_fetches, concurrent_processing=True, max_workers=25): + """ Fetch DSC package lists with a progress bar """ - distro_package_pattern = re.compile(r'^\t\[(.+?)\] - (.+?) (.*)') - match = re.match(distro_package_pattern, line) - if match: - codename = match.group(1) - if codename in accepted_codenames: - source_package = match.group(2) - source_version = match.group(3) - fetch_debian_dsc_package_list(source_package, source_version) - return source_package, source_version + flen = len(dsc_fetches) + pbar_start.send(sender=None, ptext=f'Fetching {flen} Debian DSC files', plen=flen) + if concurrent_processing: + for i, _ in enumerate(fetch_concurrently(fetch_dsc_worker, dsc_fetches, max_workers)): + pbar_update.send(sender=None, index=i + 1) + else: + for i, (package, version) in enumerate(dsc_fetches): + fetch_debian_dsc_package_list(package, version) + pbar_update.send(sender=None, index=i + 1) + + +def fetch_dsc_worker(item, session): + package, version = item + fetch_debian_dsc_package_list(package, version, session=session) def get_debian_dsc_package_list(package, version): @@ -246,14 +262,14 @@ def get_debian_dsc_package_list(package, version): return package_list -def fetch_debian_dsc_package_list(package, version): +def fetch_debian_dsc_package_list(package, version, session=None): """ Fetch the package list from a DSC file for a given source package/version """ if not DSCs.get(package) or not DSCs[package].get(version): warning_message(text=f'No DSC found for {package} {version}') return source_url = DSCs[package][version]['url'] - res = get_url(source_url) + res = get_url(source_url, session=session) data = res.content dsc = Dsc(data.decode()) package_list = dsc.get('package-list') @@ -301,6 +317,8 @@ def create_debian_os_releases(codename_to_version): def process_debian_erratum_fixed_packages(e, package_data): """ Process packages fixed in a Debian errata """ + if not package_data: + return source_package, source_version = package_data epoch, ver, rel = find_evr(source_version) package_list = get_debian_dsc_package_list(source_package, source_version) diff --git a/errata/sources/distros/rocky.py b/errata/sources/distros/rocky.py index 272ac2a7..034e043f 100644 --- a/errata/sources/distros/rocky.py +++ b/errata/sources/distros/rocky.py @@ -17,28 +17,33 @@ import concurrent.futures import json -from django.db import connections from django.db.utils import OperationalError from tenacity import ( retry, retry_if_exception_type, stop_after_attempt, wait_exponential, ) +from errata.utils import get_or_create_erratum +from modules.models import Module +from modules.utils import get_matching_modules from operatingsystems.utils import get_or_create_osrelease from packages.models import Package from packages.utils import get_or_create_package, parse_package_string from patchman.signals import pbar_start, pbar_update -from util import fetch_content, get_url -from util.logging import error_message, info_message +from util import fetch_content, get_url, run_concurrently +from util.logging import clear_forked_pbar, error_message, info_message -def update_rocky_errata(concurrent_processing=True): +def update_rocky_errata(concurrent_processing=True, max_workers=25): """ Update Rocky Linux errata """ rocky_errata_api_host = 'https://apollo.build.resf.org' rocky_errata_api_url = '/api/v3/' if check_rocky_errata_endpoint_health(rocky_errata_api_host): - advisories = fetch_rocky_advisories(rocky_errata_api_host, rocky_errata_api_url, concurrent_processing) - process_rocky_errata(advisories, concurrent_processing) + advisories = fetch_rocky_advisories( + rocky_errata_api_host, rocky_errata_api_url, + concurrent_processing, max_workers, + ) + process_rocky_errata(advisories, concurrent_processing, max_workers) def check_rocky_errata_endpoint_health(rocky_errata_api_host): @@ -66,11 +71,11 @@ def check_rocky_errata_endpoint_health(rocky_errata_api_host): return False -def fetch_rocky_advisories(rocky_errata_api_host, rocky_errata_api_url, concurrent_processing): +def fetch_rocky_advisories(rocky_errata_api_host, rocky_errata_api_url, concurrent_processing, max_workers=25): """ Fetch Rocky Linux advisories and return the list """ if concurrent_processing: - return fetch_rocky_advisories_concurrently(rocky_errata_api_host, rocky_errata_api_url) + return fetch_rocky_advisories_concurrently(rocky_errata_api_host, rocky_errata_api_url, max_workers) else: return fetch_rocky_advisories_serially(rocky_errata_api_host, rocky_errata_api_url) @@ -103,7 +108,7 @@ def fetch_rocky_advisories_serially(rocky_errata_api_host, rocky_errata_api_url) return advisories -def fetch_rocky_advisories_concurrently(rocky_errata_api_host, rocky_errata_api_url): +def fetch_rocky_advisories_concurrently(rocky_errata_api_host, rocky_errata_api_url, max_workers=25): """ Fetch Rocky Linux advisories concurrently and return the list """ rocky_errata_advisories_url = rocky_errata_api_host + rocky_errata_api_url + 'advisories/' @@ -119,7 +124,7 @@ def fetch_rocky_advisories_concurrently(rocky_errata_api_host, rocky_errata_api_ ptext = 'Fetching Rocky Advisories' pbar_start.send(sender=None, ptext=ptext, plen=pages) i = 0 - with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [executor.submit(get_rocky_advisory, rocky_errata_advisories_url, page) for page in range(1, pages + 1)] for future in concurrent.futures.as_completed(futures): @@ -140,11 +145,11 @@ def get_rocky_advisory(rocky_errata_advisories_url, page): return advisories_dict.get('advisories') -def process_rocky_errata(advisories, concurrent_processing): +def process_rocky_errata(advisories, concurrent_processing, max_workers=25): """ Process Rocky Linux Errata """ if concurrent_processing: - process_rocky_errata_concurrently(advisories) + process_rocky_errata_concurrently(advisories, max_workers) else: process_rocky_errata_serially(advisories) @@ -159,18 +164,18 @@ def process_rocky_errata_serially(advisories): pbar_update.send(sender=None, index=i + 1) -def process_rocky_errata_concurrently(advisories): +def process_rocky_errata_concurrently(advisories, max_workers=25): """ Process Rocky Linux errata concurrently """ - connections.close_all() elen = len(advisories) pbar_start.send(sender=None, ptext=f'Processing {elen} Rocky Errata', plen=elen) - i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: - futures = [executor.submit(process_rocky_erratum, advisory) for advisory in advisories] - for future in concurrent.futures.as_completed(futures): - i += 1 - pbar_update.send(sender=None, index=i + 1) + for i, _ in enumerate(run_concurrently(process_rocky_erratum_wrapper, advisories, max_workers)): + pbar_update.send(sender=None, index=i + 1) + + +def process_rocky_erratum_wrapper(advisory): + clear_forked_pbar() + return process_rocky_erratum(advisory) @retry( @@ -181,7 +186,6 @@ def process_rocky_errata_concurrently(advisories): def process_rocky_erratum(advisory): """ Process a single Rocky Linux erratum """ - from errata.utils import get_or_create_erratum try: erratum_name = advisory.get('name') e_type = advisory.get('kind').lower().replace(' ', '') @@ -230,9 +234,9 @@ def add_rocky_erratum_oses(e, advisory): def add_rocky_erratum_packages(e, advisory): """ Parse and add packages for Rocky Linux errata """ - from modules.utils import get_matching_modules packages = advisory.get('packages') fixed_packages = set() + module_package_adds = {} for package in packages: package_name = package.get('nevra') if package_name: @@ -253,5 +257,9 @@ def add_rocky_erratum_packages(e, advisory): arch, ) for match in matching_modules: - match.packages.add(fixed_package) + if match.pk not in module_package_adds: + module_package_adds[match.pk] = set() + module_package_adds[match.pk].add(fixed_package) + for module_pk, pkgs in module_package_adds.items(): + Module.objects.get(pk=module_pk).packages.add(*pkgs) e.add_fixed_packages(fixed_packages) diff --git a/errata/sources/distros/ubuntu.py b/errata/sources/distros/ubuntu.py index 5616331f..0ef25962 100644 --- a/errata/sources/distros/ubuntu.py +++ b/errata/sources/distros/ubuntu.py @@ -14,15 +14,13 @@ # You should have received a copy of the GNU General Public License # along with Patchman. If not, see -import concurrent.futures import csv import json import os from io import StringIO from urllib.parse import urlparse -from django.db import connections - +from errata.utils import get_or_create_erratum from operatingsystems.models import OSRelease, OSVariant from operatingsystems.utils import get_or_create_osrelease from packages.models import Package @@ -33,11 +31,12 @@ from patchman.signals import pbar_start, pbar_update from util import ( bunzip2, fetch_content, get_setting_of_type, get_sha256, get_url, + run_concurrently, ) -from util.logging import error_message +from util.logging import clear_forked_pbar, error_message -def update_ubuntu_errata(concurrent_processing=False): +def update_ubuntu_errata(concurrent_processing=True, max_workers=25): """ Update Ubuntu errata """ codenames = retrieve_ubuntu_codenames() @@ -47,7 +46,7 @@ def update_ubuntu_errata(concurrent_processing=False): expected_checksum = fetch_ubuntu_usn_db_checksum() actual_checksum = get_sha256(data) if actual_checksum == expected_checksum: - parse_usn_data(data, concurrent_processing) + parse_usn_data(data, concurrent_processing, max_workers) else: e = 'Ubuntu USN DB checksum mismatch, skipping Ubuntu errata parsing\n' e += f'{actual_checksum} (actual) != {expected_checksum} (expected)' @@ -70,14 +69,14 @@ def fetch_ubuntu_usn_db_checksum(): return fetch_content(res, 'Fetching Ubuntu Errata Checksum').decode().split()[0] -def parse_usn_data(data, concurrent_processing): +def parse_usn_data(data, concurrent_processing, max_workers=25): """ Parse the Ubuntu USN data """ accepted_releases = get_accepted_ubuntu_codenames() extracted = bunzip2(data).decode() advisories = json.loads(extracted) if concurrent_processing: - parse_usn_data_concurrently(advisories, accepted_releases) + parse_usn_data_concurrently(advisories, accepted_releases, max_workers) else: parse_usn_data_serially(advisories, accepted_releases) @@ -92,25 +91,24 @@ def parse_usn_data_serially(advisories, accepted_releases): pbar_update.send(sender=None, index=i + 1) -def parse_usn_data_concurrently(advisories, accepted_releases): +def parse_usn_data_concurrently(advisories, accepted_releases, max_workers=25): """ Parse the Ubuntu USN data concurrently """ - connections.close_all() elen = len(advisories) pbar_start.send(sender=None, ptext=f'Processing {elen} Ubuntu Errata', plen=elen) - i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: - futures = [executor.submit(process_usn, usn_id, advisory, accepted_releases) - for usn_id, advisory in advisories.items()] - for future in concurrent.futures.as_completed(futures): - i += 1 - pbar_update.send(sender=None, index=i + 1) + args = [(usn_id, advisory, accepted_releases) for usn_id, advisory in advisories.items()] + for i, _ in enumerate(run_concurrently(process_usn_wrapper, args, max_workers)): + pbar_update.send(sender=None, index=i + 1) + + +def process_usn_wrapper(args): + clear_forked_pbar() + return process_usn(*args) def process_usn(usn_id, advisory, accepted_releases): """ Process a single USN advisory """ - from errata.utils import get_or_create_erratum try: affected_releases = advisory.get('releases', {}).keys() if not release_is_affected(affected_releases, accepted_releases): @@ -208,7 +206,7 @@ def get_accepted_ubuntu_codenames(): """ Get acceptable Ubuntu OS codenames Can be overridden by specifying UBUNTU_CODENAMES in settings """ - default_codenames = ['jammy', 'noble'] + default_codenames = ['jammy', 'noble', 'resolute'] accepted_codenames = get_setting_of_type( setting_name='UBUNTU_CODENAMES', setting_type=list, diff --git a/errata/sources/repos/yum.py b/errata/sources/repos/yum.py index 72813436..424e688d 100644 --- a/errata/sources/repos/yum.py +++ b/errata/sources/repos/yum.py @@ -14,11 +14,9 @@ # You should have received a copy of the GNU General Public License # along with Patchman. If not, see -import concurrent.futures - -from django.db import connections - from errata.models import Erratum from packages.models import PackageUpdate from patchman.signals import pbar_start, pbar_update -from util import tz_aware_datetime +from util import fetch_concurrently, run_concurrently, tz_aware_datetime from util.logging import warning_message @@ -62,19 +58,25 @@ def get_or_create_erratum(name, e_type, issue_date, synopsis): return e, created -def mark_errata_security_updates(): +def mark_errata_security_updates(concurrent_processing=True, max_workers=25): """ For each set of erratum packages, modify any PackageUpdate that should be marked as a security update. """ - connections.close_all() elen = Erratum.objects.count() pbar_start.send(sender=None, ptext=f'Scanning {elen} Errata for security updates', plen=elen) - i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: - futures = [executor.submit(e.scan_for_security_updates) for e in Erratum.objects.all()] - for future in concurrent.futures.as_completed(futures): + if concurrent_processing: + pks = list(Erratum.objects.values_list('pk', flat=True)) + for i, _ in enumerate(run_concurrently(scan_security_worker, pks, max_workers)): + pbar_update.send(sender=None, index=i + 1) + else: + for i, e in enumerate(Erratum.objects.all()): + e.scan_for_security_updates() pbar_update.send(sender=None, index=i + 1) - i += 1 + + +def scan_security_worker(pk): + e = Erratum.objects.get(pk=pk) + return e.scan_for_security_updates() def scan_package_updates_for_affected_packages(): @@ -88,15 +90,36 @@ def scan_package_updates_for_affected_packages(): e.affected_packages.add(pu.oldpackage) -def enrich_errata(): +def enrich_errata(concurrent_processing=True, max_workers=25): """ Enrich Errata with data from osv.dev """ - connections.close_all() - elen = Erratum.objects.count() - pbar_start.send(sender=None, ptext=f'Adding osv.dev data to {elen} Errata', plen=elen) - i = 0 - with concurrent.futures.ProcessPoolExecutor(max_workers=25) as executor: - futures = [executor.submit(e.fetch_osv_dev_data) for e in Erratum.objects.all()] - for future in concurrent.futures.as_completed(futures): + errata = list(Erratum.objects.all()) + elen = len(errata) + + # phase 1: fetch osv.dev data + pbar_start.send(sender=None, ptext=f'Fetching osv.dev data for {elen} Errata', plen=elen) + results = [] + if concurrent_processing: + for i, result in enumerate(fetch_concurrently(fetch_osv_worker, errata, max_workers)): + erratum, osv_data = result + if osv_data is not None: + results.append((erratum, osv_data)) + pbar_update.send(sender=None, index=i + 1) + else: + for i, e in enumerate(errata): + osv_data = e.fetch_osv_dev_data() + if osv_data is not None: + results.append((e, osv_data)) + pbar_update.send(sender=None, index=i + 1) + + # phase 2: parse and write to db (serial, no lock contention) + rlen = len(results) + if rlen > 0: + pbar_start.send(sender=None, ptext=f'Parsing osv.dev data for {rlen} Errata', plen=rlen) + for i, (erratum, osv_data) in enumerate(results): + erratum.parse_osv_dev_data(osv_data) pbar_update.send(sender=None, index=i + 1) - i += 1 + + +def fetch_osv_worker(erratum, session): + return (erratum, erratum.fetch_osv_dev_data(session)) diff --git a/etc/patchman/local_settings.py b/etc/patchman/local_settings.py index 6705f44c..e622a02b 100644 --- a/etc/patchman/local_settings.py +++ b/etc/patchman/local_settings.py @@ -51,7 +51,7 @@ DEBIAN_CODENAMES = ['bookworm', 'trixie'] # list of Ubuntu Linux releases to update -UBUNTU_CODENAMES = ['jammy', 'noble'] +UBUNTU_CODENAMES = ['jammy', 'noble', 'resolute'] # Whether to run patchman under the gunicorn web server RUN_GUNICORN = False diff --git a/hosts/migrations/0013_alter_hostrepo_unique_together_and_more.py b/hosts/migrations/0013_alter_hostrepo_unique_together_and_more.py new file mode 100644 index 00000000..bde135a0 --- /dev/null +++ b/hosts/migrations/0013_alter_hostrepo_unique_together_and_more.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.28 on 2026-04-01 03:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('hosts', '0012_backfill_cached_counts'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='hostrepo', + unique_together=set(), + ), + migrations.AddConstraint( + model_name='hostrepo', + constraint=models.UniqueConstraint(fields=('host', 'repo'), name='unique_hostrepo'), + ), + ] diff --git a/hosts/models.py b/hosts/models.py index 37e67779..62f54947 100644 --- a/hosts/models.py +++ b/hosts/models.py @@ -576,11 +576,14 @@ def find_deb_kernel_updates(self, kernel_packages, repo_packages, hostrepos): break if prefix is None or prefix in processed_prefixes: continue - processed_prefixes.add(prefix) - # extract kernel series (e.g. '6.8') to avoid cross-track - # comparisons (GA 6.8 vs HWE 6.17 in the same repo) + # skip meta-packages (e.g. linux-image-generic) that have no series; + # the versioned package (e.g. linux-image-5.15.0-176-generic) will + # handle the update check and correctly filter cross-series packages installed_series = self.get_deb_kernel_series(pkg_name) + if installed_series is None: + continue + processed_prefixes.add(prefix) # build endswith filter for flavoured kernels name_filter = Q( @@ -646,7 +649,12 @@ class HostRepo(models.Model): priority = models.IntegerField(default=0) class Meta: - unique_together = ['host', 'repo'] + constraints = [ + models.UniqueConstraint( + fields=['host', 'repo'], + name='unique_hostrepo', + ), + ] ordering = ['host', 'repo'] def __str__(self): diff --git a/modules/migrations/0007_alter_module_unique_together_module_unique_module.py b/modules/migrations/0007_alter_module_unique_together_module_unique_module.py new file mode 100644 index 00000000..bbe8585b --- /dev/null +++ b/modules/migrations/0007_alter_module_unique_together_module_unique_module.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.28 on 2026-04-01 03:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('modules', '0006_alter_module_context_alter_module_name_and_more'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='module', + unique_together=set(), + ), + migrations.AddConstraint( + model_name='module', + constraint=models.UniqueConstraint(fields=('name', 'stream', 'version', 'context', 'arch', 'repo'), name='unique_module'), + ), + ] diff --git a/modules/models.py b/modules/models.py index aa06a41b..bbad4c09 100644 --- a/modules/models.py +++ b/modules/models.py @@ -35,7 +35,12 @@ class Module(models.Model): class Meta: verbose_name = 'Module' verbose_name_plural = 'Modules' - unique_together = ['name', 'stream', 'version', 'context', 'arch', 'repo'] + constraints = [ + models.UniqueConstraint( + fields=['name', 'stream', 'version', 'context', 'arch', 'repo'], + name='unique_module', + ), + ] ordering = ['name', 'stream'] def __str__(self): diff --git a/operatingsystems/migrations/0012_alter_osrelease_unique_together_and_more.py b/operatingsystems/migrations/0012_alter_osrelease_unique_together_and_more.py new file mode 100644 index 00000000..8f7a5e11 --- /dev/null +++ b/operatingsystems/migrations/0012_alter_osrelease_unique_together_and_more.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.28 on 2026-04-01 03:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('operatingsystems', '0011_alter_osrelease_codename_alter_osrelease_cpe_name_and_more'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='osrelease', + unique_together=set(), + ), + migrations.AddConstraint( + model_name='osrelease', + constraint=models.UniqueConstraint(fields=('name', 'codename', 'cpe_name'), name='unique_osrelease'), + ), + ] diff --git a/operatingsystems/models.py b/operatingsystems/models.py index 220cbdb3..5e472170 100644 --- a/operatingsystems/models.py +++ b/operatingsystems/models.py @@ -35,7 +35,12 @@ class OSRelease(models.Model): class Meta: verbose_name = 'Operating System Release' verbose_name_plural = 'Operating System Releases' - unique_together = ['name', 'codename', 'cpe_name'] + constraints = [ + models.UniqueConstraint( + fields=['name', 'codename', 'cpe_name'], + name='unique_osrelease', + ), + ] ordering = ['name'] def __str__(self): diff --git a/packages/migrations/0008_alter_package_unique_together_and_more.py b/packages/migrations/0008_alter_package_unique_together_and_more.py new file mode 100644 index 00000000..7a03ee2c --- /dev/null +++ b/packages/migrations/0008_alter_package_unique_together_and_more.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.28 on 2026-04-01 03:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('packages', '0007_alter_package_epoch_alter_package_release_and_more'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='package', + unique_together=set(), + ), + migrations.AlterUniqueTogether( + name='packageupdate', + unique_together=set(), + ), + migrations.AddConstraint( + model_name='package', + constraint=models.UniqueConstraint(fields=('name', 'epoch', 'version', 'release', 'arch', 'packagetype', 'category'), name='unique_package'), + ), + migrations.AddConstraint( + model_name='packageupdate', + constraint=models.UniqueConstraint(fields=('oldpackage', 'newpackage', 'security'), name='unique_packageupdate'), + ), + ] diff --git a/packages/models.py b/packages/models.py index 99ceaec3..e3446c95 100644 --- a/packages/models.py +++ b/packages/models.py @@ -87,7 +87,12 @@ class Package(models.Model): class Meta: ordering = ['name', 'epoch', 'version', 'release', 'arch'] - unique_together = ['name', 'epoch', 'version', 'release', 'arch', 'packagetype', 'category'] + constraints = [ + models.UniqueConstraint( + fields=['name', 'epoch', 'version', 'release', 'arch', 'packagetype', 'category'], + name='unique_package', + ), + ] def __str__(self): if self.epoch: @@ -226,7 +231,12 @@ class PackageUpdate(models.Model): security = models.BooleanField(default=False) class Meta: - unique_together = ['oldpackage', 'newpackage', 'security'] + constraints = [ + models.UniqueConstraint( + fields=['oldpackage', 'newpackage', 'security'], + name='unique_packageupdate', + ), + ] ordering = ['oldpackage', 'newpackage'] def __str__(self): diff --git a/patchman/settings.py b/patchman/settings.py index 9fe4b84f..cb86644d 100644 --- a/patchman/settings.py +++ b/patchman/settings.py @@ -67,7 +67,6 @@ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'America/NewYork' USE_I18N = True -USE_L10N = True USE_TZ = True DEFAULT_AUTO_FIELD = 'django.db.models.AutoField' diff --git a/patchman/urls.py b/patchman/urls.py index 9f5065fd..e698bed8 100644 --- a/patchman/urls.py +++ b/patchman/urls.py @@ -16,9 +16,8 @@ # along with If not, see from django.conf import settings -from django.conf.urls import handler404, handler500, include # noqa from django.contrib import admin -from django.urls import path +from django.urls import include, path from django.views import static from rest_framework import routers diff --git a/reports/tests/test_parsing.py b/reports/tests/test_parsing.py index f9c94098..e5644a92 100644 --- a/reports/tests/test_parsing.py +++ b/reports/tests/test_parsing.py @@ -18,7 +18,7 @@ from packages.models import Package from reports.utils import ( - _get_package_type, _get_repo_type, parse_packages, parse_repos, + get_package_type, get_repo_type, parse_packages, parse_repos, process_repo_text, ) from repos.models import Repository @@ -122,32 +122,32 @@ def test_parse_repos_strips_quotes(self): CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}} ) class GetPackageTypeTests(TestCase): - """Tests for _get_package_type() function.""" + """Tests for get_package_type() function.""" - def test_get_package_type_deb(self): + def testget_package_type_deb(self): """Test DEB package type detection.""" - self.assertEqual(_get_package_type('deb'), Package.DEB) - self.assertEqual(_get_package_type('DEB'), Package.DEB) - self.assertEqual(_get_package_type('Deb'), Package.DEB) + self.assertEqual(get_package_type('deb'), Package.DEB) + self.assertEqual(get_package_type('DEB'), Package.DEB) + self.assertEqual(get_package_type('Deb'), Package.DEB) - def test_get_package_type_rpm(self): + def testget_package_type_rpm(self): """Test RPM package type detection.""" - self.assertEqual(_get_package_type('rpm'), Package.RPM) - self.assertEqual(_get_package_type('RPM'), Package.RPM) + self.assertEqual(get_package_type('rpm'), Package.RPM) + self.assertEqual(get_package_type('RPM'), Package.RPM) - def test_get_package_type_arch(self): + def testget_package_type_arch(self): """Test Arch package type detection.""" - self.assertEqual(_get_package_type('arch'), Package.ARCH) + self.assertEqual(get_package_type('arch'), Package.ARCH) - def test_get_package_type_gentoo(self): + def testget_package_type_gentoo(self): """Test Gentoo package type detection.""" - self.assertEqual(_get_package_type('gentoo'), Package.GENTOO) + self.assertEqual(get_package_type('gentoo'), Package.GENTOO) - def test_get_package_type_unknown(self): + def testget_package_type_unknown(self): """Test unknown package type returns UNKNOWN.""" - self.assertEqual(_get_package_type(''), Package.UNKNOWN) - self.assertEqual(_get_package_type('invalid'), Package.UNKNOWN) - self.assertEqual(_get_package_type(None), Package.UNKNOWN) + self.assertEqual(get_package_type(''), Package.UNKNOWN) + self.assertEqual(get_package_type('invalid'), Package.UNKNOWN) + self.assertEqual(get_package_type(None), Package.UNKNOWN) @override_settings( @@ -155,30 +155,30 @@ def test_get_package_type_unknown(self): CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}} ) class GetRepoTypeTests(TestCase): - """Tests for _get_repo_type() function.""" + """Tests for get_repo_type() function.""" - def test_get_repo_type_deb(self): + def testget_repo_type_deb(self): """Test DEB repo type detection.""" - self.assertEqual(_get_repo_type('deb'), Repository.DEB) - self.assertEqual(_get_repo_type('DEB'), Repository.DEB) + self.assertEqual(get_repo_type('deb'), Repository.DEB) + self.assertEqual(get_repo_type('DEB'), Repository.DEB) - def test_get_repo_type_rpm(self): + def testget_repo_type_rpm(self): """Test RPM repo type detection.""" - self.assertEqual(_get_repo_type('rpm'), Repository.RPM) - self.assertEqual(_get_repo_type('RPM'), Repository.RPM) + self.assertEqual(get_repo_type('rpm'), Repository.RPM) + self.assertEqual(get_repo_type('RPM'), Repository.RPM) - def test_get_repo_type_arch(self): + def testget_repo_type_arch(self): """Test Arch repo type detection.""" - self.assertEqual(_get_repo_type('arch'), Repository.ARCH) + self.assertEqual(get_repo_type('arch'), Repository.ARCH) - def test_get_repo_type_gentoo(self): + def testget_repo_type_gentoo(self): """Test Gentoo repo type detection.""" - self.assertEqual(_get_repo_type('gentoo'), Repository.GENTOO) + self.assertEqual(get_repo_type('gentoo'), Repository.GENTOO) - def test_get_repo_type_unknown(self): + def testget_repo_type_unknown(self): """Test unknown repo type returns None.""" - self.assertIsNone(_get_repo_type('')) - self.assertIsNone(_get_repo_type('invalid')) + self.assertIsNone(get_repo_type('')) + self.assertIsNone(get_repo_type('invalid')) @override_settings( diff --git a/reports/utils.py b/reports/utils.py index 4fea67d3..45c26d98 100644 --- a/reports/utils.py +++ b/reports/utils.py @@ -34,7 +34,9 @@ from patchman.signals import pbar_start, pbar_update from repos.models import Mirror, MirrorPackage, Repository from repos.utils import get_or_create_repo -from util.logging import debug_message, error_message, info_message +from util.logging import ( + debug_message, error_message, info_message, warning_message, +) def process_repos(report, host): @@ -216,7 +218,7 @@ def parse_repos(repos_string): return repos -def _get_repo_type(type_str): +def get_repo_type(type_str): """ Convert repo type string to Repository constant """ type_str = type_str.lower() @@ -242,6 +244,9 @@ def process_repo(r_type, r_name, r_id, r_priority, urls, arch): for r_url in urls: if r_type == Repository.GENTOO and r_url.startswith('rsync'): r_url = 'https://api.gentoo.org/mirrors/distfiles.xml' + if not r_url.startswith(('http://', 'https://')): + warning_message(text=f'Skipping non-http(s) mirror URL: {r_url}') + continue try: mirror = Mirror.objects.get(url=r_url.strip('/')) except Mirror.DoesNotExist: @@ -259,7 +264,8 @@ def process_repo(r_type, r_name, r_id, r_priority, urls, arch): repository.repo_id = r_id for url in unknown: - Mirror.objects.create(repo=repository, url=url.rstrip('/')) + if url.startswith(('http://', 'https://')): + Mirror.objects.create(repo=repository, url=url.rstrip('/')) for mirror in Mirror.objects.filter(repo=repository).values('url'): mirror_url = mirror.get('url') @@ -364,7 +370,7 @@ def parse_packages(packages_string): return packages -def _get_package_type(type_str): +def get_package_type(type_str): """ Convert package type string to Package constant """ type_str = type_str.lower() if type_str else '' @@ -398,7 +404,7 @@ def process_package_text(pkg): rel = pkg[3] if pkg[3] else '' arch = pkg[4] if pkg[4] else 'unknown' - p_type = _get_package_type(pkg[5]) + p_type = get_package_type(pkg[5]) p_category = pkg[6] if p_type == Package.GENTOO and len(pkg) > 6 else None p_repo = pkg[7] if p_type == Package.GENTOO and len(pkg) > 7 else None @@ -413,7 +419,7 @@ def process_package_json(pkg): ver = pkg.get('version', '') rel = pkg.get('release', '') arch = pkg.get('arch', 'unknown') - p_type = _get_package_type(pkg.get('type', '')) + p_type = get_package_type(pkg.get('type', '')) p_category = pkg.get('category') if p_type == Package.GENTOO else None p_repo = pkg.get('repo') if p_type == Package.GENTOO else None @@ -453,7 +459,7 @@ def process_packages_json(packages_json, host): def process_repo_json(repo, arch): """ Processes a single JSON repo dict and converts to a repo object """ - r_type = _get_repo_type(repo.get('type', '')) + r_type = get_repo_type(repo.get('type', '')) if r_type is None: return None, 0 diff --git a/reports/views.py b/reports/views.py index 2aab3e54..44cf65b9 100644 --- a/reports/views.py +++ b/reports/views.py @@ -304,8 +304,8 @@ def create(self, request): data = serializer.validated_data # Extract client IP - x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') - x_real_ip = request.META.get('HTTP_X_REAL_IP') + x_forwarded_for = request.headers.get('x-forwarded-for') + x_real_ip = request.headers.get('x-real-ip') if x_forwarded_for: report_ip = x_forwarded_for.split(',')[0] elif x_real_ip: @@ -336,7 +336,7 @@ def create(self, request): os=data['os'], report_ip=report_ip, protocol='2', - useragent=request.META.get('HTTP_USER_AGENT', ''), + useragent=request.headers.get('user-agent', ''), packages=json.dumps(data.get('packages', [])), repos=json.dumps(data.get('repos', [])), modules=json.dumps(data.get('modules', [])), diff --git a/repos/utils.py b/repos/utils.py index 0d81eb25..87bc2192 100644 --- a/repos/utils.py +++ b/repos/utils.py @@ -197,6 +197,9 @@ def check_for_mirrorlists(repo): Creates MAX_MIRRORS mirrors from list if so. """ for mirror in repo.mirror_set.all(): + if not mirror.url.startswith(('http://', 'https://')): + warning_message(text=f'Skipping non-http(s) mirror URL: {mirror.url}') + continue mirror_urls = get_mirrorlist_urls(mirror.url) if mirror_urls: mirror.mirrorlist = True diff --git a/requirements.txt b/requirements.txt index 62441847..cdfbb825 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ django-filter==25.1 humanize==4.12.1 version-utils==0.3.2 python-magic==0.4.27 -gitpython==3.1.44 +gitpython==3.1.47 tenacity==8.2.3 celery==5.4.0 redis==6.4.0 diff --git a/sbin/patchman b/sbin/patchman index ea81801a..7040f228 100755 --- a/sbin/patchman +++ b/sbin/patchman @@ -46,6 +46,7 @@ from reports.tasks import remove_reports_with_no_hosts from repos.models import Repository from repos.utils import clean_repos from security.utils import update_cves, update_cwes +from util import get_setting_of_type from util.logging import info_message, set_quiet_mode @@ -482,10 +483,20 @@ def process_args(args): dns_checks(args.host) showhelp = False if args.update_errata: + concurrent = get_setting_of_type( + setting_name='CONCURRENT_PROCESSING', + setting_type=bool, + default=True, + ) + max_workers = get_setting_of_type( + setting_name='CONCURRENT_WORKERS', + setting_type=int, + default=25, + ) update_errata(args.erratum_type, args.force, args.repo) scan_package_updates_for_affected_packages() - mark_errata_security_updates() - enrich_errata() + mark_errata_security_updates(concurrent, max_workers) + enrich_errata(concurrent, max_workers) showhelp = False if args.update_cves: update_cves(args.cve, args.fetch_nist_data) diff --git a/security/migrations/0011_alter_cvss_unique_together_and_more.py b/security/migrations/0011_alter_cvss_unique_together_and_more.py new file mode 100644 index 00000000..6051bd5d --- /dev/null +++ b/security/migrations/0011_alter_cvss_unique_together_and_more.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.28 on 2026-04-01 03:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('security', '0010_fix_cvss_vector_string_length'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='cvss', + unique_together=set(), + ), + migrations.AlterUniqueTogether( + name='reference', + unique_together=set(), + ), + migrations.AddConstraint( + model_name='cvss', + constraint=models.UniqueConstraint(fields=('score', 'severity', 'version', 'vector_string'), name='unique_cvss'), + ), + migrations.AddConstraint( + model_name='reference', + constraint=models.UniqueConstraint(fields=('ref_type', 'url'), name='unique_reference'), + ), + ] diff --git a/security/models.py b/security/models.py index aab22882..9b7dd5b3 100644 --- a/security/models.py +++ b/security/models.py @@ -32,7 +32,12 @@ class Reference(models.Model): url = models.URLField(max_length=512) class Meta: - unique_together = ['ref_type', 'url'] + constraints = [ + models.UniqueConstraint( + fields=['ref_type', 'url'], + name='unique_reference', + ), + ] ordering = ['ref_type', 'url'] def __str__(self): @@ -87,7 +92,12 @@ class CVSS(models.Model): vector_string = models.CharField(max_length=255, blank=True, null=True) class Meta: - unique_together = ['score', 'severity', 'version', 'vector_string'] + constraints = [ + models.UniqueConstraint( + fields=['score', 'severity', 'version', 'vector_string'], + name='unique_cvss', + ), + ] def __str__(self): return f'{self.score} ({self.severity}) [{self.vector_string}]' diff --git a/security/tests/test_models.py b/security/tests/test_models.py index c04acf89..b2c63f84 100644 --- a/security/tests/test_models.py +++ b/security/tests/test_models.py @@ -16,6 +16,7 @@ from decimal import Decimal +from django.db import IntegrityError from django.test import TestCase, override_settings from security.models import CVE, CVSS, CWE, Reference @@ -47,7 +48,6 @@ def test_cve_get_absolute_url(self): def test_cve_unique_id(self): """Test CVE cve_id is unique.""" CVE.objects.create(cve_id='CVE-2024-12345') - from django.db import IntegrityError with self.assertRaises(IntegrityError): CVE.objects.create(cve_id='CVE-2024-12345') @@ -90,7 +90,6 @@ def test_cwe_get_absolute_url(self): def test_cwe_unique_id(self): """Test CWE cwe_id is unique.""" CWE.objects.create(cwe_id='CWE-79', name='XSS') - from django.db import IntegrityError with self.assertRaises(IntegrityError): CWE.objects.create(cwe_id='CWE-79', name='Different name') @@ -155,12 +154,11 @@ def test_reference_str(self): self.assertIn('example.com', str_repr) def test_reference_unique_together(self): - """Test Reference unique_together constraint.""" + """Test Reference unique constraint.""" Reference.objects.create( url='https://example.com/advisory', ref_type='VENDOR', ) - from django.db import IntegrityError with self.assertRaises(IntegrityError): Reference.objects.create( url='https://example.com/advisory', diff --git a/util/__init__.py b/util/__init__.py index 7797f522..e823efea 100644 --- a/util/__init__.py +++ b/util/__init__.py @@ -101,7 +101,7 @@ def fetch_content(response, text='', ljust=35): wait=wait_exponential(multiplier=1, min=1, max=10), reraise=False, ) -def get_url(url, headers=None, params=None): +def get_url(url, headers=None, params=None, session=None): """ Perform a http GET on a URL. Return None on error. """ response = None @@ -109,9 +109,10 @@ def get_url(url, headers=None, params=None): headers = {} if not params: params = {} + requester = session or requests try: debug_message(text=f'Trying {url} headers:{headers} params:{params}') - response = requests.get(url, headers=headers, params=params, stream=True, proxies=proxies, timeout=30) + response = requester.get(url, headers=headers, params=params, stream=True, proxies=proxies, timeout=30) debug_message(text=f'{response.status_code}: {response.headers}') if response.status_code in [403, 404]: return response @@ -120,6 +121,8 @@ def get_url(url, headers=None, params=None): error_message(text=f'Too many redirects - {url}') except ConnectionError: error_message(text=f'Connection error - {url}') + except requests.exceptions.InvalidSchema: + error_message(text=f'Unsupported URL scheme - {url}') return response @@ -193,16 +196,18 @@ def unzstd(contents): """ unzstd contents in memory and return the data """ try: - zstddata = zstd.ZstdDecompressor().stream_reader(contents).read() - return zstddata - except zstd.ZstdError as e: + if hasattr(zstd, 'decompress'): + return zstd.decompress(contents) + return zstd.ZstdDecompressor().stream_reader(contents).read() + except (zstd.ZstdError, Exception) as e: error_message(text=f'zstd: {e}') def extract(data, fmt): """ Extract the contents based on mimetype or file ending. Return the unmodified data if neither mimetype nor file ending matches, otherwise - return the extracted contents. + return the extracted contents. Falls back to unmodified data if + decompression fails (e.g. requests already decompressed the content). """ try: mime = magic.from_buffer(data, mime=True) @@ -211,14 +216,19 @@ def extract(data, fmt): m = magic.open(magic.MAGIC_MIME) m.load() mime = m.buffer(data).split(';')[0] + if mime.startswith('text/'): + return data + extracted = None if mime == 'application/zstd' or fmt.endswith('zst'): - return unzstd(data) - if mime == 'application/x-xz' or fmt.endswith('xz'): - return unxz(data) + extracted = unzstd(data) + elif mime == 'application/x-xz' or fmt.endswith('xz'): + extracted = unxz(data) elif mime == 'application/x-bzip2' or fmt.endswith('bz2'): - return bunzip2(data) + extracted = bunzip2(data) elif mime == 'application/gzip' or fmt.endswith('gz'): - return gunzip(data) + extracted = gunzip(data) + if extracted is not None: + return extracted return data @@ -297,3 +307,47 @@ def get_datetime_now(): """ Return the current timezone-aware datetime removing microseconds """ return datetime.now().astimezone().replace(microsecond=0) + + +def fetch_concurrently(func, items, max_workers=25): + """ Run func across items using threads with pooled HTTP sessions, + yielding results as they complete. Ideal for I/O-bound work + (network fetches). func(item, session) receives a shared + requests.Session with connection pooling. + """ + import concurrent.futures + + from requests.adapters import HTTPAdapter + + session = requests.Session() + adapter = HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers) + session.mount('https://', adapter) + session.mount('http://', adapter) + items = list(items) + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(func, item, session): item for item in items} + for future in concurrent.futures.as_completed(futures): + yield future.result() + + +def run_concurrently(func, items, max_workers=25): + """ Run func across items using multiprocessing, yielding results as + they complete. Uses multiprocessing.Pool on Python < 3.12 to avoid + ProcessPoolExecutor deadlock (CPython #105829). + """ + import concurrent.futures + import multiprocessing + import sys + + from django.db import connections + connections.close_all() + items = list(items) + if sys.version_info >= (3, 12): + with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor: + futures = [executor.submit(func, item) for item in items] + for future in concurrent.futures.as_completed(futures): + yield future.result() + else: + with multiprocessing.Pool(processes=max_workers) as pool: + for result in pool.imap_unordered(func, items, chunksize=1): + yield result diff --git a/util/context_processors.py b/util/context_processors.py index 97bbcf2c..0e9dc6b2 100644 --- a/util/context_processors.py +++ b/util/context_processors.py @@ -29,7 +29,7 @@ from util import get_setting_of_type -def _get_git_ref(): +def get_git_ref(): """Get current git ref if in a git repo.""" git_dir = Path(__file__).parent.parent / '.git' if not git_dir.exists(): @@ -49,7 +49,7 @@ def _get_git_ref(): return None -def _get_version(): +def get_version(): """Get version from package metadata or VERSION.txt.""" # Try importlib.metadata first (for installed packages) try: @@ -66,8 +66,8 @@ def _get_version(): # Cache version info at module load time (once per process) -_PATCHMAN_VERSION = _get_version() -_PATCHMAN_GIT_REF = _get_git_ref() +_PATCHMAN_VERSION = get_version() +_PATCHMAN_GIT_REF = get_git_ref() if _PATCHMAN_GIT_REF: _PATCHMAN_VERSION_DISPLAY = f'v{_PATCHMAN_VERSION} ({_PATCHMAN_GIT_REF})' else: diff --git a/util/filterspecs.py b/util/filterspecs.py index eac0f747..b2f160c0 100644 --- a/util/filterspecs.py +++ b/util/filterspecs.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU General Public License # along with Patchman. If not, see +from html import escape from operator import itemgetter from django.db.models.query import QuerySet @@ -70,7 +71,7 @@ def output(self, qs): style = 'list-group-item-success' qs[self.name] = k output += f'{v}\n' + output += f'"list-group-item {style}">{escape(str(v))}\n' output += '' return output diff --git a/util/logging.py b/util/logging.py index bf532e54..44644ecb 100644 --- a/util/logging.py +++ b/util/logging.py @@ -16,6 +16,7 @@ import logging +import os from django.conf import settings from tqdm import tqdm @@ -24,6 +25,8 @@ debug_message_s, error_message_s, info_message_s, warning_message_s, ) +tqdm.monitor_interval = 0 + log_format = '[%(asctime)s] %(levelname)s: %(message)s' if settings.DEBUG: logging_level = logging.DEBUG @@ -37,6 +40,18 @@ pbar = None +def clear_forked_pbar(): + """ Clear any tqdm instances inherited from a parent process via fork. + Prevents subprocess tqdm.write() from redrawing a stale progress bar + on the parent's terminal. Only clears if running in a child process. + """ + if os.getpid() != _main_pid and tqdm._instances: + tqdm._instances.clear() + + +_main_pid = os.getpid() + + def get_quiet_mode(): """ Get the global quiet_mode """