From 3925a87c20f9ab10821eb6cf3e144e29e949b34a Mon Sep 17 00:00:00 2001
From: Michelangelo Partipilo
Date: Wed, 1 Apr 2026 22:57:04 +0200
Subject: [PATCH 1/2] feat: pin GitHub Actions to SHA hashes and add security linters

Prevent supply chain attacks by replacing mutable tag references with
immutable commit SHAs across all workflow files. Add CI linter to enforce
SHA pinning and a PR security workflow to detect hidden Unicode characters
(trojan-source prevention).

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .github/workflows/main.yaml                   |  37 +++---
 .github/workflows/pr-security-lint.yaml       |  26 ++++
 .../workflows/test-on-weaviate-version.yml    |  14 +--
 tools/linter_actions_pinned.sh                |  49 ++++++++
 tools/linter_hidden_unicode.sh                | 119 ++++++++++++++++++
 5 files changed, 221 insertions(+), 24 deletions(-)
 create mode 100644 .github/workflows/pr-security-lint.yaml
 create mode 100755 tools/linter_actions_pinned.sh
 create mode 100755 tools/linter_hidden_unicode.sh

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index f7225e2..38f0413 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -26,7 +26,7 @@ jobs:
       dry-run: ${{ steps.check-dry-run.outputs.enabled }}
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
         with:
           fetch-depth: 2
 
@@ -64,22 +64,25 @@ jobs:
       contents: read
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+
+      - name: Lint pinned actions
+        run: bash tools/linter_actions_pinned.sh
 
       - name: Get list of changed C# files
         id: changed-files
-        uses: tj-actions/changed-files@v46
+        uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46
         with:
           files: |
             **.cs
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Cache NuGet packages
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.nuget/packages
           key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.*proj') }}
@@ -116,7 +119,7 @@ jobs:
           fi
 
       - name: Upload test results
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: failure()
         with:
           name: test-results-unit
@@ -124,7 +127,7 @@ jobs:
           retention-days: 7
 
       - name: Upload coverage data
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: always()
         with:
           name: coverage-unit
@@ -166,15 +169,15 @@ jobs:
       pull-requests: write
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Download all coverage artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           pattern: coverage-*
           path: ./all-coverage
@@ -192,21 +195,21 @@ jobs:
             -title:"Weaviate C# Client Coverage"
 
       - name: Upload HTML coverage report
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-report-html
           path: ./coveragereport
           retention-days: 7
 
       - name: Add coverage to PR comment
-        uses: marocchino/sticky-pull-request-comment@v2
+        uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2
         if: github.event_name == 'pull_request'
         with:
           header: coverage
           path: ./coveragereport/SummaryGithub.md
 
       - name: Download all test results
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         continue-on-error: true
         with:
           pattern: test-results-*
@@ -249,18 +252,18 @@ jobs:
           if [ -z "${{ secrets.NUGET_APIKEY }}" ]; then echo "Warning: NUGET_APIKEY is not set"; fi
 
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
         with:
           fetch-depth: 0
           fetch-tags: true
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Cache NuGet packages
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.nuget/packages
           key: ${{ runner.os }}-nuget-${{ hashFiles('**/.*proj') }}
@@ -275,7 +278,7 @@ jobs:
         run: dotnet nuget push './out/*.nupkg' --skip-duplicate --api-key ${{ secrets.NUGET_APIKEY }} --source https://api.nuget.org/v3/index.json
 
       - name: GH Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # v1
         with:
           generate_release_notes: true
           draft: true
diff --git a/.github/workflows/pr-security-lint.yaml b/.github/workflows/pr-security-lint.yaml
new file mode 100644
index 0000000..4f5c6c1
--- /dev/null
+++ b/.github/workflows/pr-security-lint.yaml
@@ -0,0 +1,26 @@
+name: PR Security Lint
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+# No permissions at workflow level — grant only what's needed at job level
+permissions: {}
+
+jobs:
+  hidden-unicode-check:
+    name: Check for hidden Unicode characters
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout base branch
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+        with:
+          ref: ${{ github.event.pull_request.base.sha }}
+
+      - name: Check PR diff for hidden Unicode
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          gh pr diff ${{ github.event.pull_request.number }} | bash tools/linter_hidden_unicode.sh --stdin
diff --git a/.github/workflows/test-on-weaviate-version.yml b/.github/workflows/test-on-weaviate-version.yml
index 658c08e..9e5e92d 100644
--- a/.github/workflows/test-on-weaviate-version.yml
+++ b/.github/workflows/test-on-weaviate-version.yml
@@ -36,15 +36,15 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Cache NuGet packages
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.nuget/packages
           key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.*proj') }}
@@ -55,7 +55,7 @@ jobs:
         run: dotnet restore
 
       - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
         if: ${{ !github.event.pull_request.head.repo.fork && github.triggering_actor != 'dependabot[bot]' }}
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
@@ -86,7 +86,7 @@ jobs:
         run: /bin/bash ci/stop_weaviate.sh ${{ inputs.weaviate-version }}
 
       - name: Upload test results
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: failure()
         with:
           name: test-results-integration-${{ inputs.weaviate-version }}
@@ -94,7 +94,7 @@ jobs:
           retention-days: 7
 
       - name: Upload coverage data
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: always()
         with:
           name: coverage-integration-${{ inputs.weaviate-version }}
@@ -102,7 +102,7 @@ jobs:
           retention-days: 7
 
       - name: Test Report
-        uses: dorny/test-reporter@v1
+        uses: dorny/test-reporter@3eeb9fc888e82e8be2fb356bbeec2750231672bc # v1
         if: failure() && !inputs.dry-run
         with:
           name: Integration Tests - Weaviate ${{ inputs.weaviate-version }}
diff --git a/tools/linter_actions_pinned.sh b/tools/linter_actions_pinned.sh
new file mode 100755
index 0000000..ebe39b3
--- /dev/null
+++ b/tools/linter_actions_pinned.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Lint GitHub Actions workflow files to ensure all external actions are pinned to SHA hashes.
+# Usage: bash tools/linter_actions_pinned.sh
+
+set -euo pipefail
+
+ERRORS=0
+
+for workflow in .github/workflows/*.yaml .github/workflows/*.yml; do
+    [ -f "$workflow" ] || continue
+
+    while IFS= read -r line; do
+        lineno=$(echo "$line" | cut -d: -f1)
+        content=$(echo "$line" | cut -d: -f2-)
+
+        # Extract the action reference (everything after "uses:")
+        action_ref=$(echo "$content" | sed -n 's/.*uses:[[:space:]]*//p' | xargs)
+
+        # Skip local actions (starting with ./)
+        if [[ "$action_ref" == ./* ]]; then
+            continue
+        fi
+
+        # Extract the version part (after @, before space or # comment)
+        version=$(echo "$action_ref" | sed -n 's/.*@\([^ #]*\).*/\1/p')
+
+        if [ -z "$version" ]; then
+            echo "::error file=${workflow},line=${lineno}::Action missing version pin: ${action_ref}"
+            ERRORS=$((ERRORS + 1))
+            continue
+        fi
+
+        # Check that the version is a 40-character hex SHA
+        if ! echo "$version" | grep -qE '^[0-9a-f]{40}$'; then
+            echo "::error file=${workflow},line=${lineno}::Action not pinned to SHA: ${action_ref} (version: ${version})"
+            ERRORS=$((ERRORS + 1))
+        fi
+    done < <(grep -n 'uses:' "$workflow")
+done
+
+if [ "$ERRORS" -gt 0 ]; then
+    echo ""
+    echo "ERROR: Found ${ERRORS} action(s) not pinned to a SHA hash."
+    echo "Replace tag references (e.g., @v5) with the full commit SHA (e.g., @93cb6ef...)"
+    echo "Preserve the tag as a comment: uses: actions/checkout@<sha> # v5"
+    exit 1
+fi
+
+echo "All GitHub Actions are pinned to SHA hashes."
diff --git a/tools/linter_hidden_unicode.sh b/tools/linter_hidden_unicode.sh
new file mode 100755
index 0000000..07f58ee
--- /dev/null
+++ b/tools/linter_hidden_unicode.sh
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# Lint for hidden/invisible Unicode characters in diffs (trojan-source attack prevention).
+# Requires Perl (pre-installed on GitHub Actions Ubuntu runners).
+#
+# Usage:
+#   bash tools/linter_hidden_unicode.sh --stdin       # read diff from stdin (CI mode)
+#   bash tools/linter_hidden_unicode.sh <base-ref>    # diff against a base ref
+#   bash tools/linter_hidden_unicode.sh               # diff staged changes (git diff --cached)
+
+set -euo pipefail
+
+# Binary file extensions to skip
+BINARY_PATTERN='\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|otf|zip|tar|gz|bz2|xz|7z|rar|pdf|dll|exe|so|dylib|o|obj|class|jar|war|pyc|pyo|wasm|bin|dat|db|sqlite|nupkg|snupkg)$'
+
+get_diff() {
+    if [[ "${1:-}" == "--stdin" ]]; then
+        cat
+    elif [[ -n "${1:-}" ]]; then
+        # Validate ref argument to prevent command injection
+        if ! [[ "$1" =~ ^[a-zA-Z0-9._/-]+$ ]]; then
+            echo "ERROR: Invalid ref argument: $1" >&2
+            exit 1
+        fi
+        git diff "$1"
+    else
+        git diff --cached
+    fi
+}
+
+# Perl script that:
+#   1. Tracks current file from diff headers
+#   2. Skips binary files
+#   3. Scans only added lines (starting with +, excluding +++ headers)
+#   4. Detects ~30+ categories of invisible/suspicious Unicode characters
+PERL_SCRIPT='
+use utf8;
+use strict;
+use warnings;
+
+my $file = "";
+my $line_in_file = 0;
+my $errors = 0;
+my $in_binary = 0;
+my $binary_pattern = qr/'"$BINARY_PATTERN"'/i;
+
+while (<STDIN>) {
+    chomp;
+
+    # Track file from diff headers
+    if (/^\+\+\+ b\/(.+)$/) {
+        $file = $1;
+        $line_in_file = 0;
+        $in_binary = ($file =~ $binary_pattern) ? 1 : 0;
+        next;
+    }
+
+    # Skip binary file markers
+    if (/^Binary files/) {
+        $in_binary = 1;
+        next;
+    }
+
+    # Track hunk headers for line numbers
+    if (/^@@ -\d+(?:,\d+)? \+(\d+)/) {
+        $line_in_file = $1 - 1;
+        next;
+    }
+
+    # Count lines in the new file
+    if (/^\+/ || /^ /) {
+        $line_in_file++;
+    }
+
+    # Only scan added lines, skip binary files
+    next if $in_binary;
+    next unless /^\+/;
+    next if /^\+\+\+/;
+
+    # Remove the leading + for scanning
+    my $content = substr($_, 1);
+
+    # Check for suspicious invisible Unicode characters:
+    #   - Bidi overrides and isolates (U+200E-200F, U+202A-202E, U+2066-2069)
+    #   - Zero-width characters (U+200B-200D, U+2060)
+    #   - Byte order mark mid-line (U+FEFF)
+    #   - Soft hyphen (U+00AD)
+    #   - Mongolian vowel separator (U+180E)
+    #   - Combining grapheme joiner (U+034F)
+    #   - Function application and invisible operators (U+2061-2064)
+    #   - Hangul fillers (U+115F, U+1160, U+3164, U+FFA0)
+    #   - Interlinear annotation (U+FFF9-FFFB)
+    #   - Object replacement / replacement char (U+FFFC-FFFD) -- FFFD is sometimes legitimate
+    #   - Unicode tag block (U+E0001, U+E0020-E007F)
+    #   - Deprecated format chars (U+206A-206F)
+    if ($content =~ /([\x{00AD}\x{034F}\x{115F}\x{1160}\x{180E}\x{200B}-\x{200F}\x{202A}-\x{202E}\x{2060}-\x{2064}\x{2066}-\x{2069}\x{206A}-\x{206F}\x{3164}\x{FE00}-\x{FE0F}\x{FEFF}\x{FFA0}\x{FFF9}-\x{FFFB}\x{E0001}\x{E0020}-\x{E007F}])/) {
+        my $char = $1;
+        my $codepoint = sprintf("U+%04X", ord($char));
+        my $col = $-[1] + 1;
+
+        if ($ENV{GITHUB_ACTIONS}) {
+            print "::error file=${file},line=${line_in_file},col=${col}::Hidden Unicode character ${codepoint} found\n";
+        } else {
+            print "ERROR: $file:$line_in_file:$col - Hidden Unicode character $codepoint found\n";
+        }
+        $errors++;
+    }
+}
+
+if ($errors > 0) {
+    print "\nFound $errors hidden Unicode character(s) in added lines.\n";
+    print "These may indicate a trojan-source attack. See https://trojansource.codes/\n";
+    exit 1;
+} else {
+    print "No hidden Unicode characters detected.\n";
+    exit 0;
+}
+'
+
+get_diff "$@" | perl -CS -e "$PERL_SCRIPT"

From 6fd6695cfa81d2854978d38eff7d55d1464b1a6d Mon Sep 17 00:00:00 2001
From: Michelangelo Partipilo
Date: Wed, 1 Apr 2026 23:34:15 +0200
Subject: [PATCH 2/2] fix: harden hidden Unicode linter against bypass and injection

- Narrow +++ exclusion to only match real diff headers, preventing
  attackers from evading scanning with +++prefixed lines
- Escape filenames in GitHub Actions annotations to prevent command
  injection via crafted PR filenames
- Add git ref validation with clear error on missing refs
- Add pull-requests: read permission required by gh pr diff

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .github/workflows/pr-security-lint.yaml |  1 +
 tools/linter_hidden_unicode.sh          | 28 +++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr-security-lint.yaml b/.github/workflows/pr-security-lint.yaml
index 4f5c6c1..3dc27e5 100644
--- a/.github/workflows/pr-security-lint.yaml
+++ b/.github/workflows/pr-security-lint.yaml
@@ -13,6 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     permissions:
       contents: read
+      pull-requests: read
     steps:
       - name: Checkout base branch
         uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
diff --git a/tools/linter_hidden_unicode.sh b/tools/linter_hidden_unicode.sh
index 07f58ee..c63a6ed 100755
--- a/tools/linter_hidden_unicode.sh
+++ b/tools/linter_hidden_unicode.sh
@@ -21,6 +21,10 @@ get_diff() {
             echo "ERROR: Invalid ref argument: $1" >&2
             exit 1
         fi
+        if ! git rev-parse --verify "$1" >/dev/null 2>&1; then
+            echo "ERROR: Git ref not found: $1" >&2
+            exit 2
+        fi
         git diff "$1"
     else
         git diff --cached
@@ -37,6 +41,24 @@ use utf8;
 use strict;
 use warnings;
 
+sub escape_property {
+    my ($s) = @_;
+    $s =~ s/%/%25/g;
+    $s =~ s/\r/%0D/g;
+    $s =~ s/\n/%0A/g;
+    $s =~ s/:/%3A/g;
+    $s =~ s/,/%2C/g;
+    return $s;
+}
+
+sub escape_message {
+    my ($s) = @_;
+    $s =~ s/%/%25/g;
+    $s =~ s/\r/%0D/g;
+    $s =~ s/\n/%0A/g;
+    return $s;
+}
+
 my $file = "";
 my $line_in_file = 0;
 my $errors = 0;
@@ -74,7 +96,7 @@ while (<STDIN>) {
     # Only scan added lines, skip binary files
     next if $in_binary;
     next unless /^\+/;
-    next if /^\+\+\+/;
+    next if /^\+\+\+ (?:$|b\/|\/dev\/null)/;
 
     # Remove the leading + for scanning
     my $content = substr($_, 1);
@@ -98,7 +120,9 @@ while (<STDIN>) {
         my $col = $-[1] + 1;
 
         if ($ENV{GITHUB_ACTIONS}) {
-            print "::error file=${file},line=${line_in_file},col=${col}::Hidden Unicode character ${codepoint} found\n";
+            my $efile = escape_property($file);
+            my $emsg = escape_message("Hidden Unicode character ${codepoint} found");
+            print "::error file=${efile},line=${line_in_file},col=${col}::${emsg}\n";
         } else {
             print "ERROR: $file:$line_in_file:$col - Hidden Unicode character $codepoint found\n";
         }