diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index f7225e2..38f0413 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -26,7 +26,7 @@ jobs:
       dry-run: ${{ steps.check-dry-run.outputs.enabled }}
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
         with:
           fetch-depth: 2
 
@@ -64,22 +64,25 @@ jobs:
       contents: read
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+
+      - name: Lint pinned actions
+        run: bash tools/linter_actions_pinned.sh
 
       - name: Get list of changed C# files
         id: changed-files
-        uses: tj-actions/changed-files@v46
+        uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46
         with:
           files: |
             **.cs
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Cache NuGet packages
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.nuget/packages
           key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.*proj') }}
@@ -116,7 +119,7 @@ jobs:
           fi
 
       - name: Upload test results
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: failure()
         with:
           name: test-results-unit
@@ -124,7 +127,7 @@ jobs:
           retention-days: 7
 
       - name: Upload coverage data
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: always()
         with:
           name: coverage-unit
@@ -166,15 +169,15 @@ jobs:
       pull-requests: write
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Download all coverage artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         with:
           pattern: coverage-*
           path: ./all-coverage
@@ -192,21 +195,21 @@ jobs:
             -title:"Weaviate C# Client Coverage"
 
       - name: Upload HTML coverage report
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         with:
           name: coverage-report-html
           path: ./coveragereport
           retention-days: 7
 
       - name: Add coverage to PR comment
-        uses: marocchino/sticky-pull-request-comment@v2
+        uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2
         if: github.event_name == 'pull_request'
         with:
           header: coverage
           path: ./coveragereport/SummaryGithub.md
 
       - name: Download all test results
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
         continue-on-error: true
         with:
           pattern: test-results-*
@@ -249,18 +252,18 @@ jobs:
           if [ -z "${{ secrets.NUGET_APIKEY }}" ]; then echo "Warning: NUGET_APIKEY is not set"; fi
 
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
         with:
           fetch-depth: 0
           fetch-tags: true
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Cache NuGet packages
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.nuget/packages
-          key: ${{ runner.os }}-nuget-${{ hashFiles('**/.*proj') }}
+          key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.*proj') }}
@@ -275,7 +278,7 @@ jobs:
         run: dotnet nuget push './out/*.nupkg' --skip-duplicate --api-key ${{ secrets.NUGET_APIKEY }} --source https://api.nuget.org/v3/index.json
 
       - name: GH Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # v1
         with:
           generate_release_notes: true
           draft: true
diff --git a/.github/workflows/pr-security-lint.yaml b/.github/workflows/pr-security-lint.yaml
new file mode 100644
index 0000000..3dc27e5
--- /dev/null
+++ b/.github/workflows/pr-security-lint.yaml
@@ -0,0 +1,33 @@
+name: PR Security Lint
+
+# pull_request_target runs in the context of the base repository, so this job
+# must never execute code from the PR head: only the base commit is checked
+# out, and the PR is inspected purely as diff text.
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+# No permissions at workflow level — grant only what's needed at job level
+permissions: {}
+
+jobs:
+  hidden-unicode-check:
+    name: Check for hidden Unicode characters
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+    steps:
+      - name: Checkout base branch
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+        with:
+          ref: ${{ github.event.pull_request.base.sha }}
+
+      - name: Check PR diff for hidden Unicode
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          # The default `bash -e` shell has no pipefail; without it a failing
+          # `gh pr diff` feeds an empty diff to the linter and the check passes.
+          set -euo pipefail
+          gh pr diff ${{ github.event.pull_request.number }} | bash tools/linter_hidden_unicode.sh --stdin
diff --git a/.github/workflows/test-on-weaviate-version.yml b/.github/workflows/test-on-weaviate-version.yml
index 658c08e..9e5e92d 100644
--- a/.github/workflows/test-on-weaviate-version.yml
+++ b/.github/workflows/test-on-weaviate-version.yml
@@ -36,15 +36,15 @@ jobs:
 
     steps:
       - name: Checkout code
-        uses: actions/checkout@v5
+        uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
 
       - name: Setup .NET
-        uses: actions/setup-dotnet@v5
+        uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5
         with:
           dotnet-version: ${{ env.DOTNET_VERSION }}
 
       - name: Cache NuGet packages
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.nuget/packages
           key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.*proj') }}
@@ -55,7 +55,7 @@ jobs:
         run: dotnet restore
 
       - name: Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
         if: ${{ !github.event.pull_request.head.repo.fork && github.triggering_actor != 'dependabot[bot]' }}
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
@@ -86,7 +86,7 @@ jobs:
         run: /bin/bash ci/stop_weaviate.sh ${{ inputs.weaviate-version }}
 
       - name: Upload test results
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: failure()
         with:
           name: test-results-integration-${{ inputs.weaviate-version }}
@@ -94,7 +94,7 @@ jobs:
           retention-days: 7
 
       - name: Upload coverage data
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
         if: always()
         with:
           name: coverage-integration-${{ inputs.weaviate-version }}
@@ -102,7 +102,7 @@ jobs:
           retention-days: 7
 
       - name: Test Report
-        uses: dorny/test-reporter@v1
+        uses: dorny/test-reporter@3eeb9fc888e82e8be2fb356bbeec2750231672bc # v1
         if: failure() && !inputs.dry-run
         with:
           name: Integration Tests - Weaviate ${{ inputs.weaviate-version }}
diff --git a/tools/linter_actions_pinned.sh b/tools/linter_actions_pinned.sh
new file mode 100755
index 0000000..ebe39b3
--- /dev/null
+++ b/tools/linter_actions_pinned.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Lint GitHub Actions workflow files to ensure all external actions are pinned to SHA hashes.
+# Usage: bash tools/linter_actions_pinned.sh
+#
+# Run from the repository root. Prints one "::error" workflow command per
+# offending "uses:" line and exits 1 if any were found, 0 otherwise.
+
+set -euo pipefail
+
+ERRORS=0
+
+for workflow in .github/workflows/*.yaml .github/workflows/*.yml; do
+  # An unmatched glob is left as a literal string, so skip non-files.
+  [ -f "$workflow" ] || continue
+
+  # Only lines where "uses:" appears before any "#" are read (see the grep at
+  # the end of the loop), so commented-out steps are not reported.
+  while IFS= read -r line; do
+    # grep -n prefixes every match with "<line number>:".
+    lineno=${line%%:*}
+    content=${line#*:}
+
+    # Extract the action reference (everything after "uses:")
+    action_ref=$(echo "$content" | sed -n 's/^[^#]*uses:[[:space:]]*//p' | xargs)
+
+    # Skip local actions (starting with ./)
+    if [[ "$action_ref" == ./* ]]; then
+      continue
+    fi
+
+    # Extract the version part (after @, before space or # comment)
+    version=$(echo "$action_ref" | sed -n 's/.*@\([^ #]*\).*/\1/p')
+
+    if [ -z "$version" ]; then
+      echo "::error file=${workflow},line=${lineno}::Action missing version pin: ${action_ref}"
+      ERRORS=$((ERRORS + 1))
+      continue
+    fi
+
+    # Check that the version is a 40-character hex SHA
+    if ! [[ "$version" =~ ^[0-9a-f]{40}$ ]]; then
+      echo "::error file=${workflow},line=${lineno}::Action not pinned to SHA: ${action_ref} (version: ${version})"
+      ERRORS=$((ERRORS + 1))
+    fi
+  done < <(grep -nE '^[^#]*uses:' "$workflow")
+done
+
+if [ "$ERRORS" -gt 0 ]; then
+  echo ""
+  echo "ERROR: Found ${ERRORS} action(s) not pinned to a SHA hash."
+  echo "Replace tag references (e.g., @v5) with the full commit SHA (e.g., @93cb6ef...)"
+  echo "Preserve the tag as a comment: uses: actions/checkout@<sha> # v5"
+  exit 1
+fi
+
+echo "All GitHub Actions are pinned to SHA hashes."
diff --git a/tools/linter_hidden_unicode.sh b/tools/linter_hidden_unicode.sh
new file mode 100755
index 0000000..c63a6ed
--- /dev/null
+++ b/tools/linter_hidden_unicode.sh
@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+# Lint for hidden/invisible Unicode characters in diffs (trojan-source attack prevention).
+# Requires Perl (pre-installed on GitHub Actions Ubuntu runners).
+#
+# Usage:
+#   bash tools/linter_hidden_unicode.sh --stdin       # read diff from stdin (CI mode)
+#   bash tools/linter_hidden_unicode.sh <base-ref>    # diff against a base ref
+#   bash tools/linter_hidden_unicode.sh               # diff staged changes (git diff --cached)
+#
+# Exit status: 0 when clean; 1 when hidden characters are found or the ref
+# argument is malformed; 2 when the ref does not exist.
+
+set -euo pipefail
+
+# Binary file extensions to skip
+BINARY_PATTERN='\.(png|jpg|jpeg|gif|ico|svg|woff2?|ttf|eot|otf|zip|tar|gz|bz2|xz|7z|rar|pdf|dll|exe|so|dylib|o|obj|class|jar|war|pyc|pyo|wasm|bin|dat|db|sqlite|nupkg|snupkg)$'
+
+# Write the diff to scan to stdout: stdin, a diff against $1, or the staged diff.
+get_diff() {
+  if [[ "${1:-}" == "--stdin" ]]; then
+    cat
+  elif [[ -n "${1:-}" ]]; then
+    # Validate ref argument to prevent command injection
+    if ! [[ "$1" =~ ^[a-zA-Z0-9._/-]+$ ]]; then
+      echo "ERROR: Invalid ref argument: $1" >&2
+      exit 1
+    fi
+    if ! git rev-parse --verify "$1" >/dev/null 2>&1; then
+      echo "ERROR: Git ref not found: $1" >&2
+      exit 2
+    fi
+    git diff "$1"
+  else
+    git diff --cached
+  fi
+}
+
+# Perl script that:
+#   1. Tracks current file from diff headers
+#   2. Skips binary files
+#   3. Scans only added lines inside hunks; hunk line counts are tracked so that
+#      added content that merely looks like a header cannot change parser state
+#   4. Detects ~30+ categories of invisible/suspicious Unicode characters
+#
+# The script is embedded in a single-quoted shell string, so it must not contain
+# an apostrophe anywhere (comments included).
+PERL_SCRIPT='
+use utf8;
+use strict;
+use warnings;
+
+# Escape a value used as a property (file=...) of a GitHub workflow command.
+sub escape_property {
+  my ($s) = @_;
+  $s =~ s/%/%25/g;
+  $s =~ s/\r/%0D/g;
+  $s =~ s/\n/%0A/g;
+  $s =~ s/:/%3A/g;
+  $s =~ s/,/%2C/g;
+  return $s;
+}
+
+# Escape a value used as the message of a GitHub workflow command.
+sub escape_message {
+  my ($s) = @_;
+  $s =~ s/%/%25/g;
+  $s =~ s/\r/%0D/g;
+  $s =~ s/\n/%0A/g;
+  return $s;
+}
+
+my $file = "";
+my $line_in_file = 0;
+my $errors = 0;
+my $in_binary = 0;
+# Lines the current hunk still owes on each side, taken from its header. While
+# either is positive, input lines are hunk content and never parsed as headers.
+my $old_left = 0;
+my $new_left = 0;
+my $binary_pattern = qr/'"$BINARY_PATTERN"'/i;
+
+while (<STDIN>) {
+  chomp;
+
+  # A "diff --git" line has no +, - or space prefix, so it cannot be hunk
+  # content; resynchronise on it at the start of every file.
+  if (/^diff --git /) {
+    $old_left = 0;
+    $new_left = 0;
+    $in_binary = 0;
+    next;
+  }
+
+  if ($old_left <= 0 && $new_left <= 0) {
+    # Between hunks only header lines are meaningful.
+    if (/^\+\+\+ b\/(.+)$/) {
+      # Track file from diff headers
+      $file = $1;
+      $line_in_file = 0;
+      $in_binary = ($file =~ $binary_pattern) ? 1 : 0;
+    } elsif (/^Binary files/) {
+      # Skip binary file markers
+      $in_binary = 1;
+    } elsif (/^\@\@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? \@\@/) {
+      # Hunk header; a count that is omitted means 1.
+      $old_left = defined($1) ? $1 : 1;
+      $line_in_file = $2 - 1;
+      $new_left = defined($3) ? $3 : 1;
+    }
+    next;
+  }
+
+  # "\ No newline at end of file" counts for neither side of the hunk.
+  next if /^\\/;
+
+  # Removed lines consume only the old-side count.
+  if (/^-/) {
+    $old_left--;
+    next;
+  }
+
+  # Added and context lines both advance the line number in the new file.
+  $line_in_file++;
+  $new_left--;
+  if (!/^\+/) {
+    $old_left--;
+    next;
+  }
+
+  # Only scan added lines, skip binary files
+  next if $in_binary;
+
+  # Remove the leading + for scanning
+  my $content = substr($_, 1);
+
+  # Check for suspicious invisible Unicode characters:
+  #   - Bidi overrides and isolates (U+200E-200F, U+202A-202E, U+2066-2069)
+  #   - Zero-width characters (U+200B-200D, U+2060)
+  #   - Byte order mark mid-line (U+FEFF)
+  #   - Soft hyphen (U+00AD)
+  #   - Mongolian vowel separator (U+180E)
+  #   - Combining grapheme joiner (U+034F)
+  #   - Function application and invisible operators (U+2061-2064)
+  #   - Hangul fillers (U+115F, U+1160, U+3164, U+FFA0)
+  #   - Variation selectors (U+FE00-FE0F)
+  #   - Interlinear annotation (U+FFF9-FFFB)
+  #   - Unicode tag block (U+E0001, U+E0020-E007F)
+  #   - Deprecated format chars (U+206A-206F)
+  # Object replacement / replacement char (U+FFFC-FFFD) are not in the class
+  # below and so are not flagged -- FFFD is sometimes legitimate.
+  # The /g loop reports every occurrence on the line, not just the first.
+  while ($content =~ /([\x{00AD}\x{034F}\x{115F}\x{1160}\x{180E}\x{200B}-\x{200F}\x{202A}-\x{202E}\x{2060}-\x{2064}\x{2066}-\x{2069}\x{206A}-\x{206F}\x{3164}\x{FE00}-\x{FE0F}\x{FEFF}\x{FFA0}\x{FFF9}-\x{FFFB}\x{E0001}\x{E0020}-\x{E007F}])/g) {
+    my $char = $1;
+    my $codepoint = sprintf("U+%04X", ord($char));
+    my $col = $-[1] + 1;
+
+    if ($ENV{GITHUB_ACTIONS}) {
+      my $efile = escape_property($file);
+      my $emsg = escape_message("Hidden Unicode character ${codepoint} found");
+      print "::error file=${efile},line=${line_in_file},col=${col}::${emsg}\n";
+    } else {
+      print "ERROR: $file:$line_in_file:$col - Hidden Unicode character $codepoint found\n";
+    }
+    $errors++;
+  }
+}
+
+if ($errors > 0) {
+  print "\nFound $errors hidden Unicode character(s) in added lines.\n";
+  print "These may indicate a trojan-source attack. See https://trojansource.codes/\n";
+  exit 1;
+} else {
+  print "No hidden Unicode characters detected.\n";
+  exit 0;
+}
+'
+
+get_diff "$@" | perl -CS -e "$PERL_SCRIPT"