From 293737ec2dd9f08698e74f4bc12e170169dca8cf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 13:55:53 +0000 Subject: [PATCH 1/3] Initial plan From 3b1e4031b928067995c953aeacdb25680735a4b3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 14:01:58 +0000 Subject: [PATCH 2/3] Add custom instructions validation script with tests and documentation --- .gitignore | 1 + AGENTS.md | 63 ++- README.md | 79 +++- test_validate_custom_instructions.py | 370 +++++++++++++++++ validate_custom_instructions.py | 580 +++++++++++++++++++++++++++ 5 files changed, 1067 insertions(+), 26 deletions(-) create mode 100644 test_validate_custom_instructions.py create mode 100644 validate_custom_instructions.py diff --git a/.gitignore b/.gitignore index d4e2963..44c9726 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ __pycache__/ *.pyc *.pyo *.csv +*.xlsx *.log .DS_Store diff --git a/AGENTS.md b/AGENTS.md index e70fcf6..209e31b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -20,21 +20,24 @@ The tool performs three main types of assessments: ``` GitHubAssessment/ -├── security_assessment.py # Main: Repository security controls assessment -├── idp_assessment.py # Main: Identity & access management assessment -├── assess_copilot_repos.py # Main: GitHub Copilot best practices validation -├── list_repos_gh_cli.py # Utility: Basic repository listing -├── list_repos_gh_cli_optimized.py # Utility: Optimized repository listing with Copilot checks -├── list_and_check_repos.py # Utility: Combined listing and Copilot directory checking -├── .gitignore # Excludes .venv, __pycache__, *.csv, *.log -├── README.md # Human-readable documentation -└── AGENTS.md # This file - LLM agent documentation +├── security_assessment.py # Main: Repository security controls assessment +├── idp_assessment.py # Main: Identity & access management assessment +├── 
assess_copilot_repos.py # Main: GitHub Copilot best practices validation +├── validate_custom_instructions.py # Main: Custom Instructions existence & size validation +├── test_validate_custom_instructions.py # Tests: Automated tests for validate_custom_instructions.py +├── list_repos_gh_cli.py # Utility: Basic repository listing +├── list_repos_gh_cli_optimized.py # Utility: Optimized repository listing with Copilot checks +├── list_and_check_repos.py # Utility: Combined listing and Copilot directory checking +├── .gitignore # Excludes .venv, __pycache__, *.csv, *.xlsx, *.log +├── README.md # Human-readable documentation +└── AGENTS.md # This file - LLM agent documentation ``` ### Generated Files (Excluded from Git) - `github_security_assessment_YYYYMMDD_HHMMSS.csv` - Security assessment reports - `github_idp_assessment_YYYYMMDD_HHMMSS.csv` - IDP assessment reports - `github_copilot_assessment_YYYYMMDD_HHMMSS.csv` - Copilot assessment reports +- `custom_instructions_violations_YYYYMMDD_HHMMSS.xlsx` - Custom Instructions violations report - `.venv/` - Python virtual environment directory ## Codebase Architecture @@ -164,7 +167,47 @@ Instructions Dir, Instructions Count, Agents Dir, Agents Count, Collections Dir, Collections Count, Scripts Dir, Scripts Count, Overall Copilot Status, Recommendations, Errors ``` -#### 4. Utility Scripts +#### 4. validate_custom_instructions.py + +**Purpose**: Validates that GitHub Copilot Repository Custom Instructions files exist and comply +with the 4,000-character limit defined in the GitHub Copilot documentation. 
+ +**Reference**: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions + +**Key Functions**: +- `check_gh_installed()` - Validates GitHub CLI +- `check_rate_limit()` - Monitors API rate limits +- `run_gh_command(command)` - Executes GitHub CLI commands returning JSON +- `fetch_repositories()` - Fetches all repositories with parallel execution +- `decode_file_content(api_response)` - Decodes base64-encoded file content from GitHub API +- `get_custom_instruction_files(repo_name)` - Retrieves all custom instruction files for a repo +- `assess_repo(repo)` - Main assessment for a single repository +- `check_all_repositories(repos)` - Parallel assessment of all repositories +- `validate_existence(results)` - Checks that at least one custom instruction file exists +- `validate_file_sizes(results)` - Checks all files are within the 4,000-character limit +- `export_violations_to_excel(violations)` - Exports violations to an Excel file +- `main()` - Entry point; halts with exit code 1 when a validation fails + +**Files Checked**: +- `.github/copilot-instructions.md` - Primary repository-level custom instruction file +- `.github/instructions/*.instructions.md` - Additional instruction files + +**Validation Rules**: +1. **Existence**: At least one custom instruction file must be present +2. **Size**: Each file must not exceed 4,000 characters + +**Output**: +- Console: Pass/Fail per validation rule with GitHub Copilot rule references +- Excel: `custom_instructions_violations_YYYYMMDD_HHMMSS.xlsx` (only when violations found) + +**Exit Codes**: +- `0` - All validations passed +- `1` - One or more validations failed + +**Dependencies**: +- `openpyxl` (optional; falls back to CSV if not installed): `pip install openpyxl` + +#### 5. 
Utility Scripts **list_repos_gh_cli.py** (143 lines): - Basic repository listing using GitHub CLI diff --git a/README.md b/README.md index a6b33ba..832d7c8 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,50 @@ python assess_copilot_repos.py - Overall Copilot readiness status - Recommendations for missing components -### 2. Security Assessment +### 2. Custom Instructions Validation + +Validates that GitHub Copilot Repository Custom Instructions files exist and comply with the +4,000-character limit imposed by GitHub Copilot. + +> 📖 Reference: [GitHub Copilot Repository Custom Instructions](https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions) + +**Install the optional Excel library (without `openpyxl`, violations are written to a CSV file instead):** +```bash +pip install openpyxl +``` + +**Run the validation:** +```bash +python validate_custom_instructions.py +``` + +**What it checks:** +- **Existence** – at least one of the following files must be present in a repository: + - `.github/copilot-instructions.md` + - `.github/instructions/*.instructions.md` +- **Size limit** – each custom instruction file must not exceed **4,000 characters** + +**Validation flow:** +1. Fetches all accessible repositories +2. Checks each repository for custom instruction files +3. **Fails immediately** if no custom instruction files are found across all repositories +4. **Fails immediately** if any file exceeds the 4,000-character limit and generates an Excel report (CSV if `openpyxl` is not installed) + +**Output:** +- Console summary with ✅/❌ validation results per rule +- Excel file `custom_instructions_violations_YYYYMMDD_HHMMSS.xlsx` (only when violations are found; a `.csv` file with the same prefix is written instead when `openpyxl` is not installed) + - Lists every violating file with its repository, path, character count, and excess characters + +**Exit codes:** +- `0` – All validations passed +- `1` – One or more validations failed + +**Running tests:** +```bash +python test_validate_custom_instructions.py -v +``` + +### 3. 
Security Assessment Evaluates repository-level security controls across all accessible repositories. @@ -186,7 +229,7 @@ python security_assessment.py - Overall security compliance status - Error details (if any) -### 3. Identity & Access Management (IDP) Assessment +### 4. Identity & Access Management (IDP) Assessment Evaluates organization-level identity, authentication, and access controls. @@ -296,20 +339,23 @@ This tool supports assessment alignment with: ``` GitHubAssessment/ -├── security_assessment.py # Repository security controls assessment -├── idp_assessment.py # Identity & access management assessment -├── assess_copilot_repos.py # GitHub Copilot best practices validation -├── list_repos_gh_cli.py # Repository listing utility -├── list_repos_gh_cli_optimized.py # Optimized repository listing -├── list_and_check_repos.py # Combined listing and checking -├── MCP/ # Model Context Protocol configurations -├── .venv/ # Python virtual environment -├── .gitignore # Excludes .venv, CSV files, logs -├── github_security_assessment_*.csv # Generated security reports -├── github_idp_assessment_*.csv # Generated IDP reports -├── github_copilot_assessment_*.csv # Generated Copilot reports -├── README.md # This file - Human-readable documentation -└── AGENTS.md # LLM/AI agent documentation for code assistance +├── security_assessment.py # Repository security controls assessment +├── idp_assessment.py # Identity & access management assessment +├── assess_copilot_repos.py # GitHub Copilot best practices validation +├── validate_custom_instructions.py # Custom Instructions existence & size validation +├── test_validate_custom_instructions.py # Automated tests for the above +├── list_repos_gh_cli.py # Repository listing utility +├── list_repos_gh_cli_optimized.py # Optimized repository listing +├── list_and_check_repos.py # Combined listing and checking +├── MCP/ # Model Context Protocol configurations +├── .venv/ # Python virtual environment +├── .gitignore # Excludes 
.venv, CSV files, Excel files, logs +├── github_security_assessment_*.csv # Generated security reports +├── github_idp_assessment_*.csv # Generated IDP reports +├── github_copilot_assessment_*.csv # Generated Copilot reports +├── custom_instructions_violations_*.xlsx # Generated violations reports +├── README.md # This file - Human-readable documentation +└── AGENTS.md # LLM/AI agent documentation for code assistance ``` ## Output and Reports @@ -507,6 +553,7 @@ gh config list - [x] Security assessment with parallel execution - [x] IDP assessment with Enterprise SSO support - [x] Copilot best practices validation +- [x] Custom Instructions existence & size validation with Excel report - [x] CSV export with timestamped files - [ ] Advanced analytics dashboard - [ ] Trend analysis across multiple assessments diff --git a/test_validate_custom_instructions.py b/test_validate_custom_instructions.py new file mode 100644 index 0000000..4220eb7 --- /dev/null +++ b/test_validate_custom_instructions.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +""" +Automated tests for validate_custom_instructions.py + +Tests cover: +- No custom instruction file found (existence validation failure) +- Valid file within the 4000-character limit +- Invalid file above the 4000-character limit (size validation failure) + +Usage: + python test_validate_custom_instructions.py + python -m pytest test_validate_custom_instructions.py -v +""" + +import base64 +import json +import sys +import unittest +from unittest.mock import MagicMock, patch, call + +# Ensure the module under test is importable from this directory +sys.path.insert(0, '.') + +import validate_custom_instructions as vci + + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- + +def _make_file_api_response(content: str) -> dict: + """Simulate a GitHub API file response with base64-encoded content.""" + encoded = 
base64.b64encode(content.encode('utf-8')).decode('utf-8') + return { + 'type': 'file', + 'content': encoded, + 'encoding': 'base64', + } + + +# --------------------------------------------------------------------------- +# Unit tests: decode_file_content +# --------------------------------------------------------------------------- + +class TestDecodeFileContent(unittest.TestCase): + """Tests for the decode_file_content helper.""" + + def test_decodes_valid_base64_content(self): + text = "Hello, Copilot!" + response = _make_file_api_response(text) + result = vci.decode_file_content(response) + self.assertEqual(result, text) + + def test_returns_none_for_non_dict_response(self): + self.assertIsNone(vci.decode_file_content(None)) + self.assertIsNone(vci.decode_file_content([])) + + def test_returns_none_for_empty_content_field(self): + self.assertIsNone(vci.decode_file_content({'content': ''})) + + def test_returns_none_for_missing_content_field(self): + self.assertIsNone(vci.decode_file_content({'type': 'file'})) + + +# --------------------------------------------------------------------------- +# Unit tests: validate_existence +# --------------------------------------------------------------------------- + +class TestValidateExistence(unittest.TestCase): + """Tests for the validate_existence validation function.""" + + def test_fails_when_no_files_found(self): + """Scenario: No custom instruction file found in any repository.""" + results = [ + {'repo': 'org/repo1', 'files_found': [], 'violations': [], 'has_custom_instructions': False, 'error': None}, + {'repo': 'org/repo2', 'files_found': [], 'violations': [], 'has_custom_instructions': False, 'error': None}, + ] + self.assertFalse(vci.validate_existence(results)) + + def test_passes_when_at_least_one_file_found(self): + """Scenario: At least one custom instruction file exists.""" + results = [ + { + 'repo': 'org/repo1', + 'files_found': [ + {'path': '.github/copilot-instructions.md', 'char_count': 100, 
'exceeds_limit': False} + ], + 'violations': [], + 'has_custom_instructions': True, + 'error': None, + }, + ] + self.assertTrue(vci.validate_existence(results)) + + def test_fails_with_empty_results_list(self): + """Edge case: Empty results (no repos scanned).""" + self.assertFalse(vci.validate_existence([])) + + +# --------------------------------------------------------------------------- +# Unit tests: validate_file_sizes +# --------------------------------------------------------------------------- + +class TestValidateFileSizes(unittest.TestCase): + """Tests for the validate_file_sizes validation function.""" + + def test_returns_empty_list_when_all_files_within_limit(self): + """Scenario: Valid file within the 4000-character limit.""" + results = [ + { + 'repo': 'org/repo1', + 'files_found': [ + {'path': '.github/copilot-instructions.md', 'char_count': 500, 'exceeds_limit': False} + ], + 'violations': [], + 'has_custom_instructions': True, + 'error': None, + }, + ] + violations = vci.validate_file_sizes(results) + self.assertEqual(violations, []) + + def test_returns_violations_when_file_exceeds_limit(self): + """Scenario: Invalid file above 4000 characters.""" + violation = { + 'repository': 'org/repo1', + 'file_path': '.github/copilot-instructions.md', + 'char_count': 4500, + 'limit': 4000, + 'excess_chars': 500, + } + results = [ + { + 'repo': 'org/repo1', + 'files_found': [ + {'path': '.github/copilot-instructions.md', 'char_count': 4500, 'exceeds_limit': True} + ], + 'violations': [violation], + 'has_custom_instructions': True, + 'error': None, + }, + ] + violations = vci.validate_file_sizes(results) + self.assertEqual(len(violations), 1) + self.assertEqual(violations[0]['char_count'], 4500) + self.assertEqual(violations[0]['excess_chars'], 500) + + def test_returns_violations_for_multiple_repos(self): + """Multiple repositories with multiple violations.""" + v1 = { + 'repository': 'org/repo1', + 'file_path': '.github/copilot-instructions.md', + 
'char_count': 5000, + 'limit': 4000, + 'excess_chars': 1000, + } + v2 = { + 'repository': 'org/repo2', + 'file_path': '.github/instructions/coding.instructions.md', + 'char_count': 4001, + 'limit': 4000, + 'excess_chars': 1, + } + results = [ + {'repo': 'org/repo1', 'files_found': [], 'violations': [v1], 'has_custom_instructions': True, 'error': None}, + {'repo': 'org/repo2', 'files_found': [], 'violations': [v2], 'has_custom_instructions': True, 'error': None}, + ] + violations = vci.validate_file_sizes(results) + self.assertEqual(len(violations), 2) + + def test_file_at_exact_limit_is_valid(self): + """A file with exactly 4000 characters should not be flagged.""" + results = [ + { + 'repo': 'org/repo1', + 'files_found': [ + {'path': '.github/copilot-instructions.md', 'char_count': 4000, 'exceeds_limit': False} + ], + 'violations': [], + 'has_custom_instructions': True, + 'error': None, + }, + ] + violations = vci.validate_file_sizes(results) + self.assertEqual(violations, []) + + +# --------------------------------------------------------------------------- +# Unit tests: assess_repo (integration of get_custom_instruction_files) +# --------------------------------------------------------------------------- + +class TestAssessRepo(unittest.TestCase): + """Tests for assess_repo function using mocked GitHub CLI calls.""" + + def _mock_repo(self, name='org/repo'): + return {'nameWithOwner': name} + + @patch('validate_custom_instructions.run_gh_command') + def test_no_custom_instruction_files_found(self, mock_run): + """Scenario: Repository has no custom instruction files.""" + # All API calls return None (file not found) + mock_run.return_value = None + + result = vci.assess_repo(self._mock_repo()) + + self.assertFalse(result['has_custom_instructions']) + self.assertEqual(result['files_found'], []) + self.assertEqual(result['violations'], []) + self.assertIsNone(result['error']) + + @patch('validate_custom_instructions.run_gh_command') + def 
test_valid_file_within_limit(self, mock_run): + """Scenario: Repository has a valid custom instruction file within the 4000-char limit.""" + valid_content = "Follow PEP 8 coding standards.\n" * 10 # ~310 chars + file_response = _make_file_api_response(valid_content) + + def side_effect(command): + if 'copilot-instructions.md' in command and '/contents/' in command and 'instructions/' not in command: + return file_response + return None # instructions/ dir not found + + mock_run.side_effect = side_effect + + result = vci.assess_repo(self._mock_repo()) + + self.assertTrue(result['has_custom_instructions']) + self.assertEqual(len(result['files_found']), 1) + self.assertEqual(result['violations'], []) + self.assertFalse(result['files_found'][0]['exceeds_limit']) + + @patch('validate_custom_instructions.run_gh_command') + def test_invalid_file_above_limit(self, mock_run): + """Scenario: Repository has a custom instruction file exceeding 4000 characters.""" + over_limit_content = "A" * 4500 # 4500 chars, over the 4000 limit + file_response = _make_file_api_response(over_limit_content) + + def side_effect(command): + if 'copilot-instructions.md' in command and '/contents/' in command and 'instructions/' not in command: + return file_response + return None + + mock_run.side_effect = side_effect + + result = vci.assess_repo(self._mock_repo()) + + self.assertTrue(result['has_custom_instructions']) + self.assertEqual(len(result['violations']), 1) + violation = result['violations'][0] + self.assertEqual(violation['char_count'], 4500) + self.assertEqual(violation['limit'], 4000) + self.assertEqual(violation['excess_chars'], 500) + + @patch('validate_custom_instructions.run_gh_command') + def test_instructions_dir_files_are_checked(self, mock_run): + """ + Scenario: .github/instructions/ contains an over-limit .instructions.md file. 
+ """ + over_limit_content = "B" * 4001 + file_in_dir_response = _make_file_api_response(over_limit_content) + + dir_listing = [ + { + 'type': 'file', + 'name': 'coding.instructions.md', + 'path': '.github/instructions/coding.instructions.md', + } + ] + + def side_effect(command): + if 'copilot-instructions.md' in command and 'instructions/' not in command: + return None # root copilot-instructions.md not present + if 'contents/.github/instructions"' in command or command.endswith('contents/.github/instructions'): + return dir_listing + if 'coding.instructions.md' in command: + return file_in_dir_response + return None + + mock_run.side_effect = side_effect + + result = vci.assess_repo(self._mock_repo()) + + self.assertTrue(result['has_custom_instructions']) + self.assertEqual(len(result['violations']), 1) + self.assertEqual(result['violations'][0]['char_count'], 4001) + + @patch('validate_custom_instructions.run_gh_command') + def test_file_at_exact_limit_is_not_a_violation(self, mock_run): + """A file with exactly 4000 characters must not be a violation.""" + exact_content = "C" * 4000 + file_response = _make_file_api_response(exact_content) + + def side_effect(command): + if 'copilot-instructions.md' in command and 'instructions/' not in command: + return file_response + return None + + mock_run.side_effect = side_effect + + result = vci.assess_repo(self._mock_repo()) + + self.assertTrue(result['has_custom_instructions']) + self.assertEqual(result['violations'], []) + self.assertFalse(result['files_found'][0]['exceeds_limit']) + + +# --------------------------------------------------------------------------- +# Unit tests: export_violations_to_excel +# --------------------------------------------------------------------------- + +class TestExportViolationsToExcel(unittest.TestCase): + """Tests for the Excel export function.""" + + def setUp(self): + import tempfile + self._tmp = tempfile.mkdtemp() + vci.CONFIG['output_dir'] = self._tmp + + def tearDown(self): + 
import shutil + shutil.rmtree(self._tmp, ignore_errors=True) + vci.CONFIG['output_dir'] = '.' + + def test_returns_none_when_no_violations(self): + result = vci.export_violations_to_excel([]) + self.assertIsNone(result) + + def test_creates_excel_file_for_violations(self): + violations = [ + { + 'repository': 'org/repo1', + 'file_path': '.github/copilot-instructions.md', + 'char_count': 5000, + 'limit': 4000, + 'excess_chars': 1000, + } + ] + output_path = vci.export_violations_to_excel(violations) + self.assertIsNotNone(output_path) + import os + self.assertTrue(os.path.exists(output_path)) + + def test_excel_file_contains_correct_data(self): + violations = [ + { + 'repository': 'org/my-repo', + 'file_path': '.github/copilot-instructions.md', + 'char_count': 4500, + 'limit': 4000, + 'excess_chars': 500, + } + ] + output_path = vci.export_violations_to_excel(violations) + self.assertIsNotNone(output_path) + + if vci.OPENPYXL_AVAILABLE: + import openpyxl as ox + wb = ox.load_workbook(output_path) + ws = wb.active + # Row 1 is the header, row 2 is the first data row + self.assertEqual(ws.cell(row=2, column=1).value, 'org/my-repo') + self.assertEqual(ws.cell(row=2, column=3).value, 4500) + self.assertEqual(ws.cell(row=2, column=5).value, 500) + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/validate_custom_instructions.py b/validate_custom_instructions.py new file mode 100644 index 0000000..0ae8388 --- /dev/null +++ b/validate_custom_instructions.py @@ -0,0 +1,580 @@ +#!/usr/bin/env python3 +""" +GitHub Copilot Custom Instructions Validation Tool +Validates the existence and size of Repository Custom Instructions files across GitHub repositories. + +According to GitHub Copilot documentation, Repository Custom Instructions must not exceed 4000 +characters. 
See: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/ +response-customization?tool=webui#about-repository-custom-instructions + +Requirements: + - GitHub CLI (gh) installed and authenticated + - Python 3.7+ + - openpyxl (for Excel report generation: pip install openpyxl) + +Usage: + python validate_custom_instructions.py + +Configuration: + Edit the CONFIG section below to customize behavior +""" + +import subprocess +import json +import sys +import os +import base64 +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed +from time import time, sleep +from datetime import datetime +from pathlib import Path + +try: + import openpyxl + from openpyxl.styles import Font, PatternFill, Alignment + OPENPYXL_AVAILABLE = True +except ImportError: + OPENPYXL_AVAILABLE = False + +# ============================================================================ +# CONFIGURATION +# ============================================================================ + +CONFIG = { + # GitHub CLI command (change if gh is not in PATH) + 'gh_command': 'gh', + + # Custom instruction file patterns to check + 'custom_instruction_files': ['.github/copilot-instructions.md'], + 'custom_instruction_dirs': ['.github/instructions'], + 'custom_instruction_extension': '.instructions.md', + + # Maximum allowed characters per custom instruction file + # Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/ + # response-customization?tool=webui#about-repository-custom-instructions + 'max_chars': 4000, + + # Performance settings + 'max_workers_fetch': 10, + 'max_workers_check': 15, + + # Rate limiting + 'enable_rate_limit_check': True, + 'rate_limit_threshold': 100, + 'rate_limit_wait_time': 60, + 'request_delay': 0.05, + + # Output settings + 'output_dir': '.', + 'excel_prefix': 'custom_instructions_violations', + 'include_timestamp': True, + + # Personal account identifier (auto-detected from GitHub CLI if empty) 
+ 'personal_account': '', + + # Verbose output + 'verbose': True, +} + +# ============================================================================ +# CORE FUNCTIONS +# ============================================================================ + +rate_limit_lock = threading.Lock() +rate_limit_info = {'remaining': None, 'reset_time': None, 'checked': False} + + +def log(message, verbose_only=False): + """Print message if verbose or not verbose_only""" + if not verbose_only or CONFIG['verbose']: + print(message) + + +def check_gh_installed(): + """Check if GitHub CLI is installed and accessible""" + try: + subprocess.run( + [CONFIG['gh_command'], '--version'], + capture_output=True, + check=True, + timeout=5 + ) + return True + except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired): + return False + + +def check_rate_limit(): + """Check GitHub API rate limit status""" + try: + result = subprocess.run( + f"{CONFIG['gh_command']} api rate_limit", + shell=True, + capture_output=True, + text=True, + check=True, + timeout=10 + ) + data = json.loads(result.stdout) + core_rate = data.get('resources', {}).get('core', {}) + return { + 'remaining': core_rate.get('remaining', 5000), + 'limit': core_rate.get('limit', 5000), + 'reset_time': core_rate.get('reset', 0) + } + except Exception: + return None + + +def wait_for_rate_limit(): + """Wait if rate limit is approaching threshold""" + if not CONFIG['enable_rate_limit_check']: + return + + with rate_limit_lock: + if not rate_limit_info['checked'] or rate_limit_info['remaining'] is None: + limit_data = check_rate_limit() + if limit_data: + rate_limit_info['remaining'] = limit_data['remaining'] + rate_limit_info['reset_time'] = limit_data['reset_time'] + rate_limit_info['checked'] = True + + log(f"📊 Rate Limit: {limit_data['remaining']}/{limit_data['limit']} requests remaining", verbose_only=True) + + if limit_data['remaining'] < CONFIG['rate_limit_threshold']: + wait_time = 
CONFIG['rate_limit_wait_time'] + log(f"⚠️ Rate limit threshold reached ({limit_data['remaining']} remaining)") + log(f" Waiting {wait_time} seconds before continuing...") + sleep(wait_time) + rate_limit_info['checked'] = False + + if CONFIG['request_delay'] > 0: + sleep(CONFIG['request_delay']) + + +def run_gh_command(command): + """Run GitHub CLI command and return JSON output""" + wait_for_rate_limit() + + try: + result = subprocess.run( + command, + shell=True, + capture_output=True, + text=True, + check=True, + timeout=30 + ) + return json.loads(result.stdout) + except (subprocess.CalledProcessError, json.JSONDecodeError, subprocess.TimeoutExpired): + return None + + +def fetch_repositories(): + """Fetch all accessible repositories in parallel""" + log("Fetching repositories in parallel...") + + if not CONFIG['personal_account']: + user_info = run_gh_command(f"{CONFIG['gh_command']} api user --jq '.login'") + if user_info: + CONFIG['personal_account'] = str(user_info).strip('"') + log(f"Detected personal account: {CONFIG['personal_account']}", verbose_only=True) + + if CONFIG['enable_rate_limit_check']: + limit_data = check_rate_limit() + if limit_data: + log(f"📊 Initial Rate Limit: {limit_data['remaining']}/{limit_data['limit']} requests remaining") + + all_repos = [] + + with ThreadPoolExecutor(max_workers=CONFIG['max_workers_fetch']) as executor: + futures = [] + + futures.append(executor.submit( + run_gh_command, + f"{CONFIG['gh_command']} repo list --json nameWithOwner,name,owner --limit 1000" + )) + + orgs_future = executor.submit( + run_gh_command, + f"{CONFIG['gh_command']} api user/orgs --paginate" + ) + orgs_data = orgs_future.result() + + if orgs_data: + for org in orgs_data: + futures.append(executor.submit( + run_gh_command, + f"{CONFIG['gh_command']} repo list {org['login']} --json nameWithOwner,name,owner --limit 1000" + )) + + for future in as_completed(futures): + result = future.result() + if result: + all_repos.extend(result) + + # Remove 
def decode_file_content(api_response):
    """Decode the base64 ``content`` field of a GitHub contents API response.

    Args:
        api_response: Parsed response for a single file. Anything that is not
            a dict (e.g. a directory listing or ``None``) is rejected.

    Returns:
        The decoded UTF-8 text, or ``None`` when the response is not a dict,
        has no/empty ``content``, or the content is not valid base64/UTF-8.
    """
    if not isinstance(api_response, dict):
        return None
    raw_content = api_response.get('content', '')
    if not raw_content:
        return None
    try:
        return base64.b64decode(raw_content).decode('utf-8')
    except (ValueError, TypeError):
        # ValueError covers binascii.Error (bad base64) and UnicodeDecodeError
        # (not UTF-8); TypeError covers a non-string ``content`` value.
        return None


def _contents_api_path(repo_name, file_path):
    """Build the ``repos/<repo>/contents/<path>`` endpoint for ``gh api``.

    The file path is percent-encoded (keeping ``/``) so names containing
    spaces, ``#``, ``?`` or shell metacharacters cannot split or alter the
    command string handed to ``run_gh_command``. Paths made only of letters,
    digits and ``_.-~/`` are left unchanged.
    """
    from urllib.parse import quote

    return f"repos/{repo_name}/contents/{quote(file_path, safe='/')}"


def _fetch_instruction_file(repo_name, file_path):
    """Fetch one file and return its size record, or ``None`` if unreadable."""
    response = run_gh_command(
        f"{CONFIG['gh_command']} api {_contents_api_path(repo_name, file_path)}"
    )
    content = decode_file_content(response)
    if content is None:
        return None
    char_count = len(content)
    return {
        'path': file_path,
        'char_count': char_count,
        'exceeds_limit': char_count > CONFIG['max_chars'],
    }


def get_custom_instruction_files(repo_name):
    """
    Retrieve all custom instruction files for a repository.

    Checks every path in CONFIG['custom_instruction_files'] and, for every
    directory in CONFIG['custom_instruction_dirs'], each file whose name ends
    with CONFIG['custom_instruction_extension'].

    Returns a list of dicts with keys: path, char_count, exceeds_limit
    """
    files = []

    # Individually configured instruction files
    for file_path in CONFIG['custom_instruction_files']:
        record = _fetch_instruction_file(repo_name, file_path)
        if record is not None:
            files.append(record)

    # Instruction files discovered through a directory listing
    extension = CONFIG['custom_instruction_extension']
    for dir_path in CONFIG['custom_instruction_dirs']:
        listing = run_gh_command(
            f"{CONFIG['gh_command']} api {_contents_api_path(repo_name, dir_path)}"
        )
        # Only a list is treated as a directory listing; anything else is ignored.
        if not isinstance(listing, list):
            continue
        for item in listing:
            if not isinstance(item, dict) or item.get('type') != 'file':
                continue
            name = item.get('name') or ''
            path = item.get('path')
            if not path or not name.endswith(extension):
                continue
            record = _fetch_instruction_file(repo_name, path)
            if record is not None:
                files.append(record)

    return files


def assess_repo(repo):
    """Assess a single repository for custom instruction file compliance.

    Args:
        repo: Dict with at least the key ``nameWithOwner``.

    Returns:
        Dict with keys ``repo``, ``files_found``, ``violations``,
        ``has_custom_instructions`` and ``error`` (``None`` on success,
        otherwise the exception text).
    """
    full_name = repo['nameWithOwner']
    result = {
        'repo': full_name,
        'files_found': [],
        'violations': [],
        'has_custom_instructions': False,
        'error': None,
    }

    try:
        files = get_custom_instruction_files(full_name)
        result['files_found'] = files
        result['has_custom_instructions'] = len(files) > 0
        limit = CONFIG['max_chars']
        result['violations'] = [
            {
                'repository': full_name,
                'file_path': f['path'],
                'char_count': f['char_count'],
                'limit': limit,
                'excess_chars': f['char_count'] - limit,
            }
            for f in files
            if f['exceeds_limit']
        ]
    except Exception as e:
        # Worker boundary: record the failure so one repository cannot abort
        # the whole parallel run.
        result['error'] = str(e)

    return result


def check_all_repositories(repos):
    """Check all repositories for custom instruction file compliance in parallel.

    Returns the per-repository results of ``assess_repo`` sorted by repo name.
    """
    log("\nValidating custom instruction files (parallel execution)...")

    results = []
    total = len(repos)

    with ThreadPoolExecutor(max_workers=CONFIG['max_workers_check']) as executor:
        futures = [executor.submit(assess_repo, repo) for repo in repos]
        for completed, future in enumerate(as_completed(futures), start=1):
            results.append(future.result())
            log(f"⚡ Progress: {completed}/{total} repositories checked ({(completed/total*100):.0f}%)", verbose_only=True)

    results.sort(key=lambda x: x['repo'])
    return results


def export_violations_to_excel(violations):
    """
    Export files exceeding the CONFIG['max_chars'] limit to an Excel report.

    Falls back to ``_export_violations_to_csv`` when openpyxl is unavailable.
    Returns the path of the created file, or None if no violations.
    """
    if not violations:
        return None

    if not OPENPYXL_AVAILABLE:
        log("⚠️ openpyxl not installed. Install with: pip install openpyxl")
        log(" Falling back to CSV output for violations.")
        return _export_violations_to_csv(violations)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{CONFIG['excel_prefix']}_{timestamp}.xlsx"
    filepath = Path(CONFIG['output_dir']) / filename
    filepath.parent.mkdir(parents=True, exist_ok=True)

    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Violations"

    # Header styles
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill(start_color="CC0000", end_color="CC0000", fill_type="solid")
    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)

    headers = [
        "Repository",
        "File Path",
        "Character Count",
        f"Limit ({CONFIG['max_chars']} chars)",
        "Excess Characters",
        "GitHub Copilot Rule",
    ]

    for col_idx, header in enumerate(headers, start=1):
        cell = ws.cell(row=1, column=col_idx, value=header)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment

    # Data rows; the rule text follows the configured limit so it always
    # agrees with the "Limit" header column.
    rule_reference = (
        f"Repository Custom Instructions must not exceed {CONFIG['max_chars']} characters. "
        "See: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/"
        "response-customization?tool=webui#about-repository-custom-instructions"
    )

    for row_idx, violation in enumerate(violations, start=2):
        row_values = (
            violation['repository'],
            violation['file_path'],
            violation['char_count'],
            violation['limit'],
            violation['excess_chars'],
            rule_reference,
        )
        for col_idx, value in enumerate(row_values, start=1):
            ws.cell(row=row_idx, column=col_idx, value=value)

    # Fixed column widths, one per header column
    column_widths = [40, 50, 18, 18, 18, 80]
    for col_idx, width in enumerate(column_widths, start=1):
        ws.column_dimensions[openpyxl.utils.get_column_letter(col_idx)].width = width

    # Keep the header row visible while scrolling
    ws.freeze_panes = "A2"

    wb.save(filepath)
    log(f"✅ Excel violations report created: {filepath}")
    log(f" Total violations: {len(violations)}")
    return str(filepath)


def _export_violations_to_csv(violations):
    """Fallback: export violations to CSV when openpyxl is not available.

    Returns the path of the created CSV file as a string.
    """
    import csv as csv_module

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{CONFIG['excel_prefix']}_{timestamp}.csv"
    filepath = Path(CONFIG['output_dir']) / filename
    filepath.parent.mkdir(parents=True, exist_ok=True)

    fieldnames = ['repository', 'file_path', 'char_count', 'limit', 'excess_chars']
    with open(filepath, 'w', newline='', encoding='utf-8') as f:
        writer = csv_module.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(violations)

    log(f"✅ CSV violations report created: {filepath}")
    log(f" Total violations: {len(violations)}")
    return str(filepath)


def print_summary(results):
    """Print a summary of the assessment (counts, then each violation)."""
    total_repos = len(results)
    repos_with_files = sum(1 for r in results if r['has_custom_instructions'])
    repos_with_violations = sum(1 for r in results if r['violations'])
    repos_with_errors = sum(1 for r in results if r['error'])
    total_files = sum(len(r['files_found']) for r in results)
    total_violations = sum(len(r['violations']) for r in results)

    log("\n" + "=" * 80)
    log("SUMMARY - CUSTOM INSTRUCTIONS VALIDATION")
    log("=" * 80)
    log(f"Total repositories scanned: {total_repos}")
    log(f"Repositories with custom instructions: {repos_with_files}")
    log(f"Total custom instruction files found: {total_files}")
    log(f"Repositories with violations: {repos_with_violations}")
    log(f"Total files exceeding {CONFIG['max_chars']} chars: {total_violations}")
    log(f"Repositories with errors: {repos_with_errors}")

    if repos_with_violations > 0:
        log(f"\n❌ VIOLATIONS (files exceeding {CONFIG['max_chars']} characters):")
        for result in results:
            for v in result['violations']:
                log(f" • {v['repository']} | {v['file_path']} | {v['char_count']} chars (+{v['excess_chars']} over limit)")
        log(f"\n📖 GitHub Copilot Rule: Repository Custom Instructions must not exceed {CONFIG['max_chars']} characters.")
        log(" Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions")


# ============================================================================
# VALIDATION FUNCTIONS
# ============================================================================

def validate_existence(results):
    """
    Validate that at least one custom instructions file exists across all repositories.
    Returns True if at least one file is found, False otherwise.
    """
    total_files = sum(len(r['files_found']) for r in results)
    if total_files == 0:
        log("\n❌ VALIDATION FAILED: No Repository Custom Instructions files found.")
        log(" GitHub Copilot supports Repository Custom Instructions configured via:")
        log(" - .github/copilot-instructions.md")
        log(" - .github/instructions/*.instructions.md")
        log(" Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions")
        return False
    log(f"\n✅ VALIDATION PASSED: Found {total_files} custom instruction file(s).")
    return True


def validate_file_sizes(results):
    """
    Validate that all custom instruction files are within the CONFIG['max_chars'] limit.
    Returns list of violations (empty list means all files are valid).
    """
    all_violations = [v for result in results for v in result['violations']]

    if all_violations:
        log(f"\n❌ VALIDATION FAILED: {len(all_violations)} file(s) exceed the {CONFIG['max_chars']}-character limit.")
        log(f" GitHub Copilot Rule: Repository Custom Instructions must not exceed {CONFIG['max_chars']} characters.")
        log(" Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions")
    else:
        log(f"\n✅ VALIDATION PASSED: All custom instruction files are within the {CONFIG['max_chars']}-character limit.")

    return all_violations


# ============================================================================
# MAIN FUNCTION
# ============================================================================

def main():
    """Main execution function.

    Returns the process exit code: 0 when both validations pass, 1 when gh is
    missing, no repositories could be fetched, or a validation fails.
    """
    start_time = time()

    log("=" * 80)
    log("GITHUB COPILOT CUSTOM INSTRUCTIONS VALIDATION TOOL")
    log("=" * 80)
    log("Validation Rules:")
    log(" • Existence: At least one custom instruction file must be present")
    log(f" • Size Limit: Each file must not exceed {CONFIG['max_chars']} characters")
    log(" • Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization")

    if not check_gh_installed():
        log("\n❌ GitHub CLI (gh) is not installed or not in PATH!")
        log("\nTo install:")
        log(" Windows: winget install --id GitHub.cli")
        log(" macOS: brew install gh")
        log(" Linux: See https://cli.github.com/")
        log("\nAfter installation, authenticate with: gh auth login")
        return 1

    # Fetch repositories
    fetch_start = time()
    repos = fetch_repositories()
    fetch_time = time() - fetch_start

    if not repos:
        log("\n❌ Could not fetch repositories. Make sure you're authenticated:")
        log(" gh auth login")
        return 1

    log(f"✓ Found {len(repos)} repositories in {fetch_time:.2f}s")

    # Check all repositories
    check_start = time()
    results = check_all_repositories(repos)
    check_time = time() - check_start

    # Print summary
    print_summary(results)

    log("\n⚡ PERFORMANCE METRICS:")
    log(f" Repository fetch: {fetch_time:.2f}s")
    log(f" Validation check: {check_time:.2f}s")
    log(f" Total execution: {(time() - start_time):.2f}s")

    # --- Validation 1: Existence ---
    existence_ok = validate_existence(results)
    if not existence_ok:
        log("\n" + "=" * 80)
        log("❌ Validation failed: no custom instruction files found.")
        log("=" * 80)
        return 1

    # --- Validation 2: File size ---
    violations = validate_file_sizes(results)
    if violations:
        export_violations_to_excel(violations)
        log("\n" + "=" * 80)
        log(f"❌ Validation failed: one or more custom instruction files exceed the {CONFIG['max_chars']}-character limit.")
        log("=" * 80)
        return 1

    log("\n" + "=" * 80)
    log("✅ All custom instruction validations passed!")
    log("=" * 80)
    return 0


if __name__ == "__main__":
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        log("\n\n⚠️ Operation cancelled by user")
        sys.exit(130)
    except Exception as e:
        log(f"\n❌ Unexpected error: {e}")
        sys.exit(1)
73fb49af8e13b111718bff6b569053b0d73c235b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 14:03:13 +0000 Subject: [PATCH 3/3] Fix docstring: update Python requirement to 3.8+ and clarify openpyxl is optional --- validate_custom_instructions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/validate_custom_instructions.py b/validate_custom_instructions.py index 0ae8388..0c9b483 100644 --- a/validate_custom_instructions.py +++ b/validate_custom_instructions.py @@ -9,8 +9,9 @@ Requirements: - GitHub CLI (gh) installed and authenticated - - Python 3.7+ - - openpyxl (for Excel report generation: pip install openpyxl) + - Python 3.8+ + - openpyxl (optional, for Excel report generation: pip install openpyxl) + Falls back to CSV output if not installed. Usage: python validate_custom_instructions.py