From 293737ec2dd9f08698e74f4bc12e170169dca8cf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 13:55:53 +0000
Subject: [PATCH 1/3] Initial plan


From 3b1e4031b928067995c953aeacdb25680735a4b3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 14:01:58 +0000
Subject: [PATCH 2/3] Add custom instructions validation script with tests and
 documentation

---
 .gitignore                           |   1 +
 AGENTS.md                            |  63 ++-
 README.md                            |  79 +++-
 test_validate_custom_instructions.py | 370 +++++++++++++++++
 validate_custom_instructions.py      | 580 +++++++++++++++++++++++++++
 5 files changed, 1067 insertions(+), 26 deletions(-)
 create mode 100644 test_validate_custom_instructions.py
 create mode 100644 validate_custom_instructions.py

diff --git a/.gitignore b/.gitignore
index d4e2963..44c9726 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,5 +3,6 @@ __pycache__/
 *.pyc
 *.pyo
 *.csv
+*.xlsx
 *.log
 .DS_Store
diff --git a/AGENTS.md b/AGENTS.md
index e70fcf6..209e31b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -20,21 +20,24 @@ The tool performs three main types of assessments:
 
 ```
 GitHubAssessment/
-├── security_assessment.py           # Main: Repository security controls assessment
-├── idp_assessment.py                # Main: Identity & access management assessment  
-├── assess_copilot_repos.py          # Main: GitHub Copilot best practices validation
-├── list_repos_gh_cli.py            # Utility: Basic repository listing
-├── list_repos_gh_cli_optimized.py  # Utility: Optimized repository listing with Copilot checks
-├── list_and_check_repos.py         # Utility: Combined listing and Copilot directory checking
-├── .gitignore                      # Excludes .venv, __pycache__, *.csv, *.log
-├── README.md                       # Human-readable documentation
-└── AGENTS.md                       # This file - LLM agent documentation
+├── security_assessment.py                   # Main: Repository security controls assessment
+├── idp_assessment.py                        # Main: Identity & access management assessment  
+├── assess_copilot_repos.py                  # Main: GitHub Copilot best practices validation
+├── validate_custom_instructions.py          # Main: Custom Instructions existence & size validation
+├── test_validate_custom_instructions.py     # Tests: Automated tests for validate_custom_instructions.py
+├── list_repos_gh_cli.py                    # Utility: Basic repository listing
+├── list_repos_gh_cli_optimized.py          # Utility: Optimized repository listing with Copilot checks
+├── list_and_check_repos.py                 # Utility: Combined listing and Copilot directory checking
+├── .gitignore                              # Excludes .venv, __pycache__, *.csv, *.xlsx, *.log
+├── README.md                               # Human-readable documentation
+└── AGENTS.md                               # This file - LLM agent documentation
 ```
 
 ### Generated Files (Excluded from Git)
 - `github_security_assessment_YYYYMMDD_HHMMSS.csv` - Security assessment reports
 - `github_idp_assessment_YYYYMMDD_HHMMSS.csv` - IDP assessment reports
 - `github_copilot_assessment_YYYYMMDD_HHMMSS.csv` - Copilot assessment reports
+- `custom_instructions_violations_YYYYMMDD_HHMMSS.xlsx` - Custom Instructions violations report
 - `.venv/` - Python virtual environment directory
 
 ## Codebase Architecture
@@ -164,7 +167,47 @@ Instructions Dir, Instructions Count, Agents Dir, Agents Count, Collections Dir,
 Collections Count, Scripts Dir, Scripts Count, Overall Copilot Status, Recommendations, Errors
 ```
 
-#### 4. Utility Scripts
+#### 4. validate_custom_instructions.py
+
+**Purpose**: Validates that GitHub Copilot Repository Custom Instructions files exist and comply
+with the 4,000-character limit defined in the GitHub Copilot documentation.
+
+**Reference**: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions
+
+**Key Functions**:
+- `check_gh_installed()` - Validates GitHub CLI
+- `check_rate_limit()` - Monitors API rate limits
+- `run_gh_command(command)` - Executes GitHub CLI commands returning JSON
+- `fetch_repositories()` - Fetches all repositories with parallel execution
+- `decode_file_content(api_response)` - Decodes base64-encoded file content from GitHub API
+- `get_custom_instruction_files(repo_name)` - Retrieves all custom instruction files for a repo
+- `assess_repo(repo)` - Main assessment for a single repository
+- `check_all_repositories(repos)` - Parallel assessment of all repositories
+- `validate_existence(results)` - Checks that at least one custom instruction file exists
+- `validate_file_sizes(results)` - Checks all files are within the 4,000-character limit
+- `export_violations_to_excel(violations)` - Exports violations to an Excel file
+- `main()` - Entry point; halts with exit code 1 when a validation fails
+
+**Files Checked**:
+- `.github/copilot-instructions.md` - Primary repository-level custom instruction file
+- `.github/instructions/*.instructions.md` - Additional instruction files
+
+**Validation Rules**:
+1. **Existence**: At least one custom instruction file must be present
+2. **Size**: Each file must not exceed 4,000 characters
+
+**Output**:
+- Console: Pass/Fail per validation rule with GitHub Copilot rule references
+- Excel: `custom_instructions_violations_YYYYMMDD_HHMMSS.xlsx` (only when violations found)
+
+**Exit Codes**:
+- `0` - All validations passed
+- `1` - One or more validations failed
+
+**Dependencies**:
+- `openpyxl` (optional; falls back to CSV if not installed): `pip install openpyxl`
+
+#### 5. Utility Scripts
 
 **list_repos_gh_cli.py** (143 lines):
 - Basic repository listing using GitHub CLI
diff --git a/README.md b/README.md
index a6b33ba..832d7c8 100644
--- a/README.md
+++ b/README.md
@@ -158,7 +158,50 @@ python assess_copilot_repos.py
 - Overall Copilot readiness status
 - Recommendations for missing components
 
-### 2. Security Assessment
+### 2. Custom Instructions Validation
+
+Validates that GitHub Copilot Repository Custom Instructions files exist and comply with the
+4,000-character limit imposed by GitHub Copilot.
+
+> 📖 Reference: [GitHub Copilot Repository Custom Instructions](https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions)
+
+**Install the library used to generate the Excel report:**
+```bash
+pip install openpyxl
+```
+
+**Run the validation:**
+```bash
+python validate_custom_instructions.py
+```
+
+**What it checks:**
+- **Existence** – at least one of the following files must be present in at least one scanned repository:
+  - `.github/copilot-instructions.md`
+  - `.github/instructions/*.instructions.md`
+- **Size limit** – each custom instruction file must not exceed **4,000 characters**
+
+**Validation flow:**
+1. Fetches all accessible repositories
+2. Checks each repository for custom instruction files
+3. **Fails immediately** if no custom instruction files are found across all repositories
+4. **Fails immediately** if any file exceeds the 4,000-character limit and generates an Excel report
+
+**Output:**
+- Console summary with ✅/❌ validation results per rule
+- Excel file `custom_instructions_violations_YYYYMMDD_HHMMSS.xlsx` (only when violations are found)
+  - Lists every violating file with its repository, path, character count, and excess characters
+
+**Exit codes:**
+- `0` – All validations passed
+- `1` – One or more validations failed
+
+**Running tests:**
+```bash
+python test_validate_custom_instructions.py -v
+```
+
+### 3. Security Assessment
 
 Evaluates repository-level security controls across all accessible repositories.
 
@@ -186,7 +229,7 @@ python security_assessment.py
 - Overall security compliance status
 - Error details (if any)
 
-### 3. Identity & Access Management (IDP) Assessment
+### 4. Identity & Access Management (IDP) Assessment
 
 Evaluates organization-level identity, authentication, and access controls.
 
@@ -296,20 +339,23 @@ This tool supports assessment alignment with:
 
 ```
 GitHubAssessment/
-├── security_assessment.py           # Repository security controls assessment
-├── idp_assessment.py                # Identity & access management assessment
-├── assess_copilot_repos.py          # GitHub Copilot best practices validation
-├── list_repos_gh_cli.py            # Repository listing utility
-├── list_repos_gh_cli_optimized.py  # Optimized repository listing
-├── list_and_check_repos.py         # Combined listing and checking
-├── MCP/                            # Model Context Protocol configurations
-├── .venv/                          # Python virtual environment
-├── .gitignore                      # Excludes .venv, CSV files, logs
-├── github_security_assessment_*.csv     # Generated security reports
-├── github_idp_assessment_*.csv          # Generated IDP reports
-├── github_copilot_assessment_*.csv      # Generated Copilot reports
-├── README.md                       # This file - Human-readable documentation
-└── AGENTS.md                       # LLM/AI agent documentation for code assistance
+├── security_assessment.py                   # Repository security controls assessment
+├── idp_assessment.py                        # Identity & access management assessment
+├── assess_copilot_repos.py                  # GitHub Copilot best practices validation
+├── validate_custom_instructions.py          # Custom Instructions existence & size validation
+├── test_validate_custom_instructions.py     # Automated tests for the above
+├── list_repos_gh_cli.py                    # Repository listing utility
+├── list_repos_gh_cli_optimized.py          # Optimized repository listing
+├── list_and_check_repos.py                 # Combined listing and checking
+├── MCP/                                    # Model Context Protocol configurations
+├── .venv/                                  # Python virtual environment
+├── .gitignore                              # Excludes .venv, CSV files, Excel files, logs
+├── github_security_assessment_*.csv             # Generated security reports
+├── github_idp_assessment_*.csv                  # Generated IDP reports
+├── github_copilot_assessment_*.csv              # Generated Copilot reports
+├── custom_instructions_violations_*.xlsx        # Generated violations reports
+├── README.md                               # This file - Human-readable documentation
+└── AGENTS.md                               # LLM/AI agent documentation for code assistance
 ```
 
 ## Output and Reports
@@ -507,6 +553,7 @@ gh config list
 - [x] Security assessment with parallel execution
 - [x] IDP assessment with Enterprise SSO support
 - [x] Copilot best practices validation
+- [x] Custom Instructions existence & size validation with Excel report
 - [x] CSV export with timestamped files
 - [ ] Advanced analytics dashboard
 - [ ] Trend analysis across multiple assessments
diff --git a/test_validate_custom_instructions.py b/test_validate_custom_instructions.py
new file mode 100644
index 0000000..4220eb7
--- /dev/null
+++ b/test_validate_custom_instructions.py
@@ -0,0 +1,370 @@
+#!/usr/bin/env python3
+"""
+Automated tests for validate_custom_instructions.py
+
+Tests cover:
+- No custom instruction file found (existence validation failure)
+- Valid file within the 4000-character limit
+- Invalid file above the 4000-character limit (size validation failure)
+
+Usage:
+    python test_validate_custom_instructions.py
+    python -m pytest test_validate_custom_instructions.py -v
+"""
+
+import base64
+import json
+import sys
+import unittest
+from unittest.mock import MagicMock, patch, call
+
+# Ensure the module under test is importable from this directory
+sys.path.insert(0, '.')
+
+import validate_custom_instructions as vci
+
+
+# ---------------------------------------------------------------------------
+# Helper functions
+# ---------------------------------------------------------------------------
+
+def _make_file_api_response(content: str) -> dict:
+    """Build a fake GitHub file API payload whose 'content' is *content*, base64-encoded."""
+    payload_bytes = content.encode('utf-8')
+    return {
+        'type': 'file',
+        'content': base64.b64encode(payload_bytes).decode('utf-8'),
+        'encoding': 'base64',
+    }
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: decode_file_content
+# ---------------------------------------------------------------------------
+
+class TestDecodeFileContent(unittest.TestCase):
+    """Tests for the decode_file_content helper (base64 API payload -> text, or None)."""
+
+    def test_decodes_valid_base64_content(self):
+        text = "Hello, Copilot!"
+        response = _make_file_api_response(text)
+        result = vci.decode_file_content(response)
+        self.assertEqual(result, text)
+
+    def test_returns_none_for_non_dict_response(self):
+        self.assertIsNone(vci.decode_file_content(None))  # None is what run_gh_command returns on failure
+        self.assertIsNone(vci.decode_file_content([]))  # a list is not a single-file payload
+
+    def test_returns_none_for_empty_content_field(self):
+        self.assertIsNone(vci.decode_file_content({'content': ''}))
+
+    def test_returns_none_for_missing_content_field(self):
+        self.assertIsNone(vci.decode_file_content({'type': 'file'}))
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: validate_existence
+# ---------------------------------------------------------------------------
+
+class TestValidateExistence(unittest.TestCase):
+    """Tests for validate_existence, called with a list of per-repository result dicts."""
+
+    def test_fails_when_no_files_found(self):
+        """Scenario: No custom instruction file found in any repository."""
+        results = [
+            {'repo': 'org/repo1', 'files_found': [], 'violations': [], 'has_custom_instructions': False, 'error': None},
+            {'repo': 'org/repo2', 'files_found': [], 'violations': [], 'has_custom_instructions': False, 'error': None},
+        ]
+        self.assertFalse(vci.validate_existence(results))
+
+    def test_passes_when_at_least_one_file_found(self):
+        """Scenario: A single repository with one custom instruction file is enough to pass."""
+        results = [
+            {
+                'repo': 'org/repo1',
+                'files_found': [
+                    {'path': '.github/copilot-instructions.md', 'char_count': 100, 'exceeds_limit': False}
+                ],
+                'violations': [],
+                'has_custom_instructions': True,
+                'error': None,
+            },
+        ]
+        self.assertTrue(vci.validate_existence(results))
+
+    def test_fails_with_empty_results_list(self):
+        """Edge case: Empty results (no repos scanned) must fail the existence rule."""
+        self.assertFalse(vci.validate_existence([]))
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: validate_file_sizes
+# ---------------------------------------------------------------------------
+
+class TestValidateFileSizes(unittest.TestCase):
+    """Tests for validate_file_sizes, which returns the list of size violations."""
+
+    def test_returns_empty_list_when_all_files_within_limit(self):
+        """Scenario: Valid file within the 4000-character limit."""
+        results = [
+            {
+                'repo': 'org/repo1',
+                'files_found': [
+                    {'path': '.github/copilot-instructions.md', 'char_count': 500, 'exceeds_limit': False}
+                ],
+                'violations': [],
+                'has_custom_instructions': True,
+                'error': None,
+            },
+        ]
+        violations = vci.validate_file_sizes(results)
+        self.assertEqual(violations, [])
+
+    def test_returns_violations_when_file_exceeds_limit(self):
+        """Scenario: Invalid file above 4000 characters (4500 chars, 500 over the limit)."""
+        violation = {
+            'repository': 'org/repo1',
+            'file_path': '.github/copilot-instructions.md',
+            'char_count': 4500,
+            'limit': 4000,
+            'excess_chars': 500,
+        }
+        results = [
+            {
+                'repo': 'org/repo1',
+                'files_found': [
+                    {'path': '.github/copilot-instructions.md', 'char_count': 4500, 'exceeds_limit': True}
+                ],
+                'violations': [violation],
+                'has_custom_instructions': True,
+                'error': None,
+            },
+        ]
+        violations = vci.validate_file_sizes(results)
+        self.assertEqual(len(violations), 1)
+        self.assertEqual(violations[0]['char_count'], 4500)
+        self.assertEqual(violations[0]['excess_chars'], 500)
+
+    def test_returns_violations_for_multiple_repos(self):
+        """Two repositories, one violation each; 'files_found' is empty, only 'violations' is populated."""
+        v1 = {
+            'repository': 'org/repo1',
+            'file_path': '.github/copilot-instructions.md',
+            'char_count': 5000,
+            'limit': 4000,
+            'excess_chars': 1000,
+        }
+        v2 = {
+            'repository': 'org/repo2',
+            'file_path': '.github/instructions/coding.instructions.md',
+            'char_count': 4001,
+            'limit': 4000,
+            'excess_chars': 1,
+        }
+        results = [
+            {'repo': 'org/repo1', 'files_found': [], 'violations': [v1], 'has_custom_instructions': True, 'error': None},
+            {'repo': 'org/repo2', 'files_found': [], 'violations': [v2], 'has_custom_instructions': True, 'error': None},
+        ]
+        violations = vci.validate_file_sizes(results)
+        self.assertEqual(len(violations), 2)
+
+    def test_file_at_exact_limit_is_valid(self):
+        """A file with exactly 4000 characters should not be flagged."""
+        results = [
+            {
+                'repo': 'org/repo1',
+                'files_found': [
+                    {'path': '.github/copilot-instructions.md', 'char_count': 4000, 'exceeds_limit': False}
+                ],
+                'violations': [],
+                'has_custom_instructions': True,
+                'error': None,
+            },
+        ]
+        violations = vci.validate_file_sizes(results)
+        self.assertEqual(violations, [])
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: assess_repo (integration of get_custom_instruction_files)
+# ---------------------------------------------------------------------------
+
+class TestAssessRepo(unittest.TestCase):
+    """Tests for assess_repo with run_gh_command patched, so no real GitHub CLI call is made."""
+
+    def _mock_repo(self, name='org/repo'):
+        return {'nameWithOwner': name}
+
+    @patch('validate_custom_instructions.run_gh_command')
+    def test_no_custom_instruction_files_found(self, mock_run):
+        """Scenario: Repository has no custom instruction files."""
+        # Every patched gh call returns None, the value run_gh_command gives on failure
+        mock_run.return_value = None
+
+        result = vci.assess_repo(self._mock_repo())
+
+        self.assertFalse(result['has_custom_instructions'])
+        self.assertEqual(result['files_found'], [])
+        self.assertEqual(result['violations'], [])
+        self.assertIsNone(result['error'])
+
+    @patch('validate_custom_instructions.run_gh_command')
+    def test_valid_file_within_limit(self, mock_run):
+        """Scenario: Repository has a valid custom instruction file within the 4000-char limit."""
+        valid_content = "Follow PEP 8 coding standards.\n" * 10  # 310 chars (10 x 31)
+        file_response = _make_file_api_response(valid_content)
+
+        def side_effect(command):
+            if 'copilot-instructions.md' in command and '/contents/' in command and 'instructions/' not in command:
+                return file_response
+            return None  # instructions/ dir not found
+
+        mock_run.side_effect = side_effect
+
+        result = vci.assess_repo(self._mock_repo())
+
+        self.assertTrue(result['has_custom_instructions'])
+        self.assertEqual(len(result['files_found']), 1)
+        self.assertEqual(result['violations'], [])
+        self.assertFalse(result['files_found'][0]['exceeds_limit'])
+
+    @patch('validate_custom_instructions.run_gh_command')
+    def test_invalid_file_above_limit(self, mock_run):
+        """Scenario: Repository has a custom instruction file exceeding 4000 characters."""
+        over_limit_content = "A" * 4500  # 4500 chars, over the 4000 limit
+        file_response = _make_file_api_response(over_limit_content)
+
+        def side_effect(command):
+            if 'copilot-instructions.md' in command and '/contents/' in command and 'instructions/' not in command:
+                return file_response
+            return None
+
+        mock_run.side_effect = side_effect
+
+        result = vci.assess_repo(self._mock_repo())
+
+        self.assertTrue(result['has_custom_instructions'])
+        self.assertEqual(len(result['violations']), 1)
+        violation = result['violations'][0]
+        self.assertEqual(violation['char_count'], 4500)
+        self.assertEqual(violation['limit'], 4000)
+        self.assertEqual(violation['excess_chars'], 500)
+
+    @patch('validate_custom_instructions.run_gh_command')
+    def test_instructions_dir_files_are_checked(self, mock_run):
+        """
+        Scenario: .github/instructions/ contains an over-limit .instructions.md file.
+        """
+        over_limit_content = "B" * 4001
+        file_in_dir_response = _make_file_api_response(over_limit_content)
+
+        dir_listing = [
+            {
+                'type': 'file',
+                'name': 'coding.instructions.md',
+                'path': '.github/instructions/coding.instructions.md',
+            }
+        ]
+
+        def side_effect(command):
+            if 'copilot-instructions.md' in command and 'instructions/' not in command:
+                return None  # root copilot-instructions.md not present
+            if 'contents/.github/instructions"' in command or command.endswith('contents/.github/instructions'):
+                return dir_listing
+            if 'coding.instructions.md' in command:
+                return file_in_dir_response
+            return None
+
+        mock_run.side_effect = side_effect
+
+        result = vci.assess_repo(self._mock_repo())
+
+        self.assertTrue(result['has_custom_instructions'])
+        self.assertEqual(len(result['violations']), 1)
+        self.assertEqual(result['violations'][0]['char_count'], 4001)
+
+    @patch('validate_custom_instructions.run_gh_command')
+    def test_file_at_exact_limit_is_not_a_violation(self, mock_run):
+        """Boundary: a file with exactly 4000 characters must not be a violation."""
+        exact_content = "C" * 4000
+        file_response = _make_file_api_response(exact_content)
+
+        def side_effect(command):
+            if 'copilot-instructions.md' in command and 'instructions/' not in command:
+                return file_response
+            return None
+
+        mock_run.side_effect = side_effect
+
+        result = vci.assess_repo(self._mock_repo())
+
+        self.assertTrue(result['has_custom_instructions'])
+        self.assertEqual(result['violations'], [])
+        self.assertFalse(result['files_found'][0]['exceeds_limit'])
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: export_violations_to_excel
+# ---------------------------------------------------------------------------
+
+class TestExportViolationsToExcel(unittest.TestCase):
+    """Tests for export_violations_to_excel; reports are written to a throwaway directory."""
+
+    def setUp(self):
+        import tempfile
+        self._tmp = tempfile.mkdtemp()
+        self._saved_output_dir, vci.CONFIG['output_dir'] = vci.CONFIG['output_dir'], self._tmp
+
+    def tearDown(self):
+        import shutil
+        shutil.rmtree(self._tmp, ignore_errors=True)
+        vci.CONFIG['output_dir'] = self._saved_output_dir  # restore the value seen in setUp, not a guess
+
+    def test_returns_none_when_no_violations(self):
+        result = vci.export_violations_to_excel([])
+        self.assertIsNone(result)
+
+    def test_creates_excel_file_for_violations(self):
+        violations = [
+            {
+                'repository': 'org/repo1',
+                'file_path': '.github/copilot-instructions.md',
+                'char_count': 5000,
+                'limit': 4000,
+                'excess_chars': 1000,
+            }
+        ]
+        output_path = vci.export_violations_to_excel(violations)
+        self.assertIsNotNone(output_path)
+        import os
+        self.assertTrue(os.path.exists(output_path))
+
+    def test_excel_file_contains_correct_data(self):
+        violations = [
+            {
+                'repository': 'org/my-repo',
+                'file_path': '.github/copilot-instructions.md',
+                'char_count': 4500,
+                'limit': 4000,
+                'excess_chars': 500,
+            }
+        ]
+        output_path = vci.export_violations_to_excel(violations)
+        self.assertIsNotNone(output_path)
+        if not vci.OPENPYXL_AVAILABLE:
+            self.skipTest("openpyxl is not installed; workbook contents cannot be inspected")
+        import openpyxl as ox
+        wb = ox.load_workbook(output_path)
+        ws = wb.active
+        # Row 1 is the header, row 2 is the first data row
+        self.assertEqual(ws.cell(row=2, column=1).value, 'org/my-repo')
+        self.assertEqual(ws.cell(row=2, column=3).value, 4500)
+        self.assertEqual(ws.cell(row=2, column=5).value, 500)
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)  # verbosity=2 lists every test with its individual result
diff --git a/validate_custom_instructions.py b/validate_custom_instructions.py
new file mode 100644
index 0000000..0ae8388
--- /dev/null
+++ b/validate_custom_instructions.py
@@ -0,0 +1,580 @@
+#!/usr/bin/env python3
+"""
+GitHub Copilot Custom Instructions Validation Tool
+Validates the existence and size of Repository Custom Instructions files across GitHub repositories.
+
+According to GitHub Copilot documentation, Repository Custom Instructions must not exceed 4000
+characters. See:
+https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions
+
+Requirements:
+    - GitHub CLI (gh) installed and authenticated
+    - Python 3.7+
+    - openpyxl (for Excel report generation: pip install openpyxl)
+
+Usage:
+    python validate_custom_instructions.py
+
+Configuration:
+    Edit the CONFIG section below to customize behavior
+"""
+
+import subprocess
+import json
+import sys
+import os
+import base64
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import time, sleep
+from datetime import datetime
+from pathlib import Path
+
+try:
+    import openpyxl
+    from openpyxl.styles import Font, PatternFill, Alignment
+    OPENPYXL_AVAILABLE = True
+except ImportError:
+    OPENPYXL_AVAILABLE = False
+
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+CONFIG = {
+    # GitHub CLI command (change if gh is not in PATH)
+    'gh_command': 'gh',
+
+    # Custom instruction file patterns to check
+    'custom_instruction_files': ['.github/copilot-instructions.md'],
+    'custom_instruction_dirs': ['.github/instructions'],
+    'custom_instruction_extension': '.instructions.md',
+
+    # Maximum allowed characters per custom instruction file. Reference:
+    # https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions
+    # (URL kept on a single line so it can be followed or copied intact)
+    'max_chars': 4000,
+
+    # Performance settings
+    'max_workers_fetch': 10,  # thread-pool size used by fetch_repositories
+    'max_workers_check': 15,
+
+    # Rate limiting
+    'enable_rate_limit_check': True,  # False disables wait_for_rate_limit entirely, request_delay included
+    'rate_limit_threshold': 100,  # pause when fewer requests than this remain
+    'rate_limit_wait_time': 60,  # seconds to sleep once below the threshold
+    'request_delay': 0.05,  # seconds slept before every gh call made through run_gh_command
+
+    # Output settings
+    'output_dir': '.',
+    'excel_prefix': 'custom_instructions_violations',
+    'include_timestamp': True,
+
+    # Personal account identifier (auto-detected from GitHub CLI if empty)
+    'personal_account': '',
+
+    # Verbose output
+    'verbose': True,
+}
+
+# ============================================================================
+# CORE FUNCTIONS
+# ============================================================================
+
+rate_limit_lock = threading.Lock()  # held by wait_for_rate_limit while it reads/updates rate_limit_info
+rate_limit_info = {'remaining': None, 'reset_time': None, 'checked': False}  # last result of check_rate_limit
+
+
+def log(message, verbose_only=False):
+    """Print *message*; verbose-only messages are dropped when CONFIG['verbose'] is off."""
+    if not (verbose_only and not CONFIG['verbose']):
+        print(message)
+
+
+def check_gh_installed():
+    """Report whether the configured GitHub CLI binary can be executed.
+
+    Returns True when ``<gh_command> --version`` exits successfully within 5 seconds.
+    """
+    probe = [CONFIG['gh_command'], '--version']
+    unusable = (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired)
+    try:
+        subprocess.run(probe, capture_output=True, check=True, timeout=5)
+    except unusable:
+        return False
+    return True
+
+
+def check_rate_limit():
+    """Query `gh api rate_limit` and summarise the core API quota.
+
+    Returns a dict with 'remaining', 'limit' and 'reset_time', or None when gh
+    fails, times out, or prints something that cannot be parsed.
+    """
+    try:
+        # Argument list (no shell), consistent with check_gh_installed: nothing here needs shell parsing
+        result = subprocess.run(
+            [CONFIG['gh_command'], 'api', 'rate_limit'],
+            capture_output=True, text=True, check=True, timeout=10
+        )
+        core_rate = json.loads(result.stdout).get('resources', {}).get('core', {})
+        return {
+            'remaining': core_rate.get('remaining', 5000),
+            'limit': core_rate.get('limit', 5000),
+            'reset_time': core_rate.get('reset', 0)
+        }
+    except Exception:  # best-effort probe: any failure simply means "quota unknown"
+        return None
+
+
+def wait_for_rate_limit():
+    """Throttle gh calls: pause when the API quota is below the threshold, then apply the per-request delay."""
+    if not CONFIG['enable_rate_limit_check']:
+        return
+    # Everything below, sleeps included, runs under the lock, so concurrent callers queue up here
+    with rate_limit_lock:
+        if not rate_limit_info['checked'] or rate_limit_info['remaining'] is None:
+            limit_data = check_rate_limit()
+            if limit_data:
+                rate_limit_info['remaining'] = limit_data['remaining']
+                rate_limit_info['reset_time'] = limit_data['reset_time']
+                rate_limit_info['checked'] = True
+
+                log(f"📊 Rate Limit: {limit_data['remaining']}/{limit_data['limit']} requests remaining", verbose_only=True)
+                # Low quota: sleep, then clear 'checked' so the next call queries the quota again
+                if limit_data['remaining'] < CONFIG['rate_limit_threshold']:
+                    wait_time = CONFIG['rate_limit_wait_time']
+                    log(f"⚠️  Rate limit threshold reached ({limit_data['remaining']} remaining)")
+                    log(f"   Waiting {wait_time} seconds before continuing...")
+                    sleep(wait_time)
+                    rate_limit_info['checked'] = False
+        # Fixed delay before every gh call; the quota is not re-queried while 'checked' stays True
+        if CONFIG['request_delay'] > 0:
+            sleep(CONFIG['request_delay'])
+
+
+def run_gh_command(command):
+    """Execute a GitHub CLI command line and return its stdout parsed as JSON.
+
+    The rate-limit throttle runs first. The command goes through the shell with a
+    30-second timeout; None is returned when it exits non-zero, times out, or
+    prints output that is not valid JSON.
+    """
+    wait_for_rate_limit()
+    handled = (subprocess.CalledProcessError, json.JSONDecodeError, subprocess.TimeoutExpired)
+    try:
+        completed = subprocess.run(
+            command, shell=True, capture_output=True, text=True, check=True, timeout=30
+        )
+        return json.loads(completed.stdout)
+    except handled:
+        return None
+
+
+def fetch_repositories():
+    """Fetch all accessible repositories in parallel.
+
+    Lists the authenticated user's repositories plus those of every organization
+    returned by `gh api user/orgs`, and returns them de-duplicated by
+    `nameWithOwner` (first occurrence wins). Also fills CONFIG['personal_account']
+    from the authenticated user's login when that setting is empty.
+    """
+    log("Fetching repositories in parallel...")
+    gh = CONFIG['gh_command']
+    list_fields = "--json nameWithOwner,name,owner --limit 1000"
+
+    if not CONFIG['personal_account']:
+        # Request the whole user object: `--jq '.login'` prints the login as a bare string,
+        # which run_gh_command cannot parse as JSON, so the account was never detected.
+        user_info = run_gh_command(f"{gh} api user")
+        if isinstance(user_info, dict) and user_info.get('login'):
+            CONFIG['personal_account'] = user_info['login']
+            log(f"Detected personal account: {CONFIG['personal_account']}", verbose_only=True)
+
+    if CONFIG['enable_rate_limit_check']:
+        limit_data = check_rate_limit()
+        if limit_data:
+            log(f"📊 Initial Rate Limit: {limit_data['remaining']}/{limit_data['limit']} requests remaining")
+
+    all_repos = []
+
+    with ThreadPoolExecutor(max_workers=CONFIG['max_workers_fetch']) as executor:
+        # The personal listing is submitted first so it runs while the organization list is fetched
+        futures = [executor.submit(run_gh_command, f"{gh} repo list {list_fields}")]
+
+        orgs_future = executor.submit(run_gh_command, f"{gh} api user/orgs --paginate")
+        orgs_data = orgs_future.result()
+        for org in orgs_data or []:
+            futures.append(
+                executor.submit(run_gh_command, f"{gh} repo list {org['login']} {list_fields}")
+            )
+
+        for future in as_completed(futures):
+            result = future.result()
+            if result:
+                all_repos.extend(result)
+
+    # Remove duplicates, keeping the first occurrence of each nameWithOwner
+    seen = set()
+    unique_repos = []
+    for repo in all_repos:
+        if repo['nameWithOwner'] not in seen:
+            seen.add(repo['nameWithOwner'])
+            unique_repos.append(repo)
+
+    return unique_repos
+
+
+def decode_file_content(api_response):
+    """Return the UTF-8 text of a contents-API payload, or None when it cannot be decoded."""
+    # Only a dict can carry a 'content' field; any other response shape has nothing to decode.
+    payload = api_response.get('content', '') if isinstance(api_response, dict) else None
+    if not payload:
+        return None
+    try:
+        decoded_bytes = base64.b64decode(payload)
+        return decoded_bytes.decode('utf-8')
+    except Exception:
+        return None
+
+
+def get_custom_instruction_files(repo_name):
+    """
+    Retrieve all custom instruction files for a repository.
+
+    Checks for:
+    - .github/copilot-instructions.md
+    - .github/instructions/*.instructions.md
+
+    Paths are percent-encoded before being placed on the gh command line, because
+    run_gh_command executes through the shell and directory entry names come from
+    the repository contents rather than from this tool's configuration.
+
+    Returns a list of dicts with keys: path, char_count, exceeds_limit
+    """
+    from urllib.parse import quote  # local import keeps this block self-contained
+
+    def measure(file_path):
+        """Fetch one file; return its report dict, or None if it is missing or undecodable."""
+        api_path = f"repos/{repo_name}/contents/{quote(file_path, safe='/')}"
+        content = decode_file_content(run_gh_command(f"{CONFIG['gh_command']} api {api_path}"))
+        if content is None:
+            return None
+        char_count = len(content)
+        return {
+            'path': file_path,
+            'char_count': char_count,
+            'exceeds_limit': char_count > CONFIG['max_chars'],
+        }
+
+    # Fixed file locations listed in the configuration
+    candidates = list(CONFIG['custom_instruction_files'])
+
+    # Directory listings: keep only regular files with the configured suffix
+    for dir_path in CONFIG['custom_instruction_dirs']:
+        api_path = f"repos/{repo_name}/contents/{quote(dir_path, safe='/')}"
+        listing = run_gh_command(f"{CONFIG['gh_command']} api {api_path}")
+        if isinstance(listing, list):
+            candidates.extend(
+                item['path'] for item in listing
+                if item.get('type') == 'file'
+                and item.get('name', '').endswith(CONFIG['custom_instruction_extension'])
+            )
+
+    # Fetch and measure each candidate in discovery order, dropping the ones not found
+    reports = (measure(path) for path in candidates)
+    return [report for report in reports if report is not None]
+
+
+def assess_repo(repo):
+    """Build the compliance report for one repository: files found, oversize violations, and any error text."""
+    repo_id = repo['nameWithOwner']
+    report = dict(
+        repo=repo_id,
+        files_found=[],
+        violations=[],
+        has_custom_instructions=False,
+        error=None,
+    )
+
+    try:
+        found = get_custom_instruction_files(repo_id)
+        report['files_found'] = found
+        report['has_custom_instructions'] = bool(found)
+
+        # Every oversize file becomes one violation row
+        for entry in found:
+            if not entry['exceeds_limit']:
+                continue
+            size, limit = entry['char_count'], CONFIG['max_chars']
+            report['violations'].append({
+                'repository': repo_id, 'file_path': entry['path'],
+                'char_count': size, 'limit': limit, 'excess_chars': size - limit,
+            })
+    except Exception as exc:  # any failure is recorded on the report instead of propagating
+        report['error'] = str(exc)
+
+    return report
+
+
+def check_all_repositories(repos):
+    """Run assess_repo over every repository on a thread pool and return the reports sorted by repo name."""
+    log("\nValidating custom instruction files (parallel execution)...")
+
+    total = len(repos)
+    reports = []
+
+    with ThreadPoolExecutor(max_workers=CONFIG['max_workers_check']) as pool:
+        pending = {pool.submit(assess_repo, repo): repo for repo in repos}
+
+        # enumerate() from 1 doubles as the running count of finished repositories
+        for completed, finished in enumerate(as_completed(pending), start=1):
+            report = finished.result()
+            reports.append(report)
+            percent = completed / total * 100
+            log(f"⚡ Progress: {completed}/{total} repositories checked ({percent:.0f}%)", verbose_only=True)
+
+    reports.sort(key=lambda report: report['repo'])
+    return reports
+
+
+def export_violations_to_excel(violations):
+    """
+    Export files exceeding the configured limit (CONFIG['max_chars']) to an Excel report.
+    Falls back to CSV without openpyxl. Returns the created file's path, or None if no violations.
+    """
+    if not violations:
+        return None
+
+    if not OPENPYXL_AVAILABLE:
+        log("⚠️  openpyxl not installed. Install with: pip install openpyxl")
+        log("   Falling back to CSV output for violations.")
+        return _export_violations_to_csv(violations)
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"{CONFIG['excel_prefix']}_{timestamp}.xlsx"
+    filepath = Path(CONFIG['output_dir']) / filename
+    filepath.parent.mkdir(parents=True, exist_ok=True)
+
+    wb = openpyxl.Workbook()
+    ws = wb.active
+    ws.title = "Violations"
+
+    # Header styles: bold white text on a red fill, centred and wrapped
+    header_font = Font(bold=True, color="FFFFFF")
+    header_fill = PatternFill(start_color="CC0000", end_color="CC0000", fill_type="solid")
+    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
+
+    headers = [
+        "Repository",
+        "File Path",
+        "Character Count",
+        f"Limit ({CONFIG['max_chars']} chars)",
+        "Excess Characters",
+        "GitHub Copilot Rule",
+    ]
+
+    for col_idx, header in enumerate(headers, start=1):
+        cell = ws.cell(row=1, column=col_idx, value=header)
+        cell.font = header_font
+        cell.fill = header_fill
+        cell.alignment = header_alignment
+
+    # Data rows; the rule text quotes the configured limit so it agrees with the column header
+    rule_reference = (
+        f"Repository Custom Instructions must not exceed {CONFIG['max_chars']} characters. "
+        "See: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/"
+        "response-customization?tool=webui#about-repository-custom-instructions"
+    )
+
+    for row_idx, violation in enumerate(violations, start=2):
+        ws.cell(row=row_idx, column=1, value=violation['repository'])
+        ws.cell(row=row_idx, column=2, value=violation['file_path'])
+        ws.cell(row=row_idx, column=3, value=violation['char_count'])
+        ws.cell(row=row_idx, column=4, value=violation['limit'])
+        ws.cell(row=row_idx, column=5, value=violation['excess_chars'])
+        ws.cell(row=row_idx, column=6, value=rule_reference)
+
+    # Fixed column widths, one per header in the same order (not measured from the data)
+    column_widths = [40, 50, 18, 18, 18, 80]
+    for col_idx, width in enumerate(column_widths, start=1):
+        ws.column_dimensions[openpyxl.utils.get_column_letter(col_idx)].width = width
+
+    ws.freeze_panes = "A2"  # keep the header row visible while scrolling
+
+    wb.save(filepath)
+    log(f"✅ Excel violations report created: {filepath}")
+    log(f"   Total violations: {len(violations)}")
+    return str(filepath)
+
+
+def _export_violations_to_csv(violations):
+    """Write the violations to a timestamped CSV under CONFIG['output_dir'] and return its path as a string."""
+    import csv as csv_module
+
+    columns = ['repository', 'file_path', 'char_count', 'limit', 'excess_chars']
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    target = Path(CONFIG['output_dir']) / f"{CONFIG['excel_prefix']}_{stamp}.csv"
+
+    # Create the output directory on first use
+    target.parent.mkdir(parents=True, exist_ok=True)
+    with target.open('w', newline='', encoding='utf-8') as handle:
+        csv_out = csv_module.DictWriter(handle, fieldnames=columns)
+        csv_out.writeheader()
+        csv_out.writerows(violations)
+
+    log(f"✅ CSV violations report created: {target}")
+    log(f"   Total violations: {len(violations)}")
+    return str(target)
+
+
+def print_summary(results):
+    """Log aggregate counts for the run, followed by one line per oversize file when any exist."""
+    flagged = [v for report in results for v in report['violations']]
+    with_files = len([r for r in results if r['has_custom_instructions']])
+    with_violations = len([r for r in results if r['violations']])
+    with_errors = len([r for r in results if r['error']])
+    file_total = sum(len(r['files_found']) for r in results)
+
+    banner = "=" * 80
+    log("\n" + banner)
+    log("SUMMARY - CUSTOM INSTRUCTIONS VALIDATION")
+    log(banner)
+    log(f"Total repositories scanned:            {len(results)}")
+    log(f"Repositories with custom instructions: {with_files}")
+    log(f"Total custom instruction files found:  {file_total}")
+    log(f"Repositories with violations:          {with_violations}")
+    log(f"Total files exceeding {CONFIG['max_chars']} chars:       {len(flagged)}")
+    log(f"Repositories with errors:              {with_errors}")
+
+    if not with_violations:
+        return
+    log(f"\n❌ VIOLATIONS (files exceeding {CONFIG['max_chars']} characters):")
+    for v in flagged:
+        log(f"   • {v['repository']} | {v['file_path']} | {v['char_count']} chars (+{v['excess_chars']} over limit)")
+    log(f"\n📖 GitHub Copilot Rule: Repository Custom Instructions must not exceed {CONFIG['max_chars']} characters.")
+    log("   Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions")
+
+
+# ============================================================================
+# VALIDATION FUNCTIONS
+# ============================================================================
+
+def validate_existence(results):
+    """
+    Check that the scan found at least one custom instructions file in any repository.
+    Logs the outcome and returns True when one or more files exist, otherwise False.
+    """
+    file_count = sum(map(len, (report['files_found'] for report in results)))
+    if file_count:
+        log(f"\n✅ VALIDATION PASSED: Found {file_count} custom instruction file(s).")
+        return True
+    log("\n❌ VALIDATION FAILED: No Repository Custom Instructions files found.")
+    log("   GitHub Copilot supports Repository Custom Instructions configured via:")
+    log("     - .github/copilot-instructions.md")
+    log("     - .github/instructions/*.instructions.md")
+    log("   Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions")
+    return False
+
+
+def validate_file_sizes(results):
+    """
+    Collect every size violation recorded on the per-repository reports and log pass/fail.
+    Returns the flattened list of violation dicts; an empty list means every file is within the limit.
+    """
+    limit = CONFIG['max_chars']
+    oversize = [violation for report in results for violation in report['violations']]
+
+    if not oversize:
+        log(f"\n✅ VALIDATION PASSED: All custom instruction files are within the {limit}-character limit.")
+        return oversize
+
+    # At least one file is over the limit: report the count and point at the rule.
+    log(f"\n❌ VALIDATION FAILED: {len(oversize)} file(s) exceed the {limit}-character limit.")
+    log(f"   GitHub Copilot Rule: Repository Custom Instructions must not exceed {limit} characters.")
+    log("   Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization?tool=webui#about-repository-custom-instructions")
+    return oversize
+
+
+# ============================================================================
+# MAIN FUNCTION
+# ============================================================================
+
+def main():
+    """Run the whole validation and return the process exit status: 0 if every check passes, 1 otherwise."""
+    start_time = time()
+
+    log("=" * 80)
+    log("GITHUB COPILOT CUSTOM INSTRUCTIONS VALIDATION TOOL")
+    log("=" * 80)
+    log("Validation Rules:")
+    log("  • Existence: At least one custom instruction file must be present")
+    log(f"  • Size Limit: Each file must not exceed {CONFIG['max_chars']} characters")
+    log("  • Reference: https://docs.github.com/en/enterprise-cloud@latest/copilot/concepts/prompting/response-customization")
+
+    if not check_gh_installed():
+        log("\n❌ GitHub CLI (gh) is not installed or not in PATH!")
+        log("\nTo install:")
+        log("  Windows: winget install --id GitHub.cli")
+        log("  macOS:   brew install gh")
+        log("  Linux:   See https://cli.github.com/")
+        log("\nAfter installation, authenticate with: gh auth login")
+        return 1
+
+    # Fetch repositories
+    fetch_start = time()
+    repos = fetch_repositories()
+    fetch_time = time() - fetch_start
+
+    if not repos:
+        log("\n❌ Could not fetch repositories. Make sure you're authenticated:")
+        log("  gh auth login")
+        return 1
+
+    log(f"✓ Found {len(repos)} repositories in {fetch_time:.2f}s")
+
+    # Check all repositories
+    check_start = time()
+    results = check_all_repositories(repos)
+    check_time = time() - check_start
+
+    # Print summary
+    print_summary(results)
+
+    log("\n⚡ PERFORMANCE METRICS:")
+    log(f"   Repository fetch: {fetch_time:.2f}s")
+    log(f"   Validation check: {check_time:.2f}s")
+    log(f"   Total execution:  {(time() - start_time):.2f}s")
+
+    # --- Validation 1: Existence ---
+    existence_ok = validate_existence(results)
+    if not existence_ok:
+        log("\n" + "=" * 80)
+        log("❌ Validation failed: no custom instruction files found.")
+        log("=" * 80)
+        return 1
+
+    # --- Validation 2: File size (violations are exported before the failure banner) ---
+    violations = validate_file_sizes(results)
+    if violations:
+        export_violations_to_excel(violations)
+        log("\n" + "=" * 80)
+        log(f"❌ Validation failed: one or more custom instruction files exceed the {CONFIG['max_chars']}-character limit.")
+        log("=" * 80)
+        return 1
+
+    log("\n" + "=" * 80)
+    log("✅ All custom instruction validations passed!")
+    log("=" * 80)
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        sys.exit(main())  # main() returns 0 when all validations pass and 1 on every failure path
+    except KeyboardInterrupt:
+        log("\n\n⚠️  Operation cancelled by user")
+        sys.exit(130)  # conventional shell status for termination by Ctrl-C (128 + SIGINT)
+    except Exception as e:
+        log(f"\n❌ Unexpected error: {e}")
+        sys.exit(1)

From 73fb49af8e13b111718bff6b569053b0d73c235b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 12 May 2026 14:03:13 +0000
Subject: [PATCH 3/3] Fix docstring: update Python requirement to 3.8+ and
 clarify openpyxl is optional

---
 validate_custom_instructions.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/validate_custom_instructions.py b/validate_custom_instructions.py
index 0ae8388..0c9b483 100644
--- a/validate_custom_instructions.py
+++ b/validate_custom_instructions.py
@@ -9,8 +9,9 @@
 
 Requirements:
     - GitHub CLI (gh) installed and authenticated
-    - Python 3.7+
-    - openpyxl (for Excel report generation: pip install openpyxl)
+    - Python 3.8+
+    - openpyxl (optional, for Excel report generation: pip install openpyxl)
+      Falls back to CSV output if not installed.
 
 Usage:
     python validate_custom_instructions.py