diff --git a/eng/ci/public-build.yml b/eng/ci/public-build.yml index d48a4638..431d1a1c 100644 --- a/eng/ci/public-build.yml +++ b/eng/ci/public-build.yml @@ -86,6 +86,15 @@ extends: parameters: PROJECT_DIRECTORY: 'workers' PoolName: 1es-pool-azfunc-public + + - stage: RunWorkerProfiling + dependsOn: CheckPythonWorkerDependencies + jobs: + - template: /eng/templates/jobs/ci-profiling.yml@self + parameters: + PROJECT_DIRECTORY: 'workers' + PoolName: 1es-pool-azfunc-public + - stage: RunWorkerEmulatorTests dependsOn: CheckPythonWorkerDependencies jobs: diff --git a/eng/scripts/download-baseline-profile.sh b/eng/scripts/download-baseline-profile.sh new file mode 100644 index 00000000..00b4e988 --- /dev/null +++ b/eng/scripts/download-baseline-profile.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Download baseline performance profile from dev branch builds +# This script is used in CI to fetch baseline profiles for comparison + +set -e + +ORGANIZATION="${1:-}" +PROJECT="${2:-}" +PIPELINE_ID="${3:-}" +ARTIFACT_NAME="${4:-}" +OUTPUT_DIR="${5:-baseline_results}" +ACCESS_TOKEN="${SYSTEM_ACCESSTOKEN:-}" + +if [ -z "$ORGANIZATION" ] || [ -z "$PROJECT" ] || [ -z "$PIPELINE_ID" ] || [ -z "$ARTIFACT_NAME" ]; then + echo "Usage: $0 [output_dir]" + echo "Example: $0 myorg myproject 123 performance-profile-python-3.13" + exit 1 +fi + +echo "Searching for baseline artifacts from dev branch..." +echo "Organization: $ORGANIZATION" +echo "Project: $PROJECT" +echo "Pipeline ID: $PIPELINE_ID" +echo "Artifact: $ARTIFACT_NAME" + +mkdir -p "$OUTPUT_DIR" + +# Azure DevOps REST API endpoint +API_VERSION="7.1-preview.7" +BUILDS_API="https://dev.azure.com/${ORGANIZATION}/${PROJECT}/_apis/build/builds" + +# Get the latest successful build from dev branch +echo "Fetching latest successful build from dev branch..." 
+ +BUILD_RESPONSE=$(curl -s -u ":${ACCESS_TOKEN}" \ + "${BUILDS_API}?definitions=${PIPELINE_ID}&branchName=refs/heads/dev&statusFilter=completed&resultFilter=succeeded&\$top=1&api-version=${API_VERSION}") + +BUILD_ID=$(echo "$BUILD_RESPONSE" | grep -o '"id":[0-9]*' | head -1 | grep -o '[0-9]*') + +if [ -z "$BUILD_ID" ]; then + echo "⚠ No successful builds found on dev branch" + exit 1 +fi + +echo "✓ Found build ID: $BUILD_ID" + +# Get artifacts for this build +echo "Fetching artifacts from build $BUILD_ID..." + +ARTIFACTS_API="${BUILDS_API}/${BUILD_ID}/artifacts" +ARTIFACTS_RESPONSE=$(curl -s -u ":${ACCESS_TOKEN}" \ + "${ARTIFACTS_API}?api-version=${API_VERSION}") + +# Check if artifact exists +if echo "$ARTIFACTS_RESPONSE" | grep -q "\"name\":\"${ARTIFACT_NAME}\""; then + echo "✓ Found artifact: $ARTIFACT_NAME" + + # Get download URL + DOWNLOAD_URL=$(echo "$ARTIFACTS_RESPONSE" | grep -A 10 "\"name\":\"${ARTIFACT_NAME}\"" | grep -o '"downloadUrl":"[^"]*"' | cut -d'"' -f4) + + if [ -n "$DOWNLOAD_URL" ]; then + echo "Downloading artifact..." 
+ + # Download and extract artifact + curl -s -u ":${ACCESS_TOKEN}" -o "${OUTPUT_DIR}/artifact.zip" "$DOWNLOAD_URL" + + if [ -f "${OUTPUT_DIR}/artifact.zip" ]; then + unzip -q -o "${OUTPUT_DIR}/artifact.zip" -d "$OUTPUT_DIR" + rm -f "${OUTPUT_DIR}/artifact.zip" + + echo "✓ Baseline artifact downloaded to $OUTPUT_DIR" + + # List downloaded files + echo "Downloaded files:" + ls -lh "$OUTPUT_DIR" + + exit 0 + else + echo "✗ Failed to download artifact" + exit 1 + fi + else + echo "✗ Could not find download URL for artifact" + exit 1 + fi +else + echo "⚠ Artifact not found: $ARTIFACT_NAME" + echo "This might be the first profiling run on dev branch" + exit 1 +fi diff --git a/eng/templates/jobs/ci-profiling.yml b/eng/templates/jobs/ci-profiling.yml new file mode 100644 index 00000000..56f7c36e --- /dev/null +++ b/eng/templates/jobs/ci-profiling.yml @@ -0,0 +1,192 @@ +parameters: + PROJECT_DIRECTORY: 'workers' + +jobs: + - job: "ProfilingPython" + displayName: "Performance Profiling - Proxy Worker" + + pool: + name: ${{ parameters.PoolName }} + image: 1es-ubuntu-22.04 + os: linux + + strategy: + matrix: + Python313: + PYTHON_VERSION: '3.13' + Python314: + PYTHON_VERSION: '3.14' + + steps: + - bash: | + echo "Disk space before cleanup:" + df -h + + # Clean up Docker resources + docker system prune -af --volumes || true + + # Clean pip cache + pip cache purge || true + + # Clean apt cache + sudo apt-get clean || true + sudo rm -rf /var/lib/apt/lists/* || true + + echo "Disk space after cleanup:" + df -h + displayName: 'Free disk space' + + - task: UsePythonVersion@0 + inputs: + versionSpec: $(PYTHON_VERSION) + + - task: UseDotNet@2 + displayName: 'Install .NET 8' + inputs: + version: 8.0.x + + - bash: | + chmod +x eng/scripts/install-dependencies.sh + chmod +x eng/scripts/test-setup.sh + + eng/scripts/install-dependencies.sh $(PYTHON_VERSION) ${{ parameters.PROJECT_DIRECTORY }} + eng/scripts/test-setup.sh + displayName: 'Install dependencies' + condition: 
and(eq(variables.isSdkRelease, false), eq(variables.isExtensionsRelease, false), eq(variables['USETESTPYTHONSDK'], false), eq(variables['USETESTPYTHONEXTENSIONS'], false)) + + - bash: | + echo "Running performance profiling for proxy_worker..." + + # Create profile output directory + mkdir -p profile_results + + # Run tests with profiling enabled + python -m pytest -v --profile-dispatcher \ + --profile-output-dir=profile_results \ + --profile-show-report \ + tests/unittest_proxy/test_dispatcher.py + + echo "Performance profiling completed" + + # Display profile summary + if [ -f profile_results/dispatcher_session_profile.txt ]; then + echo "=== Performance Profile Summary ===" + head -n 50 profile_results/dispatcher_session_profile.txt + fi + displayName: "Run Performance Profiling" + env: + PYTHON_VERSION: $(PYTHON_VERSION) + workingDirectory: $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }} + + - bash: | + if [ -f profile_results/dispatcher_session_profile.json ]; then + echo "Checking for baseline profile to compare against..." + + # Check if this is a PR build + if [ -n "$(System.PullRequest.PullRequestId)" ]; then + echo "==========================================" + echo "PR Build Detected - Running Comparison" + echo "PR #$(System.PullRequest.PullRequestId)" + echo "==========================================" + + # Download baseline from dev branch + chmod +x eng/scripts/download-baseline-profile.sh + + if eng/scripts/download-baseline-profile.sh \ + "$(System.TeamFoundationCollectionUri)" \ + "$(System.TeamProject)" \ + "$(System.DefinitionId)" \ + "performance-profile-python-$(PYTHON_VERSION)" \ + "baseline_results"; then + + echo "" + echo "✓ Baseline downloaded successfully" + + # Find the baseline JSON file (might be in subdirectory) + BASELINE_FILE=$(find baseline_results -name "dispatcher_session_profile.json" | head -1) + + if [ -n "$BASELINE_FILE" ]; then + echo "Running performance comparison..." 
+ + python tests/unittest_proxy/compare_profiles.py \ + "$BASELINE_FILE" \ + profile_results/dispatcher_session_profile.json \ + --output profile_results/comparison_report.txt \ + --show-all + + echo "" + echo "╔════════════════════════════════════════════════════════════╗" + echo "║ PERFORMANCE COMPARISON REPORT (vs dev branch) ║" + echo "╚════════════════════════════════════════════════════════════╝" + cat profile_results/comparison_report.txt + echo "" + + # Check for regressions and create warnings + REGRESSION_COUNT=$(grep -c "REGRESSION" profile_results/comparison_report.txt || echo "0") + + if [ "$REGRESSION_COUNT" -gt 0 ]; then + echo "##vso[task.logissue type=warning]⚠️ $REGRESSION_COUNT performance regression(s) detected!" + echo "##vso[task.logissue type=warning]Review the comparison report in the artifacts for details." + else + echo "✓ No performance regressions detected" + fi + + # Upload comparison to build summary + echo "##vso[task.uploadsummary]$(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }}/profile_results/comparison_report.txt" + else + echo "⚠ Could not find baseline JSON file in downloaded artifact" + fi + else + echo "" + echo "⚠ Could not download baseline - comparison skipped" + echo "This might be the first profiling run, or the baseline is unavailable" + fi + else + echo "==========================================" + echo "Main/Dev Branch Build" + echo "==========================================" + echo "This profile will serve as baseline for future PR comparisons" + echo "Artifact: performance-profile-python-$(PYTHON_VERSION)" + fi + else + echo "Skipping baseline comparison (no profile results)" + fi + displayName: "Compare Performance Against Baseline" + condition: succeededOrFailed() + env: + PYTHON_VERSION: $(PYTHON_VERSION) + SYSTEM_ACCESSTOKEN: $(System.AccessToken) + workingDirectory: $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }} + + - bash: | + if [ -d profile_results ]; then + echo "Uploading 
performance profile artifact..." + echo "##vso[artifact.upload artifactname=performance-profile-python-$(PYTHON_VERSION)]$(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }}/profile_results" + echo "Performance profile artifact uploaded successfully" + else + echo "Skipping artifact upload (no profile results)" + fi + displayName: "Upload Performance Profile Artifact" + condition: succeededOrFailed() + env: + PYTHON_VERSION: $(PYTHON_VERSION) + workingDirectory: $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }} + + - bash: | + echo "Cleaning up after profiling..." + # Remove build artifacts and caches + rm -rf $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }}/build || true + rm -rf $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }}/.pytest_cache || true + rm -rf $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }}/profile_results || true + rm -rf $(Build.SourcesDirectory)/${{ parameters.PROJECT_DIRECTORY }}/baseline_results || true + + # Clean Docker resources + docker system prune -f || true + + # Clean pip cache + pip cache purge || true + + echo "Disk space after cleanup:" + df -h + displayName: 'Cleanup after profiling' + condition: always() diff --git a/workers/tests/unittest_proxy/compare_profiles.py b/workers/tests/unittest_proxy/compare_profiles.py new file mode 100644 index 00000000..b14128f8 --- /dev/null +++ b/workers/tests/unittest_proxy/compare_profiles.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +""" +Compare performance profiles and detect regressions. + +This script compares two profiling results and identifies performance +changes between them. 
+ +Usage: + python compare_profiles.py baseline.json current.json [OPTIONS] + +Examples: + # Basic comparison + python compare_profiles.py baseline/profile.json current/profile.json + + # Show only regressions + python compare_profiles.py baseline/profile.json current/profile.json \ + --regressions-only + + # Set custom threshold + python compare_profiles.py baseline/profile.json current/profile.json --threshold 5 +""" + +import argparse +import json +import sys +from pathlib import Path +from typing import Dict, List, Tuple + + +def load_profile(file_path: str) -> Dict: + """Load a profile JSON file.""" + with open(file_path, 'r') as f: + return json.load(f) + + +def analyze_difference(baseline: Dict, current: Dict, method: str) -> Dict: + """ + Analyze the performance difference for a specific method. + + Returns: + Dictionary with comparison metrics + """ + baseline_stats = baseline.get(method, {}) + current_stats = current.get(method, {}) + + if not baseline_stats: + return { + 'status': 'NEW', + 'baseline_time': 0, + 'current_time': current_stats.get('total_time', 0), + 'diff': current_stats.get('total_time', 0), + 'diff_pct': float('inf'), + } + + if not current_stats: + return { + 'status': 'REMOVED', + 'baseline_time': baseline_stats.get('total_time', 0), + 'current_time': 0, + 'diff': -baseline_stats.get('total_time', 0), + 'diff_pct': -100, + } + + baseline_time = baseline_stats.get('total_time', 0) + current_time = current_stats.get('total_time', 0) + diff = current_time - baseline_time + diff_pct = (diff / baseline_time * 100) if baseline_time > 0 else 0 + + # Determine status + if abs(diff_pct) < 5: + status = 'UNCHANGED' + elif diff_pct > 0: + status = 'REGRESSION' if diff_pct > 10 else 'SLOWER' + else: + status = 'IMPROVEMENT' if diff_pct < -10 else 'FASTER' + + return { + 'status': status, + 'baseline_time': baseline_time, + 'current_time': current_time, + 'diff': diff, + 'diff_pct': diff_pct, + 'baseline_calls': baseline_stats.get('call_count', 0), 
+ 'current_calls': current_stats.get('call_count', 0), + 'baseline_mean': baseline_stats.get('mean_time', 0), + 'current_mean': current_stats.get('mean_time', 0), + } + + +def format_time(seconds: float) -> str: + """Format time in appropriate unit.""" + if seconds < 0.001: + return f"{seconds * 1_000_000:.2f}µs" + elif seconds < 1: + return f"{seconds * 1_000:.2f}ms" + else: + return f"{seconds:.3f}s" + + +def format_comparison_report( + baseline: Dict, + current: Dict, + threshold: float = 10.0, + show_all: bool = False, + regressions_only: bool = False +) -> Tuple[str, int]: + """ + Format a detailed comparison report. + + Returns: + Tuple of (report string, regression count) + """ + lines = [ + "=" * 100, + "Performance Profile Comparison Report", + "=" * 100, + "" + ] + + # Analyze all methods + all_methods = set(baseline.keys()) | set(current.keys()) + analyses = {} + + for method in all_methods: + analyses[method] = analyze_difference(baseline, current, method) + + # Count by status + status_counts = { + 'REGRESSION': 0, + 'SLOWER': 0, + 'UNCHANGED': 0, + 'FASTER': 0, + 'IMPROVEMENT': 0, + 'NEW': 0, + 'REMOVED': 0, + } + + for analysis in analyses.values(): + status_counts[analysis['status']] += 1 + + # Summary + lines.append("Summary:") + lines.append(f" Total methods: {len(all_methods)}") + lines.append(f" Regressions (>10%): {status_counts['REGRESSION']}") + lines.append(f" Slower (5-10%): {status_counts['SLOWER']}") + lines.append(f" Unchanged (<5%): {status_counts['UNCHANGED']}") + lines.append(f" Faster (5-10%): {status_counts['FASTER']}") + lines.append(f" Improvements (>10%): {status_counts['IMPROVEMENT']}") + lines.append(f" New methods: {status_counts['NEW']}") + lines.append(f" Removed methods: {status_counts['REMOVED']}") + lines.append("") + + # Detail sections + def add_section(title: str, filter_statuses: List[str]): + methods_in_section = [ + (method, analysis) + for method, analysis in analyses.items() + if analysis['status'] in 
filter_statuses + ] + + if not methods_in_section: + return + + # Sort by absolute difference + methods_in_section.sort( + key=lambda x: abs(x[1]['diff']), + reverse=True + ) + + lines.append("-" * 100) + lines.append(f"{title} ({len(methods_in_section)} methods)") + lines.append("-" * 100) + + for method, analysis in methods_in_section: + method_short = method.split('.')[-1] + status_symbol = { + 'REGRESSION': '🔴', + 'SLOWER': '🟡', + 'UNCHANGED': '⚪', + 'FASTER': '🟢', + 'IMPROVEMENT': '🟢', + 'NEW': '🆕', + 'REMOVED': '🗑️', + }.get(analysis['status'], '❓') + + lines.append(f"\n{status_symbol} {method_short}") + + if analysis['status'] == 'NEW': + lines.append(" Status: Newly added") + lines.append(f" Time: {format_time(analysis['current_time'])}") + elif analysis['status'] == 'REMOVED': + lines.append(" Status: Removed") + lines.append(f" Time: {format_time(analysis['baseline_time'])}") + else: + lines.append( + f" Baseline: {format_time(analysis['baseline_time'])} " + f"({analysis['baseline_calls']} calls, " + f"{format_time(analysis['baseline_mean'])} avg)") + lines.append( + f" Current: {format_time(analysis['current_time'])} " + f"({analysis['current_calls']} calls, " + f"{format_time(analysis['current_mean'])} avg)") + lines.append( + f" Change: {analysis['diff']:+.6f}s " + f"({analysis['diff_pct']:+.2f}%)") + + lines.append("") + + # Add sections based on filters + if regressions_only: + add_section("REGRESSIONS", ['REGRESSION', 'SLOWER']) + elif show_all: + add_section("REGRESSIONS", ['REGRESSION']) + add_section("SLOWER", ['SLOWER']) + add_section("IMPROVEMENTS", ['IMPROVEMENT']) + add_section("FASTER", ['FASTER']) + add_section("NEW METHODS", ['NEW']) + add_section("REMOVED METHODS", ['REMOVED']) + else: + # Default: show significant changes only + add_section("REGRESSIONS", ['REGRESSION']) + add_section("IMPROVEMENTS", ['IMPROVEMENT']) + add_section("NEW METHODS", ['NEW']) + + lines.append("=" * 100) + + return "\n".join(lines), status_counts['REGRESSION'] 
+ + +def main(): + parser = argparse.ArgumentParser( + description='Compare performance profiles and detect regressions' + ) + parser.add_argument( + 'baseline', + help='Path to baseline profile JSON file' + ) + parser.add_argument( + 'current', + help='Path to current profile JSON file' + ) + parser.add_argument( + '--threshold', + type=float, + default=10.0, + help='Percentage threshold for marking regressions (default: 10.0)' + ) + parser.add_argument( + '--show-all', + action='store_true', + help='Show all methods, not just significant changes' + ) + parser.add_argument( + '--regressions-only', + action='store_true', + help='Show only regressions' + ) + parser.add_argument( + '--output', + help='Save report to file instead of printing' + ) + parser.add_argument( + '--fail-on-regression', + action='store_true', + help='Exit with non-zero status if regressions detected' + ) + + args = parser.parse_args() + + # Validate files exist + baseline_path = Path(args.baseline) + current_path = Path(args.current) + + if not baseline_path.exists(): + print(f"❌ Baseline file not found: {baseline_path}", file=sys.stderr) + return 1 + + if not current_path.exists(): + print(f"❌ Current file not found: {current_path}", file=sys.stderr) + return 1 + + # Load profiles + try: + baseline = load_profile(str(baseline_path)) + current = load_profile(str(current_path)) + except json.JSONDecodeError as e: + print(f"❌ Error parsing JSON: {e}", file=sys.stderr) + return 1 + + # Generate report + report, regression_count = format_comparison_report( + baseline, + current, + threshold=args.threshold, + show_all=args.show_all, + regressions_only=args.regressions_only + ) + + # Output report + if args.output: + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, 'w') as f: + f.write(report) + print(f"✓ Report saved to {output_path}") + else: + print(report) + + # Check for regressions + if args.fail_on_regression and regression_count 
> 0: + print(f"\n❌ {regression_count} performance regression(s) detected!") + return 1 + + print("\n✅ Comparison complete") + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/workers/tests/unittest_proxy/conftest.py b/workers/tests/unittest_proxy/conftest.py new file mode 100644 index 00000000..86d5938b --- /dev/null +++ b/workers/tests/unittest_proxy/conftest.py @@ -0,0 +1,20 @@ +""" +Pytest configuration for proxy_worker unit tests. + +This module sets up profiling capabilities for dispatcher performance analysis. +""" + +# Import profiling plugin hooks (use absolute import for pytest compatibility) +from conftest_profiling import ( + pytest_addoption, + enable_profiling, + profile_test, + pytest_terminal_summary, +) + +__all__ = [ + 'pytest_addoption', + 'enable_profiling', + 'profile_test', + 'pytest_terminal_summary', +] diff --git a/workers/tests/unittest_proxy/conftest_profiling.py b/workers/tests/unittest_proxy/conftest_profiling.py new file mode 100644 index 00000000..2ceae13c --- /dev/null +++ b/workers/tests/unittest_proxy/conftest_profiling.py @@ -0,0 +1,140 @@ +""" +Pytest plugin for performance profiling of proxy_worker dispatcher. + +This plugin automatically enables profiling when running proxy_worker tests +and generates performance reports after test completion. 
+""" + +import os +import pytest + +# Import profiling utilities (use absolute import for pytest compatibility) +from profile_dispatcher import ( + patch_dispatcher_for_profiling, + get_profiling_stats, + save_profiling_results, + reset_profiling_data, + format_profiling_report, + compare_profile_results, +) + + +def pytest_addoption(parser): + """Add command-line options for profiling.""" + parser.addoption( + "--profile-dispatcher", + action="store_true", + default=False, + help="Enable performance profiling for dispatcher methods" + ) + parser.addoption( + "--profile-output-dir", + action="store", + default="profile_results", + help="Directory to save profiling results (default: profile_results)" + ) + parser.addoption( + "--profile-baseline", + action="store", + default=None, + help="Path to baseline profile JSON for comparison" + ) + parser.addoption( + "--profile-show-report", + action="store_true", + default=False, + help="Display profiling report in console after tests" + ) + + +@pytest.fixture(scope="session", autouse=True) +def enable_profiling(request): + """ + Session-scoped fixture that enables profiling for all tests when requested. 
+ """ + if request.config.getoption("--profile-dispatcher"): + # Patch dispatcher methods for profiling + patch_dispatcher_for_profiling() + print("\n✓ Performance profiling enabled for proxy_worker dispatcher") + + yield + + # After all tests complete, save results + output_dir = request.config.getoption("--profile-output-dir") + save_profiling_results(output_dir, "dispatcher_session") + + # Show report if requested + if request.config.getoption("--profile-show-report"): + stats = get_profiling_stats() + print("\n" + format_profiling_report(stats)) + + # Compare with baseline if provided + baseline_path = request.config.getoption("--profile-baseline") + if baseline_path and os.path.exists(baseline_path): + current_path = os.path.join(output_dir, "dispatcher_session_profile.json") + if os.path.exists(current_path): + comparison = compare_profile_results(baseline_path, current_path) + print("\n" + comparison) + + # Save comparison report + comparison_path = os.path.join(output_dir, "profile_comparison.txt") + with open(comparison_path, 'w') as f: + f.write(comparison) + print(f"\n✓ Comparison report saved to {comparison_path}") + else: + yield + + +@pytest.fixture(scope="function") +def profile_test(request): + """ + Function-scoped fixture for profiling individual tests. + + Usage: + def test_something(profile_test): + # Your test code here + pass + """ + if request.config.getoption("--profile-dispatcher"): + # Reset profiling data for this specific test + reset_profiling_data() + + yield + + # Save results for this specific test + output_dir = request.config.getoption("--profile-output-dir") + test_name = request.node.name + save_profiling_results(output_dir, f"test_{test_name}") + else: + yield + + +def pytest_terminal_summary(terminalreporter, exitstatus, config): + """ + Add profiling summary to pytest output. 
+ """ + if config.getoption("--profile-dispatcher"): + terminalreporter.section("Performance Profiling Summary") + + output_dir = config.getoption("--profile-output-dir") + terminalreporter.write_line( + f"Profiling results saved to: {os.path.abspath(output_dir)}" + ) + + # Show key statistics + stats = get_profiling_stats() + if stats: + terminalreporter.write_line("\nTop 5 methods by total execution time:") + sorted_methods = sorted( + stats.items(), + key=lambda x: x[1]['total_time'], + reverse=True + )[:5] + + for i, (method, method_stats) in enumerate(sorted_methods, 1): + method_short = method.split('.')[-1] + terminalreporter.write_line( + f" {i}. {method_short}: " + f"{method_stats['total_time']:.6f}s " + f"({method_stats['call_count']} calls)" + ) diff --git a/workers/tests/unittest_proxy/generate_baseline.py b/workers/tests/unittest_proxy/generate_baseline.py new file mode 100644 index 00000000..7c847a30 --- /dev/null +++ b/workers/tests/unittest_proxy/generate_baseline.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +""" +Generate performance baseline for proxy_worker dispatcher. + +This script runs the profiling tests and saves results as a baseline +for future comparisons. 
+ +Usage: + python generate_baseline.py [--output-dir DIR] [--test-pattern PATTERN] + +Examples: + # Generate baseline in default location + python generate_baseline.py + + # Generate baseline in custom directory + python generate_baseline.py --output-dir baselines/v1.0.0 + + # Profile specific tests + python generate_baseline.py --test-pattern test_worker_init +""" + +import argparse +import subprocess +import sys +from pathlib import Path +from datetime import datetime + + +def main(): + parser = argparse.ArgumentParser( + description='Generate performance profiling baseline for proxy_worker' + ) + parser.add_argument( + '--output-dir', + default='profile_baselines', + help='Directory to save baseline results (default: profile_baselines)' + ) + parser.add_argument( + '--test-pattern', + default='test_dispatcher', + help='Test pattern to run (default: test_dispatcher)' + ) + parser.add_argument( + '--iterations', + type=int, + default=3, + help='Number of test iterations to run (default: 3)' + ) + parser.add_argument( + '--verbose', + action='store_true', + help='Show detailed output' + ) + + args = parser.parse_args() + + # Create timestamped output directory + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + output_dir = Path(args.output_dir) / timestamp + output_dir.mkdir(parents=True, exist_ok=True) + + print("Generating performance baseline...") + print(f"Output directory: {output_dir.absolute()}") + print(f"Test pattern: {args.test_pattern}") + print(f"Iterations: {args.iterations}") + print() + + # Build pytest command + pytest_args = [ + sys.executable, '-m', 'pytest', + '--profile-dispatcher', + f'--profile-output-dir={output_dir}', + '-v' if args.verbose else '-q', + f'-k={args.test_pattern}', + f'--count={args.iterations}', # pytest-repeat plugin + 'tests/unittest_proxy/' + ] + + if not args.verbose: + pytest_args.append('--tb=short') + + # Run tests + print("Running profiling tests...") + result = subprocess.run(pytest_args, 
cwd=Path(__file__).parent.parent.parent) + + if result.returncode != 0: + print(f"\n❌ Tests failed with exit code {result.returncode}") + return result.returncode + + # Check if results were generated + json_file = output_dir / 'dispatcher_session_profile.json' + txt_file = output_dir / 'dispatcher_session_profile.txt' + + if json_file.exists() and txt_file.exists(): + print("\n✅ Baseline generated successfully!") + print("\nResults saved to:") + print(f" - {json_file}") + print(f" - {txt_file}") + + # Show summary + print("\n" + "=" * 80) + with open(txt_file, 'r') as f: + # Show first 40 lines of the report + lines = f.readlines()[:40] + print(''.join(lines)) + print("=" * 80) + + # Create a 'latest' symlink + latest_link = Path(args.output_dir) / 'latest' + if latest_link.exists(): + latest_link.unlink() + try: + latest_link.symlink_to(output_dir.name, target_is_directory=True) + print(f"\n✓ Latest baseline link created: {latest_link}") + except OSError: + # Symlinks might not work on Windows without admin rights + print(f"\n⚠ Could not create symlink (use {output_dir} directly)") + + return 0 + else: + print(f"\n❌ Profile results not found in {output_dir}") + print("This may indicate profiling was not enabled or tests didn't run.") + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/workers/tests/unittest_proxy/profile_dispatcher.py b/workers/tests/unittest_proxy/profile_dispatcher.py new file mode 100644 index 00000000..3e07001f --- /dev/null +++ b/workers/tests/unittest_proxy/profile_dispatcher.py @@ -0,0 +1,378 @@ +""" +Performance profiling for proxy_worker dispatcher methods. + +This module provides profiling capabilities for all _handle_* methods in dispatcher.py +and tracks execution time for inner method calls to identify performance bottlenecks. 
+""" + +import asyncio +import cProfile +import functools +import io +import json +import pstats +import time +from collections import defaultdict +from contextlib import contextmanager +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +# Performance tracking storage +_method_timings: Dict[str, List[float]] = defaultdict(list) +_call_counts: Dict[str, int] = defaultdict(int) +_nested_timings: Dict[str, Dict[str, List[float]]] = defaultdict( + lambda: defaultdict(list)) + + +@contextmanager +def profile_method(method_name: str, parent_method: Optional[str] = None): + """ + Context manager to profile a method's execution time. + + Args: + method_name: Name of the method being profiled + parent_method: Name of the parent method (for nested calls) + """ + start_time = time.perf_counter() + try: + yield + finally: + elapsed_time = time.perf_counter() - start_time + _method_timings[method_name].append(elapsed_time) + _call_counts[method_name] += 1 + + if parent_method: + _nested_timings[parent_method][method_name].append(elapsed_time) + + +def profile_function(func: Callable) -> Callable: + """ + Decorator to profile function execution time. + + Works with both sync and async functions. + """ + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + method_name = f"{func.__module__}.{func.__qualname__}" + with profile_method(method_name): + return await func(*args, **kwargs) + + @functools.wraps(func) + def sync_wrapper(*args, **kwargs): + method_name = f"{func.__module__}.{func.__qualname__}" + with profile_method(method_name): + return func(*args, **kwargs) + + if asyncio.iscoroutinefunction(func): + return async_wrapper + return sync_wrapper + + +def profile_nested_calls(parent_method: str): + """ + Decorator to profile nested method calls within a parent method. 
+ + Args: + parent_method: Name of the parent method for tracking nested calls + """ + def decorator(func: Callable) -> Callable: + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + method_name = f"{func.__module__}.{func.__qualname__}" + with profile_method(method_name, parent_method): + return await func(*args, **kwargs) + + @functools.wraps(func) + def sync_wrapper(*args, **kwargs): + method_name = f"{func.__module__}.{func.__qualname__}" + with profile_method(method_name, parent_method): + return func(*args, **kwargs) + + if asyncio.iscoroutinefunction(func): + return async_wrapper + return sync_wrapper + + return decorator + + +def patch_dispatcher_for_profiling(): + """ + Monkey-patch the dispatcher module to add profiling to all _handle_* + methods and their nested calls. + """ + from proxy_worker import dispatcher + + # Find all _handle_* methods + handle_methods = [ + attr for attr in dir(dispatcher.Dispatcher) + if attr.startswith('_handle_') and callable( + getattr(dispatcher.Dispatcher, attr)) + ] + + # Patch each _handle_* method + for method_name in handle_methods: + original_method = getattr(dispatcher.Dispatcher, method_name) + + @functools.wraps(original_method) + async def profiled_handle_method( + self, request, + _orig=original_method, _name=method_name): + full_method_name = f"dispatcher.Dispatcher.{_name}" + with profile_method(full_method_name): + return await _orig(self, request) + + setattr(dispatcher.Dispatcher, method_name, profiled_handle_method) + + # Patch reload_library_worker + original_reload = dispatcher.Dispatcher.reload_library_worker + + @staticmethod + @functools.wraps(original_reload) + def profiled_reload_library_worker(directory: str): + method_name = "dispatcher.Dispatcher.reload_library_worker" + with profile_method(method_name): + return original_reload(directory) + + dispatcher.Dispatcher.reload_library_worker = \ + profiled_reload_library_worker + + # Patch DependencyManager methods if available + 
try: + from proxy_worker.utils.dependency import DependencyManager + + for method_name in ['prioritize_customer_dependencies', + 'should_load_cx_dependencies', + 'is_in_linux_consumption']: + if hasattr(DependencyManager, method_name): + original_method = getattr(DependencyManager, method_name) + + @functools.wraps(original_method) + def profiled_method( + *args, _orig=original_method, + _name=method_name, **kwargs): + full_name = f"dependency.DependencyManager.{_name}" + with profile_method(full_name): + return _orig(*args, **kwargs) + + setattr(DependencyManager, method_name, + staticmethod(profiled_method)) + except ImportError: + pass + + +def get_profiling_stats() -> Dict[str, Any]: + """ + Get comprehensive profiling statistics. + + Returns: + Dictionary containing timing statistics for all profiled methods + """ + stats = {} + + for method_name, timings in _method_timings.items(): + if timings: + stats[method_name] = { + 'call_count': len(timings), + 'total_time': sum(timings), + 'mean_time': sum(timings) / len(timings), + 'min_time': min(timings), + 'max_time': max(timings), + 'timings': timings, + } + + # Add nested call statistics if available + if method_name in _nested_timings: + nested_stats = {} + for nested_method, nested_times in \ + _nested_timings[method_name].items(): + nested_stats[nested_method] = { + 'call_count': len(nested_times), + 'total_time': sum(nested_times), + 'mean_time': sum(nested_times) / len(nested_times), + } + stats[method_name]['nested_calls'] = nested_stats + + return stats + + +def format_profiling_report( + stats: Dict[str, Any], include_details: bool = True) -> str: + """ + Format profiling statistics into a readable report. 
def format_profiling_report(
        stats: Dict[str, Any], include_details: bool = True) -> str:
    """
    Format profiling statistics into a readable report.

    Args:
        stats: Profiling statistics from get_profiling_stats()
        include_details: Whether to include the per-method nested-call
            breakdown (fix: this flag was previously accepted but never
            used).  Per-method summary lines are always emitted.

    Returns:
        Formatted string report
    """
    lines = [
        "=" * 80,
        "Performance Profiling Report - Proxy Worker Dispatcher",
        "=" * 80,
        ""
    ]

    # Sort methods by total time (descending) so the hottest entry leads.
    sorted_methods = sorted(
        stats.items(),
        key=lambda x: x[1]['total_time'],
        reverse=True
    )

    for method_name, method_stats in sorted_methods:
        lines.append(f"\n{method_name}")
        lines.append("-" * len(method_name))
        lines.append(f"  Call count: {method_stats['call_count']}")
        lines.append(f"  Total time: {method_stats['total_time']:.6f}s")
        lines.append(f"  Mean time: {method_stats['mean_time']:.6f}s")
        lines.append(f"  Min time: {method_stats['min_time']:.6f}s")
        lines.append(f"  Max time: {method_stats['max_time']:.6f}s")

        # Show nested calls only when details were requested — fix for
        # the previously dead include_details parameter.
        if (include_details and 'nested_calls' in method_stats
                and method_stats['nested_calls']):
            lines.append("\n  Nested method calls:")
            for nested_method, nested_stats in \
                    method_stats['nested_calls'].items():
                lines.append(f"    - {nested_method}")
                lines.append(
                    f"      Calls: {nested_stats['call_count']}, "
                    f"Total: {nested_stats['total_time']:.6f}s, "
                    f"Mean: {nested_stats['mean_time']:.6f}s")

    lines.append("\n" + "=" * 80)
    return "\n".join(lines)


def save_profiling_results(output_dir: str, test_name: str = "proxy_worker"):
    """
    Save profiling results to JSON and text files.

    Args:
        output_dir: Directory to save results (created if missing)
        test_name: Name prefix for output files
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    stats = get_profiling_stats()

    # Save JSON results
    json_path = output_path / f"{test_name}_profile.json"
    with open(json_path, 'w') as f:
        json.dump(stats, f, indent=2)

    # Save text report
    report_path = output_path / f"{test_name}_profile.txt"
    with open(report_path, 'w') as f:
        f.write(format_profiling_report(stats))

    print("\nProfiling results saved to:")
    print(f"  - {json_path}")
    print(f"  - {report_path}")


def run_cprofile_on_tests(test_module: str, output_dir: str):
    """
    Run cProfile over a pytest selection and save detailed profile data.

    Args:
        test_module: pytest ``-k`` selection expression for the tests to
            profile.  Fix: this argument was previously ignored and the
            filter was hard-coded to 'proxy'.
        output_dir: Directory to save profile results (created if missing)
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    profiler = cProfile.Profile()
    profiler.enable()

    # Import and run the selected tests under the profiler; disable in a
    # finally so a pytest crash still yields usable profile data.
    try:
        import pytest
        pytest.main(['-v', '-k', test_module])
    finally:
        profiler.disable()

    # Save binary profile data (loadable via pstats / snakeviz).
    profile_data_path = output_path / 'cprofile_data.prof'
    profiler.dump_stats(str(profile_data_path))

    # Generate text report
    s = io.StringIO()
    ps = pstats.Stats(profiler, stream=s)
    ps.strip_dirs()
    ps.sort_stats('cumulative')
    ps.print_stats(50)  # Top 50 functions

    report_path = output_path / 'cprofile_report.txt'
    with open(report_path, 'w') as f:
        f.write(s.getvalue())

    print("\ncProfile results saved to:")
    print(f"  - {profile_data_path}")
    print(f"  - {report_path}")


def reset_profiling_data():
    """Clear all profiling data (timings, call counts, nested timings)."""
    _method_timings.clear()
    _call_counts.clear()
    _nested_timings.clear()
+ + Args: + baseline_file: Path to baseline JSON profile + current_file: Path to current JSON profile + + Returns: + Formatted comparison report + """ + with open(baseline_file, 'r') as f: + baseline = json.load(f) + + with open(current_file, 'r') as f: + current = json.load(f) + + lines = [ + "=" * 80, + "Performance Profile Comparison", + "=" * 80, + f"\nBaseline: {baseline_file}", + f"Current: {current_file}", + "\n" + ] + + # Compare each method + all_methods = set(baseline.keys()) | set(current.keys()) + + for method in sorted(all_methods): + baseline_stats = baseline.get(method, {}) + current_stats = current.get(method, {}) + + if not baseline_stats: + lines.append(f"\n[NEW] {method}") + lines.append(f" Total time: {current_stats.get('total_time', 0):.6f}s") + continue + + if not current_stats: + lines.append(f"\n[REMOVED] {method}") + continue + + baseline_time = baseline_stats.get('total_time', 0) + current_time = current_stats.get('total_time', 0) + diff = current_time - baseline_time + diff_pct = (diff / baseline_time * 100) if baseline_time > 0 else 0 + + status = "⚠" if abs(diff_pct) > 10 else "✓" + lines.append(f"\n{status} {method}") + lines.append(f" Baseline: {baseline_time:.6f}s") + lines.append(f" Current: {current_time:.6f}s") + lines.append(f" Diff: {diff:+.6f}s ({diff_pct:+.2f}%)") + + lines.append("\n" + "=" * 80) + return "\n".join(lines) diff --git a/workers/tests/unittest_proxy/test_profiling.py b/workers/tests/unittest_proxy/test_profiling.py new file mode 100644 index 00000000..5b116a86 --- /dev/null +++ b/workers/tests/unittest_proxy/test_profiling.py @@ -0,0 +1,197 @@ +""" +Test the performance profiling functionality. + +These tests verify that the profiling system correctly tracks method execution times. 
+""" + +import pytest +import time + +from profile_dispatcher import ( + profile_function, + profile_method, + get_profiling_stats, + reset_profiling_data, + patch_dispatcher_for_profiling, +) + + +@pytest.fixture(autouse=True) +def reset_profiling(): + """Reset profiling data before each test.""" + reset_profiling_data() + yield + reset_profiling_data() + + +def test_profile_function_decorator_sync(): + """Test that profile_function decorator works with sync functions.""" + @profile_function + def slow_function(): + time.sleep(0.01) + return "done" + + # Call the function + result = slow_function() + + # Check result + assert result == "done" + + # Check profiling stats + stats = get_profiling_stats() + assert len(stats) > 0 + + # Find the profiled method + method_name = f"{slow_function.__module__}.{slow_function.__qualname__}" + assert method_name in stats + assert stats[method_name]['call_count'] == 1 + assert stats[method_name]['total_time'] >= 0.01 + + +def test_profile_function_decorator_async(): + """Test that profile_function decorator works with async functions.""" + import asyncio + + @profile_function + async def slow_async_function(): + await asyncio.sleep(0.01) + return "done" + + # Call the async function + result = asyncio.run(slow_async_function()) + + # Check result + assert result == "done" + + # Check profiling stats + stats = get_profiling_stats() + assert len(stats) > 0 + + +def test_profile_method_context_manager(): + """Test the profile_method context manager.""" + with profile_method("test_method"): + time.sleep(0.01) + + stats = get_profiling_stats() + assert "test_method" in stats + assert stats["test_method"]['call_count'] == 1 + assert stats["test_method"]['total_time'] >= 0.01 + + +def test_nested_profiling(): + """Test that nested method calls are tracked.""" + with profile_method("parent_method"): + time.sleep(0.01) + + with profile_method("child_method", parent_method="parent_method"): + time.sleep(0.01) + + stats = 
get_profiling_stats() + + # Check parent method + assert "parent_method" in stats + assert stats["parent_method"]['total_time'] >= 0.02 + + # Check child method + assert "child_method" in stats + assert stats["child_method"]['total_time'] >= 0.01 + + +def test_multiple_calls(): + """Test that multiple calls accumulate statistics.""" + @profile_function + def fast_function(): + return "fast" + + # Call multiple times + for _ in range(5): + fast_function() + + stats = get_profiling_stats() + method_name = f"{fast_function.__module__}.{fast_function.__qualname__}" + + assert stats[method_name]['call_count'] == 5 + assert len(stats[method_name]['timings']) == 5 + + +def test_stats_calculation(): + """Test that statistics are correctly calculated.""" + with profile_method("test_stats"): + time.sleep(0.01) + + with profile_method("test_stats"): + time.sleep(0.02) + + with profile_method("test_stats"): + time.sleep(0.01) + + stats = get_profiling_stats() + method_stats = stats["test_stats"] + + assert method_stats['call_count'] == 3 + assert method_stats['min_time'] >= 0.01 + assert method_stats['max_time'] >= 0.02 + assert 0.01 <= method_stats['mean_time'] <= 0.02 + + +def test_patch_dispatcher(): + """Test that dispatcher patching doesn't break imports.""" + # This should not raise any errors + try: + patch_dispatcher_for_profiling() + + # Try importing after patching + from proxy_worker.dispatcher import Dispatcher + + # Check that Dispatcher class still exists + assert Dispatcher is not None + + # Check that handle methods still exist + assert hasattr(Dispatcher, '_handle__worker_init_request') + + except Exception as e: + pytest.fail(f"Patching failed with error: {e}") + + +def test_reset_profiling_data(): + """Test that reset clears all profiling data.""" + # Add some profiling data + with profile_method("method1"): + pass + + with profile_method("method2"): + pass + + stats_before = get_profiling_stats() + assert len(stats_before) == 2 + + # Reset + 
reset_profiling_data() + + stats_after = get_profiling_stats() + assert len(stats_after) == 0 + + +def test_profiling_with_exception(): + """Test that profiling still captures time when exceptions occur.""" + @profile_function + def failing_function(): + time.sleep(0.01) + raise ValueError("Test error") + + # Call and expect exception + with pytest.raises(ValueError): + failing_function() + + # Profiling should still have captured the time + stats = get_profiling_stats() + method_name = f"{failing_function.__module__}.{failing_function.__qualname__}" + + assert method_name in stats + assert stats[method_name]['call_count'] == 1 + assert stats[method_name]['total_time'] >= 0.01 + + +if __name__ == '__main__': + pytest.main([__file__, '-v'])