diff --git a/.github/workflows/kilo-dispatch.yml b/.github/workflows/kilo-dispatch.yml index fa3e8c4..6c0e6c8 100644 --- a/.github/workflows/kilo-dispatch.yml +++ b/.github/workflows/kilo-dispatch.yml @@ -4,14 +4,50 @@ on: workflow_dispatch: inputs: prompt: - description: Prompt passed to Kilo CLI - required: true + description: Fallback prompt when command is not provided + required: false + default: "" + type: string + command: + description: Kilo command to run (without leading slash), e.g. local-review + required: false + default: "" + type: string + command_args: + description: Optional arguments passed to command mode + required: false + default: "" type: string timeout_minutes: description: Timeout in minutes for job and Kilo run required: false default: "30" type: string + model: + description: Gateway model ID for kilocode provider + required: false + default: "openai/gpt-5.3-codex" + type: string + pr_number: + description: Pull request number to post result to + required: false + default: "" + type: string + review_comment_id: + description: Optional PR review comment ID to reply to + required: false + default: "" + type: string + parent_comment_id: + description: Optional parent comment ID (preferred over review_comment_id) + required: false + default: "" + type: string + +permissions: + contents: read + issues: write + pull-requests: write jobs: run-kilo: @@ -25,7 +61,7 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: 20 + node-version: 24 - name: Install Kilo CLI run: npm install -g @kilocode/cli @@ -36,22 +72,132 @@ jobs: echo "KILO_API_TOKEN secret is missing. Add it in repository settings before running this workflow." exit 1 fi + if [ -z "${{ secrets.NIKOLAY_REVIEWER_APP_ID }}" ]; then + echo "NIKOLAY_REVIEWER_APP_ID secret is missing. Add it in repository settings before running this workflow." 
+ exit 1 + fi + if [ -z "${{ secrets.NIKOLAY_REVIEWER_PRIVATE_KEY }}" ]; then + echo "NIKOLAY_REVIEWER_PRIVATE_KEY secret is missing. Add it in repository settings before running this workflow." + exit 1 + fi + + - name: Create GitHub App token + id: app-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.NIKOLAY_REVIEWER_APP_ID }} + private-key: ${{ secrets.NIKOLAY_REVIEWER_PRIVATE_KEY }} + owner: ${{ github.repository_owner }} + repositories: ${{ github.event.repository.name }} - name: Run Kilo CLI env: KILO_API_TOKEN: ${{ secrets.KILO_API_TOKEN }} KILO_API_KEY: ${{ secrets.KILO_API_TOKEN }} + KILO_PROVIDER: kilocode + KILOCODE_MODEL: ${{ inputs.model }} run: | set -euo pipefail : > kilo-run.log + : > kilo-events.log + : > kilo-output.log kilo --version 2>&1 | tee -a kilo-run.log + + has_timeout=false if kilo run --help 2>&1 | grep -q -- '--timeout'; then - kilo run --auto "${{ inputs.prompt }}" --timeout "${{ inputs.timeout_minutes }}m" 2>&1 | tee -a kilo-run.log + has_timeout=true + fi + + kilo_args=(kilo run --auto --format json) + + if [ -n "${{ inputs.command }}" ]; then + kilo_args+=(--command "${{ inputs.command }}") + if [ -n "${{ inputs.command_args }}" ]; then + kilo_args+=("${{ inputs.command_args }}") + fi + elif [ -n "${{ inputs.prompt }}" ]; then + kilo_args+=("${{ inputs.prompt }}") + else + echo "Either 'command' or 'prompt' must be provided." | tee -a kilo-run.log + exit 1 + fi + + if [ "$has_timeout" = true ]; then + "${kilo_args[@]}" --timeout "${{ inputs.timeout_minutes }}m" 2>&1 | tee -a kilo-events.log | tee -a kilo-run.log else - echo "kilo run does not support --timeout; relying on job timeout-minutes." | tee -a kilo-run.log - kilo run --auto "${{ inputs.prompt }}" 2>&1 | tee -a kilo-run.log + echo "kilo run does not support --timeout; relying on job timeout-minutes." 
>> kilo-run.log + "${kilo_args[@]}" 2>&1 | tee -a kilo-events.log | tee -a kilo-run.log fi + node -e "const fs=require('fs');const strip=(s)=>s.replace(/\x1B\[[0-9;]*[A-Za-z]/g,'');const lines=fs.readFileSync('kilo-events.log','utf8').split(/\r?\n/);const texts=[];for(const line of lines){if(!line.trim())continue;try{const evt=JSON.parse(line);if(evt&&evt.type==='text'&&evt.part&&typeof evt.part.text==='string'){const t=evt.part.text.trim();if(t)texts.push(t);}}catch{}}let out='';if(texts.length){out=texts[texts.length-1];}else{const fallback=strip(fs.readFileSync('kilo-events.log','utf8')).split(/\r?\n/).map(x=>x.trim()).filter(Boolean);out=fallback.length?fallback[fallback.length-1]:'';}fs.writeFileSync('kilo-output.log',out+(out.endsWith('\n')?'':'\n'));" + + - name: Post result as GitHub App comment + if: ${{ always() && inputs.pr_number != '' }} + uses: actions/github-script@v7 + with: + github-token: ${{ steps.app-token.outputs.token }} + script: | + const fs = require('fs'); + const prNumber = Number('${{ inputs.pr_number }}'); + const parentCommentIdRaw = ('${{ inputs.parent_comment_id }}' || '${{ inputs.review_comment_id }}').trim(); + const parentCommentId = parentCommentIdRaw ? 
Number(parentCommentIdRaw) : null; + let outputContent = ''; + try { + outputContent = fs.readFileSync('kilo-output.log', 'utf8'); + } catch { + outputContent = 'kilo-output.log not found.'; + } + + const body = outputContent.trim() || '(empty Kilo output)'; + + if (parentCommentId) { + try { + await github.rest.pulls.createReplyForReviewComment({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: prNumber, + comment_id: parentCommentId, + body + }); + return; + } catch (error) { + const status = error?.status; + if (status !== 404 && status !== 422) { + throw error; + } + + let prefix = ''; + try { + const parent = await github.rest.issues.getComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: parentCommentId + }); + const login = parent?.data?.user?.login; + if (login) { + prefix = `@${login} `; + } + } catch { + // Fallback to plain issue comment if parent lookup fails. + } + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: `${prefix}${body}` + }); + return; + } + } + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body + }); + - name: Upload Kilo run log if: always() uses: actions/upload-artifact@v4 diff --git a/.kilo/command/perf-review.md b/.kilo/command/perf-review.md new file mode 100644 index 0000000..3da280a --- /dev/null +++ b/.kilo/command/perf-review.md @@ -0,0 +1,199 @@ +--- +description: Benchmark-driven PR performance review versus target branch +--- + +# Perf Review Workflow + +You are performing a performance review for the current PR branch. + +Non-negotiable requirements: +1. Benchmark timing plus profiling data is the highest-priority judgment tool. +2. Compare source branch versus target branch and report relevant benchmark metric changes. +3. Provide analysis and a final verdict: does the PR improve performance or not. 
+
+## Inputs
+
+- Optional argument `--target <branch>`: target branch override.
+- Optional argument `--filter <regex>`: benchmark filter regex.
+
+If arguments are omitted:
+- Default target branch to PR base branch from `gh pr view --json baseRefName` when available.
+- Fall back target branch to `main`.
+- Default filter to empty (run full selected benchmark suites).
+
+## Step 1 - Resolve Branches and Revisions
+
+1. Identify contender branch and hash:
+   - Contender branch: current checked-out branch (or `HEAD` if detached).
+   - Contender hash: `git rev-parse --short HEAD`.
+2. Identify baseline branch:
+   - Use `--target` if provided.
+   - Else use PR base branch from GitHub CLI when available.
+   - Else use `main`.
+3. Resolve baseline hash with `git rev-parse --short <baseline-branch>`.
+4. Print branch and hash mapping before running benchmarks.
+
+## Step 2 - Select Relevant Benchmark Binaries
+
+Inspect changed files with:
+
+`git diff --name-only <baseline>...HEAD`
+
+Map file paths to benchmark binaries:
+
+| Changed path pattern | Benchmark binary | Coverage |
+|---|---|---|
+| `include/bit_vector*`, `include/interleaved*` | `benchmarks` | BitVector rank/select |
+| `include/rmm*` | `bench_rmm` | RmM tree operations |
+| `include/louds*` | `louds_tree_benchmarks` | LOUDS traversal |
+| `include/simd*`, `include/aligned*` | `alignment_comparison` | SIMD and alignment |
+| `include/misc/*` | all relevant | Differential helpers |
+| `CMakeLists.txt`, benchmark infra, broad/unknown changes | all benchmarks | Conservative full run |
+
+Available benchmark binaries:
+- `benchmarks`
+- `bench_rmm`
+- `bench_rmm_sdsl`
+- `louds_tree_benchmarks`
+- `alignment_comparison`
+
+If the mapping is ambiguous, run all benchmark binaries.
+If `--filter` is provided, pass it through as `--benchmark_filter`.
+Print selected binaries and why they were selected.
+
+## Step 3 - Build Both Revisions (Timing and Profiling Builds)
+
+Use isolated build directories per short hash.
+
+1. 
Capture original ref (`git rev-parse --abbrev-ref HEAD` or detached `HEAD`).
+2. If worktree is dirty, stash safely with untracked files:
+   - `git stash push -u -m "perf-review-auto-stash"`
+3. Build baseline revision:
+   - `git checkout <baseline-hash>`
+   - Timing build (required):
+     - `cmake -B build/benchmarks-all_bench_<baseline-hash> -DCMAKE_BUILD_TYPE=Release -DPIXIE_BENCHMARKS=ON`
+     - `cmake --build build/benchmarks-all_bench_<baseline-hash> --config Release -j`
+   - Profiling build (Linux only, recommended):
+     - `cmake -B build/benchmarks-diagnostic_bench_<baseline-hash> -DCMAKE_BUILD_TYPE=RelWithDebInfo -DPIXIE_BENCHMARKS=ON -DBENCHMARK_ENABLE_LIBPFM=ON -DPIXIE_DIAGNOSTICS=ON`
+     - `cmake --build build/benchmarks-diagnostic_bench_<baseline-hash> --config RelWithDebInfo -j`
+4. Build contender revision:
+   - `git checkout <contender-hash>`
+   - Repeat timing and profiling build with contender hash suffix.
+5. Restore original ref and restore stashed state if a stash was created.
+
+Critical guardrails:
+- Never use Debug binaries for timing review.
+- Timing comparisons must use `benchmarks-all` Release builds.
+- Profiling counters should use `benchmarks-diagnostic` RelWithDebInfo builds.
+
+## Step 4 - Resolve Binary Paths
+
+Support both generator layouts:
+
+- Multi-config: `build/<build-dir>/Release/<binary>` or `build/<build-dir>/RelWithDebInfo/<binary>`
+- Single-config: `build/<build-dir>/<binary>`
+
+For each needed binary, detect the existing executable path before running.
+If a required binary is missing, report failure and stop with a blocked verdict.
+
+## Step 5 - Run Timing Comparison (Primary Judgment)
+
+Locate compare script from baseline timing build:
+
+`build/benchmarks-all_bench_<baseline-hash>/_deps/googlebenchmark-src/tools/compare.py`
+
+For each selected benchmark binary, run:
+
+`python3 <compare.py path> benchmarks <baseline binary path> <contender binary path> [--benchmark_filter="<regex>"]`
+
+Capture full output for each binary and keep it for report details. 
+
+## Step 6 - Collect Hardware Counter Profiles (Linux Only)
+
+If Linux profiling build is available, run both baseline and contender diagnostic binaries with counter output:
+
+- `--benchmark_counters_tabular=true`
+- `--benchmark_format=json`
+- `--benchmark_out=<path>`
+- Include `--benchmark_filter` when provided.
+
+Collect and compare at least these counter families when present:
+- `instructions`, `cycles` (for IPC)
+- `cache-misses`, `cache-references` (cache miss rate)
+- `branch-misses`, `branches` (branch mispredict rate)
+- `L1-dcache-load-misses` (L1 data cache pressure)
+
+Compute derived metrics when denominators are non-zero:
+- IPC = instructions / cycles
+- Cache miss rate = cache-misses / cache-references
+- Branch mispredict rate = branch-misses / branches
+
+If profiling is unavailable (non-Linux or libpfm not available), continue with timing-only review and explicitly mark profiling as unavailable in the report.
+
+## Step 7 - Analyze Timing and Counter Data
+
+Timing classification per benchmark entry:
+- Improvement: time delta < -5%
+- Regression: time delta > +5%
+- Neutral: between -5% and +5%
+
+Aggregate per binary:
+- Number of improvements/regressions/neutral
+- Net average percentage change
+- Largest regression and largest improvement
+
+Counter correlation:
+- Use hardware counters to explain major timing changes.
+- Flag anomalies (timing improves while key counters degrade, or opposite).
+
+Judgment priority:
+- Base verdict primarily on benchmark timing comparison.
+- Use counter data as explanatory evidence and confidence signal.
+
+## Step 8 - Produce Final Markdown Report
+
+Return a structured markdown report with this shape:
+
+```markdown
+## Performance Review: <contender> vs <baseline>
+
+### Configuration
+- Baseline: <branch> (<hash>)
+- Contender: <branch> (<hash>)
+- Platform: <os / cpu>
+- Benchmarks run: <binaries>
+- Filter: <filter or none>
+- Hardware counters: available / unavailable
+
+### Timing Summary
+| Binary | Improvements | Regressions | Neutral | Net Change |
+|---|---:|---:|---:|---:|
+| ... 
| N | N | N | +/-X% | + +### Detailed Timing Results + + +### Hardware Counter Profile (if available) +| Benchmark | IPC (base->new) | Cache Miss Rate (base->new) | Branch Mispredict (base->new) | +|---|---:|---:|---:| +| ... | X.XX -> Y.YY | A.A% -> B.B% | C.C% -> D.D% | + +### Key Findings +- +- + +### Verdict +**[IMPROVES PERFORMANCE | REGRESSES PERFORMANCE | NO SIGNIFICANT CHANGE]** + +<1-2 sentence justification grounded in benchmark metrics, with profiling context if available> +``` + +Verdict rules: +- `IMPROVES PERFORMANCE`: improvements outnumber regressions, no severe regression (>10%), and net average change is favorable. +- `REGRESSES PERFORMANCE`: any severe regression (>10%) or regressions dominate with net unfavorable average. +- `NO SIGNIFICANT CHANGE`: mostly neutral changes or mixed results that approximately cancel out. + +## Failure Handling + +- If required builds fail or timing comparison cannot run, output a blocked review with exact failure points and no misleading verdict. +- If only profiling fails, continue with timing-based verdict and explicitly list profiling limitation. diff --git a/.kilo/command/ping.md b/.kilo/command/ping.md new file mode 100644 index 0000000..b5edaf7 --- /dev/null +++ b/.kilo/command/ping.md @@ -0,0 +1,7 @@ +--- +description: Test command that replies with pong +--- + +Respond with exactly `pong`. +Do not add any other words. +Do not add quotes or punctuation. 
diff --git a/.opencode/skills/benchmarks-compare-revisions/SKILL.md b/.kilo/skills/benchmarks-compare-revisions/SKILL.md similarity index 91% rename from .opencode/skills/benchmarks-compare-revisions/SKILL.md rename to .kilo/skills/benchmarks-compare-revisions/SKILL.md index 71bb92f..ae8d7e7 100644 --- a/.opencode/skills/benchmarks-compare-revisions/SKILL.md +++ b/.kilo/skills/benchmarks-compare-revisions/SKILL.md @@ -5,7 +5,7 @@ description: Compare benchmark performance between two git revisions by building # Benchmarks Compare Revisions Skill -Use this skill to compare performance between two git revisions. It focuses on the compare workflow and relies on the existing benchmarks skill for build/run details (see .opencode/skills/benchmarks/SKILL.md). +Use this skill to compare performance between two git revisions. It focuses on the compare workflow and relies on the existing benchmarks skill for build/run details (see .kilo/skills/benchmarks/SKILL.md). ## Goal @@ -29,12 +29,12 @@ Use the existing benchmarks skill build steps, but set the build suffix to inclu # Baseline BUILD_SUFFIX=bench_${BASELINE} git checkout ${BASELINE} -# Follow .opencode/skills/benchmarks/SKILL.md build instructions with this suffix +# Follow .kilo/skills/benchmarks/SKILL.md build instructions with this suffix # Contender BUILD_SUFFIX=bench_${CONTENDER} git checkout ${CONTENDER} -# Follow .opencode/skills/benchmarks/SKILL.md build instructions with this suffix +# Follow .kilo/skills/benchmarks/SKILL.md build instructions with this suffix ``` ## Step 2 — Compare using compare.py diff --git a/.opencode/skills/benchmarks/SKILL.md b/.kilo/skills/benchmarks/SKILL.md similarity index 100% rename from .opencode/skills/benchmarks/SKILL.md rename to .kilo/skills/benchmarks/SKILL.md diff --git a/.opencode/skills/cmake/SKILL.md b/.kilo/skills/cmake/SKILL.md similarity index 100% rename from .opencode/skills/cmake/SKILL.md rename to .kilo/skills/cmake/SKILL.md diff --git 
a/.opencode/skills/pdf/SKILL.md b/.kilo/skills/pdf/SKILL.md similarity index 100% rename from .opencode/skills/pdf/SKILL.md rename to .kilo/skills/pdf/SKILL.md diff --git a/AGENTS.md b/AGENTS.md index b7409fd..54ad750 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,7 @@ Current implementations include BitVector, RmM Tree, and LOUDS Tree. Planned add ## Skills -./.opencode/skills/ contains several project-specific skill, use it when appropriate +./.kilo/skills/ contains several project-specific skills, use them when appropriate ## Architecture diff --git a/CMakePresets.json b/CMakePresets.json index f14f0c8..dabbdb0 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -24,7 +24,7 @@ }, { "name": "benchmarks-all", - "displayName": "All Benchmarks", + "displayName": "Benchmarks", "binaryDir": "${sourceDir}/build/release", "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", @@ -33,7 +33,7 @@ }, { "name": "benchmarks-diagnostic", - "displayName": "All Benchmarks RelWithDebInfo", + "displayName": "Benchmarks diagnostic build", "binaryDir": "${sourceDir}/build/release-with-deb", "cacheVariables": { "BENCHMARK_ENABLE_LIBPFM": "ON", @@ -85,7 +85,7 @@ }, { "name": "benchmarks-diagnostic", - "displayName": "Build ReleaseWithDebugInfo", + "displayName": "Benchmarks diagnostic", "configurePreset": "benchmarks-diagnostic" }, {