diff --git a/.codex/skills/fn-fp-root-cause-analysis/SKILL.md b/.codex/skills/fn-fp-root-cause-analysis/SKILL.md new file mode 100644 index 000000000..55d8fde52 --- /dev/null +++ b/.codex/skills/fn-fp-root-cause-analysis/SKILL.md @@ -0,0 +1,131 @@ +--- +name: fn-fp-root-cause-analysis +description: Use when runnable lift metrics already exist and you need structured false-negative or false-positive root-cause analysis from ground-truth, merged LL, and shard artifacts such as coverage CSVs, illegalEntry logs, stderr logs, or shard result manifests +--- + +# FN/FP Root Cause Analysis + +## Overview + +This skill is for explaining *why* runnable lift missed or over-lifted addresses. +Do not use it just to compute aggregate precision or recall. + +## When To Use + +- You already have ground-truth and lifted output. +- You need evidence-backed FN or FP categories instead of only `tp/fp/fn`. +- You have shard artifacts such as `shard_results.json`, `*.coverage.csv`, `*.illegalEntry.log`, `*.stderr.log`, `*.ll`, `*.li.csv`, or `*.need.csv`. + +Do not use this skill when the task is only “run evaluation and report metrics”. +For that, run the validator script first and stop there unless asked for causes. + +## Inputs + +- Ground truth: + - preferred: final-layout CSV like `ground_truth.csv` + - plus `function_symbols.csv` for function-to-shard attribution +- Lift output: + - merged LL such as `merged.ll` +- Optional shard evidence: + - `shard_results.json` + - shard `*.coverage.csv` + - shard `*.illegalEntry.log` + - shard `*.stderr.log` + - shard `*.ll` + +## Workflow + +1. Compute metrics and address sets: + +```bash +python3 runnable/scripts/validate_libcrypto_ground_truth.py \ + --ground-truth-csv <ground_truth.csv> \ + --ll <merged.ll> \ + --function-symbols-csv <function_symbols.csv> \ + --csv-image-base <csv-image-base> \ + --rebase-base <rebase-base> \ + --summary-out <validation_summary.json> +``` + +2. 
Analyze FN and FP causes: + +```bash +python3 runnable/scripts/analyze_fn_fp_root_causes.py \ + --validation-summary <validation_summary.json> \ + --ground-truth-csv <ground_truth.csv> \ + --function-symbols-csv <function_symbols.csv> \ + --merged-ll <merged.ll> \ + --shard-results-json <shard_results.json> \ + --csv-image-base <csv-image-base> \ + --rebase-base <rebase-base> \ + --summary-out <analysis_summary.json> +``` + +## Minimal Reproduction + +```bash +python3 runnable/scripts/validate_libcrypto_ground_truth.py \ + --ground-truth-csv test/fixtures/fn_fp_root_cause/ground_truth.csv \ + --ll test/fixtures/fn_fp_root_cause/merged.ll \ + --function-symbols-csv test/fixtures/fn_fp_root_cause/function_symbols.csv \ + --csv-image-base 0x1000 \ + --rebase-base 0x50000000 \ + --summary-out /tmp/fnfp.validation.json + +python3 runnable/scripts/analyze_fn_fp_root_causes.py \ + --validation-summary /tmp/fnfp.validation.json \ + --ground-truth-csv test/fixtures/fn_fp_root_cause/ground_truth.csv \ + --function-symbols-csv test/fixtures/fn_fp_root_cause/function_symbols.csv \ + --merged-ll test/fixtures/fn_fp_root_cause/merged.ll \ + --shard-results-json test/fixtures/fn_fp_root_cause/shard_results.json \ + --csv-image-base 0x1000 \ + --rebase-base 0x50000000 \ + --summary-out /tmp/fnfp.analysis.json +``` + +## Output Shape + +Expect JSON with: + +- `findings[]` +- each finding includes: + - `kind`: `fn` or `fp` + - `address` + - `reason` + - `priority` + - `symbol` and `range` when attributable + - `evidence_paths` + +## Supported Reason Categories + +- `illegal_entry_suppression` +- `shard_timeout` +- `shard_error` +- `shard_empty` +- `merge_missing` +- `continuation_byte` +- `ground_truth_gap` +- `padding` +- `outside_gt_coverage` +- `extra_lifted_bytes` + +## Interpretation Rules + +- Prefer shard-state explanations for FN when shard evidence exists. 
+- Prefer address-shape explanations for FP: + - continuation bytes + - GT gap near nearby instruction starts + - padding or data-section spill + - outside coverage + - extra lifted bytes inside covered space + +## Reporting Standard + +When summarizing results for reviewers, include: + +- representative addresses +- category counts +- exact evidence paths +- first-priority investigation directions + +Do not collapse everything back into only aggregate metrics. diff --git a/.codex/skills/runnable-libcrypto-canonical-eval/SKILL.md b/.codex/skills/runnable-libcrypto-canonical-eval/SKILL.md new file mode 100644 index 000000000..ea5fb1c49 --- /dev/null +++ b/.codex/skills/runnable-libcrypto-canonical-eval/SKILL.md @@ -0,0 +1,80 @@ +--- +name: runnable-libcrypto-canonical-eval +description: Run or audit the canonical libcrypto evaluation contract for SYM-20 using the repo-local ground-truth bundle reachable from this workspace, fresh compare_runnable_text-based compares, and explicit address mapping. Use when a task mentions SYM-20, canonical libcrypto eval, gtBlock.pb, or when historical CSV/coverage-sidecar results must be distinguished from the authoritative compare path. +--- + +# Runnable Libcrypto Canonical Eval + +Use this skill when the task is specifically about the authoritative libcrypto benchmark contract, not just generic Runnable compare metrics. + +## Canonical Contract + +For `SYM-20`, the authoritative contract in this workspace is: + +1. Canonical binary: + - `python3 runnable/scripts/libcrypto_bench_paths.py binary --must-exist` +2. Canonical ground truth protobuf: + - `python3 runnable/scripts/libcrypto_bench_paths.py groundtruth-pb --must-exist` +3. Protobuf loader: + - `python3 runnable/scripts/libcrypto_bench_paths.py blocks-pb2 --must-exist` +4. Compare tool: + - `python3 runnable/scripts/libcrypto_bench_paths.py cmp-tool --must-exist` +5. 
Address mapping: + - derive `.text` start from the ELF + - quick check: `python3 runnable/scripts/libcrypto_bench_paths.py text-start` + - keep Runnable rebase at `0x50000000` unless the user explicitly provides another contract +6. Compare path: + - `python3 runnable/scripts/validate_libcrypto_ground_truth.py cmp --ll /abs/path/to/file.ll --out-dir /abs/path/to/out` + +The wrapper records fresh `HIT`, `MISMATCH`, `OBJ_ONLY`, `LL_ONLY`, `FALSE_NEGATIVE`, `FALSE_POSITIVE`, `precision`, and `recall`. + +## Quick Start + +Gap-audit the canonical GT bundle: + +```bash +python3 runnable/scripts/validate_libcrypto_ground_truth.py gap-audit \ + --out-dir runs/groundtruth_validation/canonical_gap_audit +``` + +Compare a lift under the canonical contract: + +```bash +python3 runnable/scripts/validate_libcrypto_ground_truth.py cmp \ + --ll /abs/path/to/libcrypto.ll \ + --out-dir runs/groundtruth_validation/canonical_cmp +``` + +## Historical / Non-Canonical Paths + +Do **not** report the following as the canonical `SYM-20` result without an explicit label: + +- `coverage_sidecar_union_threadpool16` +- CSV-only GT flows such as `ground_truth.csv` +- `rebase_base=0x50400000` +- old libcrypto binaries whose SHA differs from the repo-local canonical bundle + +When historical artifacts are involved, report them as: + +- `historical sidecar-union / non-canonical` +- `old-binary compare / non-canonical` +- `canonical gtBlock.pb compare / authoritative` + +## Required Reporting + +Always include: + +- absolute binary path +- absolute GT path +- absolute `.ll` path +- `.text` start +- Runnable base +- `HIT`, `MISMATCH`, `OBJ_ONLY`, `LL_ONLY` +- `FALSE_NEGATIVE`, `FALSE_POSITIVE` +- `precision`, `recall` +- whether the result is `canonical` or `non-canonical` + +## Repo Notes + +- The canonical binary / protobuf are auto-discovered from this workspace's `GroudTruth` checkout, not from a checked-in `archives/` directory under `Runnable-Rewriting`. 
+- `runnable/scripts/validate_libcrypto_ground_truth.py` still supports the legacy CSV validation mode used by existing fn/fp root-cause tests; use `csv-validate` only for that older flow. diff --git a/.codex/skills/runnable-parallel-lift/SKILL.md b/.codex/skills/runnable-parallel-lift/SKILL.md new file mode 100644 index 000000000..2f319fa5f --- /dev/null +++ b/.codex/skills/runnable-parallel-lift/SKILL.md @@ -0,0 +1,109 @@ +--- +name: runnable-parallel-lift +description: Use when working on runnable parallel lift workflows and deciding whether a task should use the current dynamic branch-driven runnable-lift path or the legacy offline static sharding path. +--- + +# Runnable Parallel Lift + +## Overview + +There are two different parallel lift models in this repository. The default current architecture is the online dynamic branch-driven mode inside `runnable-lift`. The older offline static sharding flow exists only for legacy address-ranged experiments and must not be treated as the current default. + +## Default Workflow: Dynamic Branch-Driven `runnable-lift` + +Use this when the task is about the current parallel lift architecture. + +Entry command: + +```bash +runnable-lift <input> <output.ll> \ + -dynamic-parallel \ + -parallel-workers=<N> \ + -parallel-fragment-dir=<dir> \ + [other normal runnable-lift flags] +``` + +Current user-facing flags: + +- `-dynamic-parallel` +- `-parallel-workers=<N>` +- `-parallel-fragment-dir=<dir>` + +Internal flags that agents should not pass manually: + +- `-parallel-worker-mode` +- `-parallel-seed-pc` + +Dynamic artifacts: + +- coordinator output: `<output.ll>` +- worker fragments: `<dir>/worker_<id>.ll` +- worker logs: + - `<dir>/worker_<id>.ll.stdout.log` + - `<dir>/worker_<id>.ll.stderr.log` +- temporary merge output before rename: `<output.ll>.merged.ll` + +Dynamic merge behavior: + +- `runnable/tools/runnable-lift/CodeGenerator.cpp` spawns branch workers from the coordinator. 
+- Successful worker fragments are merged back into the top-level module with `runnable/scripts/merge_dynamic_runnable_fragments.py`. +- The final merged module replaces the coordinator output path. Dynamic mode does not produce `merged_full.ll`, `shard_results.json`, `raw/`, or `shards/`. + +Recommended validation: + +```bash +rg -n "dynamic-parallel|parallel-workers|parallel-fragment-dir" \ + runnable/tools/runnable-lift/Main.cpp \ + runnable/tools/runnable-lift/CodeGenerator.cpp + +python3 -m unittest discover -s test -p 'test_merge_dynamic_runnable_fragments.py' -v +``` + +## Legacy Workflow: Offline Static Sharding + +This is the old address-ranged family. Historical wrappers for it were `scripts/libcrypto_parallel_lift.py` together with `run_libcrypto_parallel_lift_stable.py`. + +In this repository snapshot, the surviving legacy helper and CLI specification for that workflow is `runnable/scripts/_merge_dynamic_fragments_lib.py`. That path expects `runnable-lift` builds with `-addr-range-min` and `-addr-range-max` support and describes the offline per-function/static-shard artifact model instead of online branch spawning. + +Legacy outputs are different from the dynamic mode: + +- `shards/` +- `raw/` +- `eval/` +- `logs/` +- `shard_results.json` +- `shard_results.jsonl` +- `status.json` +- `merged.ll` +- `merged_full.ll` +- `final_report.json` +- `final_report.md` + +Use the legacy flow only when the task explicitly calls for static address-ranged lifting, libcrypto/OpenSSL-style offline evaluation, or comparing against historical shard reports. + +Recommended validation: + +```bash +python3 runnable/scripts/_merge_dynamic_fragments_lib.py --help +git show origin/wip/runnable-20260409-121909:test/lift_openssl_parallel.py | sed -n '1,120p' +``` + +## Guardrails + +- Treat dynamic branch-driven `runnable-lift` as the default explanation unless the task explicitly asks for the old static sharding flow. 
+- Do not mix dynamic worker fragments (`worker_.ll`) with legacy `shards/` or `raw/` outputs in the same interpretation or report. +- Do not describe `scripts/libcrypto_parallel_lift.py` or `run_libcrypto_parallel_lift_stable.py` as the current architecture. +- Do not expect `merged_full.ll`, `parallel.eval.json`, or shard manifests from dynamic mode. +- Do not feed dynamic fragments into the legacy offline evaluation pipeline. Dynamic fragments are only for `merge_dynamic_runnable_fragments.py`. +- Do not pass `-parallel-worker-mode` or `-parallel-seed-pc` by hand. Those are worker-internal flags. +- If a task depends on `-addr-range-min` or `-addr-range-max`, call it legacy/static sharding explicitly and verify the target branch still exposes those flags before proceeding. + +## References + +- `README.md` dynamic parallel prototype section +- `runnable/tools/runnable-lift/Main.cpp` +- `runnable/tools/runnable-lift/CodeGenerator.cpp` +- `runnable/scripts/merge_dynamic_runnable_fragments.py` +- `runnable/scripts/_merge_dynamic_fragments_lib.py` +- `docs/superpowers/plans/2026-04-29-runnable-lift-dynamic-branch-parallel.md` +- `test/test_merge_dynamic_runnable_fragments.py` diff --git a/README.md b/README.md index b0d0115d1..6de442302 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,45 @@ $ runnable-lift hello hello.ll 2>hello.log $ runnable translate hello ``` +## Dynamic Parallel Lift (Prototype) + +The `codex/dynamic-parallel-lift` branch adds an experimental dynamic +branch-driven parallel mode to `runnable-lift`. 
+ +User-facing flags: + +- `-dynamic-parallel`: enable dynamic branch-driven worker spawning +- `-parallel-workers=<N>`: cap the number of worker subprocesses +- `-parallel-fragment-dir=<dir>`: directory for worker `.ll` fragments and logs + +Example: + +``` +$ mkdir -p /tmp/runnable-fragments +$ runnable-lift hello hello.ll \ + -dynamic-parallel \ + -parallel-workers=4 \ + -parallel-fragment-dir=/tmp/runnable-fragments \ + 2>hello.parallel.log +``` + +Artifacts: + +- coordinator output: `hello.ll` +- worker fragments: `/tmp/runnable-fragments/worker_<id>.ll` +- worker stdout/stderr logs: + - `/tmp/runnable-fragments/worker_<id>.ll.stdout.log` + - `/tmp/runnable-fragments/worker_<id>.ll.stderr.log` + +Notes: + +- `-parallel-worker-mode` and `-parallel-seed-pc` are internal flags used by + worker subprocesses and should not be passed manually. +- Successful worker fragments are merged back into the final top-level `.ll` + output with the repository helper `runnable/scripts/merge_dynamic_runnable_fragments.py`. +- This branch is still experimental. If fragment merge fails, the coordinator + output and worker `.ll` fragments are still left on disk for manual inspection. 
+ ## Experimental Evaluation diff --git a/docs/exp/2026-05-10-libcrypto-canonical-eval-contract.md b/docs/exp/2026-05-10-libcrypto-canonical-eval-contract.md new file mode 100644 index 000000000..b8ee677cd --- /dev/null +++ b/docs/exp/2026-05-10-libcrypto-canonical-eval-contract.md @@ -0,0 +1,72 @@ +# 2026-05-10 libcrypto canonical eval contract + +## Goal + +修复 `SYM-20` 里 `libcrypto` 的评估口径错配,把当前 workspace 可直接验证的 canonical contract、历史 non-canonical 路径、以及 repo-local compare tooling 拆开记录清楚。 + +## Canonical Inputs In This Workspace + +- Canonical binary: + - `../GroudTruth/groundtruth-gap-analysis-skill/results/libcrypto-artifacts/libcrypto.so.3` +- Canonical GT: + - `../GroudTruth/groundtruth-gap-analysis-skill/results/libcrypto-artifacts/libcrypto.gtBlock.pb` +- Canonical protobuf loader: + - `../GroudTruth/protobuf_def/blocks_pb2.py` +- Canonical compare wrapper: + - `runnable/scripts/run_cmp_eval.py` +- Canonical audit / compare entrypoint: + - `runnable/scripts/validate_libcrypto_ground_truth.py` + +These paths are discovered by `runnable/scripts/libcrypto_bench_paths.py`. + +## Important Correction + +The earlier `SYM-20` branch version assumed top-level `scripts/`, `tests/`, and `archives/...` paths inside `Runnable-Rewriting`. That does not match the current `codex/dynamic-parallel-lift` branch layout. 
+ +For this branch, the correct repo-local structure is: + +- scripts under `runnable/scripts/` +- tests under `test/` +- canonical binary / gtBlock bundle discovered from the sibling `GroudTruth` checkout + +## Commands + +Resolve canonical assets: + +```bash +python3 runnable/scripts/libcrypto_bench_paths.py binary --must-exist +python3 runnable/scripts/libcrypto_bench_paths.py groundtruth-pb --must-exist +python3 runnable/scripts/libcrypto_bench_paths.py blocks-pb2 --must-exist +python3 runnable/scripts/libcrypto_bench_paths.py cmp-tool --must-exist +python3 runnable/scripts/libcrypto_bench_paths.py text-start +``` + +Gap-audit canonical GT coverage: + +```bash +python3 runnable/scripts/validate_libcrypto_ground_truth.py gap-audit \ + --out-dir runs/groundtruth_validation/canonical_gap_audit +``` + +Compare a lift under the canonical contract: + +```bash +python3 runnable/scripts/validate_libcrypto_ground_truth.py cmp \ + --ll /abs/path/to/libcrypto.ll \ + --out-dir runs/groundtruth_validation/canonical_cmp +``` + +## Historical vs Canonical + +`SYM-19` results using: + +- `coverage_sidecar_union_threadpool16` +- CSV GT bundles +- `rebase_base=0x50400000` +- the old `test/openssl_data/libcrypto.so.3` + +must be treated as `historical sidecar-union / non-canonical`, not directly compared against canonical `gtBlock.pb` metrics. + +## Current Caveat + +This branch now contains the canonical compare wrappers and path resolution logic, but it does **not** vendor the large libcrypto run artifacts themselves into the `Runnable-Rewriting` git repository. The workflow is repo-local in the sense that the scripts and skill are committed here; the binary / protobuf assets are discovered from the co-located `GroudTruth` checkout. 
diff --git a/docs/superpowers/plans/2026-04-29-runnable-lift-dynamic-branch-parallel.md b/docs/superpowers/plans/2026-04-29-runnable-lift-dynamic-branch-parallel.md new file mode 100644 index 000000000..ae7c04efd --- /dev/null +++ b/docs/superpowers/plans/2026-04-29-runnable-lift-dynamic-branch-parallel.md @@ -0,0 +1,93 @@ +# Runnable Lift Dynamic Branch Parallel Plan + +## Purpose + +This note is the repository-local reference for the current parallel lift architecture on `codex/dynamic-parallel-lift`. It exists so skills and future agents do not confuse the new online dynamic branch-driven flow with the older offline static sharding wrappers. + +## Current Default: Online Dynamic Branch-Driven Parallelism + +Primary entrypoint: + +```bash +runnable-lift <input> <output.ll> \ + -dynamic-parallel \ + -parallel-workers=<N> \ + -parallel-fragment-dir=<dir> +``` + +Implementation anchors: + +- CLI flags: `runnable/tools/runnable-lift/Main.cpp` +- worker spawning and merge hook: `runnable/tools/runnable-lift/CodeGenerator.cpp` +- merge helper: `runnable/scripts/merge_dynamic_runnable_fragments.py` +- merge behavior test: `test/test_merge_dynamic_runnable_fragments.py` + +Current behavior: + +- the coordinator runs in the main `runnable-lift` process +- fresh branch frontiers can fork worker subprocesses +- workers write `worker_<id>.ll` fragments into `-parallel-fragment-dir` +- successful fragments are merged back into the coordinator output module +- the final merged top-level module is written back to the original `<output.ll>` path + +Dynamic artifacts: + +- `<output.ll>` +- `<output.ll>.merged.ll` as a temporary pre-rename merge target +- `<dir>/worker_<id>.ll` +- `<dir>/worker_<id>.ll.stdout.log` +- `<dir>/worker_<id>.ll.stderr.log` + +Flags that are internal-only for worker subprocesses: + +- `-parallel-worker-mode` +- `-parallel-seed-pc` + +## Legacy Static Sharding Flow + +Legacy static sharding refers to the older address-ranged workflow historically driven by `scripts/libcrypto_parallel_lift.py` and `run_libcrypto_parallel_lift_stable.py`. 
+ +The closest legacy helper and CLI specification that still exists in this repository is `runnable/scripts/_merge_dynamic_fragments_lib.py`. Its arguments and artifact model describe the old offline address-ranged orchestration family. That flow depends on `runnable-lift` binaries exposing `-addr-range-min` and `-addr-range-max`, as seen on `origin/wip/runnable-20260409-121909`. + +Legacy artifacts are output-tree oriented rather than fragment-dir oriented: + +- `shards/` +- `raw/` +- `eval/` +- `logs/` +- `shard_results.json` +- `shard_results.jsonl` +- `status.json` +- `merged.ll` +- `merged_full.ll` +- `final_report.json` +- `final_report.md` + +Use the legacy flow only for historical per-function/address-range experiments, not for explaining the current default architecture. + +## Validation + +Dynamic validation: + +```bash +rg -n "dynamic-parallel|parallel-workers|parallel-fragment-dir" \ + runnable/tools/runnable-lift/Main.cpp \ + runnable/tools/runnable-lift/CodeGenerator.cpp + +python3 -m unittest discover -s test -p 'test_merge_dynamic_runnable_fragments.py' -v +``` + +Legacy validation: + +```bash +python3 runnable/scripts/_merge_dynamic_fragments_lib.py --help +git grep -n "addr-range-min\\|addr-range-max" origin/wip/runnable-20260409-121909 -- runnable/tools/runnable-lift/JumpTargetManager.cpp test/lift_openssl_parallel.py +``` + +## Guardrails + +- Default to the dynamic branch-driven path when a task says "current parallel lift architecture". +- Call out legacy/static sharding explicitly whenever a workflow depends on `-addr-range-min` or static shard manifests. +- Do not mix dynamic `worker_.ll` fragments with legacy `shards/` and `raw/` outputs. +- Do not claim that dynamic mode emits `merged_full.ll` or shard manifests. +- Do not manually pass worker-internal flags when exercising the public dynamic entrypoint. 
diff --git a/qemu/linux-user/ptc.c b/qemu/linux-user/ptc.c index f1a0f1c7c..28a550fdf 100644 --- a/qemu/linux-user/ptc.c +++ b/qemu/linux-user/ptc.c @@ -309,6 +309,8 @@ int ptc_load(void *handle, PTCInterface *output, const char *ptc_filename, result.disassemble = &ptc_disassemble; result.do_syscall2 = &ptc_do_syscall2; result.storeCPUState = &ptc_storeCPUState; + result.dropCPUState = &ptc_dropCPUState; + result.queueDepth = &ptc_queueDepth; result.getBranchCPUeip = &ptc_getBranchCPUeip; result.deletCPULINEState = &ptc_deletCPULINEState; result.recoverStack = &ptc_recoverStack; @@ -1444,6 +1446,22 @@ uint32_t ptc_storeCPUState(void) { return 1; } +uint32_t ptc_dropCPUState(void){ + BranchState datatmp; + + if(isEmpty()) + return 0; + + datatmp = deletArchCPUStateQueueLine(); + free(datatmp.elf_data); + free(datatmp.elf_stack); + return 1; +} + +uint32_t ptc_queueDepth(void){ + return numsArchCPUStateQueueLine(); +} + void ptc_recoverStack(void){ CPUArchState *env = (CPUArchState *)cpu->env_ptr; memcpy((void *)env->regs[4],current_stack,elf_start_stack-(abi_ulong)env->regs[4]); diff --git a/qemu/linux-user/ptc.h b/qemu/linux-user/ptc.h index b1231f05e..9b4e850ff 100644 --- a/qemu/linux-user/ptc.h +++ b/qemu/linux-user/ptc.h @@ -257,6 +257,8 @@ EXPORTED(uint64_t, ptc_run_library, (size_t flag)); EXPORTED(void, ptc_data_start, (uint64_t start, uint64_t entry)); EXPORTED(unsigned long, ptc_do_syscall2, (void)); EXPORTED(uint32_t, ptc_storeCPUState, (void)); +EXPORTED(uint32_t, ptc_dropCPUState, (void)); +EXPORTED(uint32_t, ptc_queueDepth, (void)); EXPORTED(void, ptc_getBranchCPUeip,(void)); EXPORTED(uint32_t, ptc_deletCPULINEState,(void)); EXPORTED(void,ptc_recoverStack,(void)); @@ -290,6 +292,8 @@ typedef struct { ptc_do_syscall2_ptr_t do_syscall2; ptc_storeCPUState_ptr_t storeCPUState; + ptc_dropCPUState_ptr_t dropCPUState; + ptc_queueDepth_ptr_t queueDepth; ptc_getBranchCPUeip_ptr_t getBranchCPUeip; ptc_deletCPULINEState_ptr_t deletCPULINEState; 
ptc_recoverStack_ptr_t recoverStack; diff --git a/runnable/CMakeLists.txt b/runnable/CMakeLists.txt index d72f5fe9c..b43e55bcb 100644 --- a/runnable/CMakeLists.txt +++ b/runnable/CMakeLists.txt @@ -159,10 +159,30 @@ configure_file(include/runnable/Runtime/commonconstants.h "${CMAKE_BINARY_DIR}/c configure_file(runtime/early-linked.c "${CMAKE_BINARY_DIR}/early-linked.c" COPYONLY) configure_file(scripts/runnable "${CMAKE_BINARY_DIR}/runnable" COPYONLY) configure_file(scripts/runnable-merge-dynamic "${CMAKE_BINARY_DIR}/runnable-merge-dynamic" COPYONLY) +configure_file(scripts/merge_dynamic_runnable_fragments.py "${CMAKE_BINARY_DIR}/merge_dynamic_runnable_fragments.py" COPYONLY) +configure_file(scripts/_merge_dynamic_fragments_lib.py "${CMAKE_BINARY_DIR}/_merge_dynamic_fragments_lib.py" COPYONLY) +configure_file(scripts/_compare_runnable_text_lib.py "${CMAKE_BINARY_DIR}/_compare_runnable_text_lib.py" COPYONLY) +configure_file(scripts/libcrypto_bench_paths.py "${CMAKE_BINARY_DIR}/libcrypto_bench_paths.py" COPYONLY) +configure_file(scripts/run_cmp_eval.py "${CMAKE_BINARY_DIR}/run_cmp_eval.py" COPYONLY) +configure_file(scripts/validate_libcrypto_ground_truth.py "${CMAKE_BINARY_DIR}/validate_libcrypto_ground_truth.py" COPYONLY) +configure_file(scripts/analyze_fn_fp_root_causes.py "${CMAKE_BINARY_DIR}/analyze_fn_fp_root_causes.py" COPYONLY) +configure_file(scripts/_fn_fp_root_cause_lib.py "${CMAKE_BINARY_DIR}/_fn_fp_root_cause_lib.py" COPYONLY) install(PROGRAMS scripts/runnable scripts/runnable-merge-dynamic DESTINATION bin) +install(PROGRAMS + scripts/merge_dynamic_runnable_fragments.py + scripts/libcrypto_bench_paths.py + scripts/run_cmp_eval.py + scripts/validate_libcrypto_ground_truth.py + scripts/analyze_fn_fp_root_causes.py + DESTINATION share/runnable) install(FILES runtime/support.c DESTINATION share/runnable) install(FILES runtime/support.h DESTINATION share/runnable) install(FILES include/runnable/Runtime/commonconstants.h DESTINATION share/runnable) 
+install(FILES + scripts/_compare_runnable_text_lib.py + scripts/_merge_dynamic_fragments_lib.py + scripts/_fn_fp_root_cause_lib.py + DESTINATION share/runnable) # Remove -rdynamic set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) @@ -170,5 +190,3 @@ set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) install(EXPORT runnable NAMESPACE runnable:: DESTINATION share/runnable/cmake) - - diff --git a/runnable/scripts/_compare_runnable_text_lib.py b/runnable/scripts/_compare_runnable_text_lib.py new file mode 100644 index 000000000..3e759a68f --- /dev/null +++ b/runnable/scripts/_compare_runnable_text_lib.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 + +from __future__ import print_function + +import collections +import re +import subprocess + + +OBJ_RE = re.compile(r"^\s+([0-9a-fA-F]+):\t(.*?)\t(.*)$") +OBJ_BLANK_RE = re.compile(r"^\s+([0-9a-fA-F]+):\t(.*)$") +LL_RE = re.compile(r"^\s*;\s*(0x[0-9a-fA-F]+):\s+(.*)$") +IGNORE_TOKENS = ("nop", "data", "xchg") +ADV_MAP = { + "cqto": "cqo", + "cltd": "cdq", + "cltq": "cdqe", + "cbtw": "cbw", + "cwtl": "cwde", +} + + +def adv_cmp(op1, op2): + return ADV_MAP.get(op1) == op2 + + +def op_match(op1, op2): + return ( + op1 == op2 + or op1 in op2 + or op2 in op1 + or ("mov" in op1 and "mov" in op2) + or adv_cmp(op1, op2) + or adv_cmp(op2, op1) + ) + + +def parse_objdump(binary_path, text_start): + out = subprocess.check_output(["objdump", "-d", str(binary_path)], universal_newlines=True) + instructions = collections.OrderedDict() + + for line in out.splitlines(): + match = OBJ_RE.match(line) + if match is not None: + addr = int(match.group(1), 16) + if addr < text_start: + continue + asm = match.group(3) + op_matcher = re.match(r"^(\S+)\s*(.*)$", asm) + ins = op_matcher.group(1) if op_matcher else asm.strip() + if ins and not any(token in ins for token in IGNORE_TOKENS): + instructions[addr] = ins + continue + + match = OBJ_BLANK_RE.match(line) + if match is not None: + addr = int(match.group(1), 16) + if addr < text_start: + continue + + return 
instructions + + +def parse_ll_raw(ll_path): + instructions = collections.OrderedDict() + with open(ll_path, "r", errors="ignore") as handle: + for line in handle: + match = LL_RE.match(line) + if match is None: + continue + raw_addr = int(match.group(1), 16) + asm = match.group(2).strip() + op_matcher = re.match(r"^(\S+)\s*(\S*)", asm) + ins = op_matcher.group(1) if op_matcher else asm.split()[0] + if ins and not any(token in ins for token in IGNORE_TOKENS): + instructions[raw_addr] = ins + return instructions + + +def normalize_ll_addresses(raw_instructions, text_start, base): + instructions = collections.OrderedDict() + for raw_addr, ins in raw_instructions.items(): + addr = raw_addr - base if raw_addr >= base else raw_addr + if addr < text_start: + continue + instructions[addr] = ins + return instructions + + +def compare(obj_instructions, ll_instructions, example_limit): + hits = 0 + mismatch = 0 + obj_only = 0 + ll_only = 0 + mismatch_examples = [] + obj_only_examples = [] + ll_only_examples = [] + + for addr, obj_ins in obj_instructions.items(): + ll_ins = ll_instructions.get(addr) + if ll_ins is None: + obj_only += 1 + if len(obj_only_examples) < example_limit: + obj_only_examples.append((addr, obj_ins)) + continue + if op_match(obj_ins, ll_ins): + hits += 1 + else: + mismatch += 1 + if len(mismatch_examples) < example_limit: + mismatch_examples.append((addr, obj_ins, ll_ins)) + + for addr, ll_ins in ll_instructions.items(): + if addr not in obj_instructions: + ll_only += 1 + if len(ll_only_examples) < example_limit: + ll_only_examples.append((addr, ll_ins)) + + false_negative = obj_only + mismatch + false_positive = ll_only + mismatch + obj_count = len(obj_instructions) + ll_count = len(ll_instructions) + recall = float(hits) / obj_count if obj_count else 0.0 + precision = float(hits) / ll_count if ll_count else 0.0 + + return { + "obj_count": obj_count, + "ll_count": ll_count, + "hit": hits, + "mismatch": mismatch, + "obj_only": obj_only, + "ll_only": 
ll_only, + "false_negative": false_negative, + "false_positive": false_positive, + "recall": recall, + "precision": precision, + "mismatch_examples": mismatch_examples, + "obj_only_examples": obj_only_examples, + "ll_only_examples": ll_only_examples, + } diff --git a/runnable/scripts/_fn_fp_root_cause_lib.py b/runnable/scripts/_fn_fp_root_cause_lib.py new file mode 100644 index 000000000..d4583dbe6 --- /dev/null +++ b/runnable/scripts/_fn_fp_root_cause_lib.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 + +import csv +import json +import re +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple + + +LL_COMMENT_RE = re.compile(r"^\s*;\s*(0x[0-9a-fA-F]+):(.*)$") +HEX_RE = re.compile(r"0x[0-9a-fA-F]+|[0-9a-fA-F]{4,}") +ILLEGAL_RANGE_RE = re.compile(r"^\s*([0-9a-fA-Fx]+)\s*,\s*([0-9a-fA-Fx]+)\s*$") + + +@dataclass(frozen=True) +class SymbolRange: + start_csv: int + end_csv: int + start_runtime: int + end_runtime: int + name: str + + +def parse_int(value: str) -> int: + return int(value, 0) + + +def csv_to_runtime(addr_csv: int, csv_image_base: int, rebase_base: int) -> int: + return rebase_base + (addr_csv - csv_image_base) + + +def runtime_to_csv(addr_runtime: int, csv_image_base: int, rebase_base: int) -> int: + return csv_image_base + (addr_runtime - rebase_base) + + +def load_ground_truth_csv( + path: Path, + *, + csv_image_base: int, + rebase_base: int, +) -> Tuple[Set[int], List[Tuple[int, int]]]: + instruction_addrs: Set[int] = set() + data_ranges: List[Tuple[int, int]] = [] + with path.open("r", encoding="utf-8") as handle: + reader = csv.DictReader(handle) + for row in reader: + kind = row["kind"] + start_csv = parse_int(row["start_hex"]) + end_csv = parse_int(row["end_hex"]) + start_runtime = csv_to_runtime(start_csv, csv_image_base, rebase_base) + end_runtime = csv_to_runtime(end_csv, csv_image_base, rebase_base) + if kind == "instruction_start": + 
instruction_addrs.add(start_runtime) + elif kind == "data_section": + data_ranges.append((start_runtime, end_runtime)) + return instruction_addrs, data_ranges + + +def load_function_symbols_csv( + path: Path, + *, + csv_image_base: int, + rebase_base: int, +) -> List[SymbolRange]: + rows: List[Tuple[int, str]] = [] + with path.open("r", encoding="utf-8") as handle: + reader = csv.DictReader(handle) + for row in reader: + start_csv = parse_int(row["address_hex"]) + rows.append((start_csv, row["symbol"])) + rows.sort(key=lambda item: item[0]) + ranges: List[SymbolRange] = [] + for idx, (start_csv, name) in enumerate(rows): + next_start = rows[idx + 1][0] if idx + 1 < len(rows) else start_csv + 0x100 + end_csv = max(start_csv, next_start - 1) + ranges.append( + SymbolRange( + start_csv=start_csv, + end_csv=end_csv, + start_runtime=csv_to_runtime(start_csv, csv_image_base, rebase_base), + end_runtime=csv_to_runtime(end_csv, csv_image_base, rebase_base), + name=name, + ) + ) + return ranges + + +def load_lifted_addresses_from_ll(path: Path) -> List[int]: + addrs: Dict[int, str] = {} + with path.open("r", encoding="utf-8", errors="ignore") as handle: + for line in handle: + match = LL_COMMENT_RE.match(line) + if match is None: + continue + addr = int(match.group(1), 16) + addrs[addr] = match.group(2).strip() + return sorted(addrs) + + +def load_lifted_instruction_map(path: Path) -> Dict[int, str]: + lifted: Dict[int, str] = {} + with path.open("r", encoding="utf-8", errors="ignore") as handle: + for line in handle: + match = LL_COMMENT_RE.match(line) + if match is None: + continue + addr = int(match.group(1), 16) + lifted[addr] = match.group(2).strip() + return lifted + + +def write_json(path: Optional[Path], payload: Dict[str, object]) -> None: + if path is None: + return + path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + + +def hex_list(values: Iterable[int]) -> List[str]: + return [f"0x{value:x}" for value in sorted(values)] + 
def find_symbol_for_address(
    symbols: Sequence["SymbolRange"], runtime_addr: int
) -> Optional["SymbolRange"]:
    """Return the first symbol whose runtime range contains ``runtime_addr``.

    Both ``start_runtime`` and ``end_runtime`` are treated as inclusive bounds.
    Symbols are scanned in the order given, so the earliest matching entry wins.
    Returns ``None`` when no range matches.
    """
    return next(
        (
            candidate
            for candidate in symbols
            if candidate.start_runtime <= runtime_addr <= candidate.end_runtime
        ),
        None,
    )


def read_json(path: Path) -> Dict[str, object]:
    """Decode the UTF-8 JSON document stored at ``path`` and return it."""
    return json.loads(path.read_text(encoding="utf-8"))


def load_shard_results(path: Path) -> Dict[str, Dict[str, object]]:
    """Index the shard records stored at ``path`` by their ``"tag"`` value.

    The decoded document is iterated directly and every element is subscripted
    with ``"tag"``; if several records share a tag, the last one wins.
    """
    # NOTE(review): read_json is annotated as returning a dict, but this loop
    # only works when the top-level JSON value is an array of objects -- confirm
    # against the producer of shard_results.json.
    records = read_json(path)
    return {record["tag"]: record for record in records}


def tag_for_symbol(symbol: "SymbolRange") -> str:
    """Return ``fn_`` followed by ``symbol.start_csv`` as 16 zero-padded hex digits."""
    return f"fn_{symbol.start_csv:016x}"


def shard_path(root: Path, relative_or_abs: Optional[str]) -> Optional[Path]:
    """Resolve an artifact path string against ``root``.

    Returns ``None`` for ``None`` or an empty string, the path unchanged when it
    is already absolute, and ``root / path`` otherwise.
    """
    if not relative_or_abs:
        return None
    candidate = Path(relative_or_abs)
    return candidate if candidate.is_absolute() else root / candidate


def _is_regular_file(path: Optional[Path]) -> bool:
    """Tell whether ``path`` is set and names an existing regular file."""
    # is_file() rather than exists(): a directory "exists" but cannot be read as
    # text, and optional evidence should degrade to "missing", not crash.
    return path is not None and path.is_file()


def _parse_hex_token(token: str) -> int:
    """Parse a hexadecimal token, with or without a ``0x`` prefix."""
    # int(..., 16) already accepts an optional 0x/0X prefix, so no separate
    # radix selection is needed for prefixed tokens.
    return int(token, 16)


def read_text_if_exists(path: Optional[Path]) -> str:
    """Return the text of ``path``, or ``""`` when it is ``None`` or not a file.

    The file is decoded as UTF-8 and undecodable bytes are dropped.
    """
    if not _is_regular_file(path):
        return ""
    return path.read_text(encoding="utf-8", errors="ignore")


def load_illegal_addresses(path: Optional[Path]) -> Set[int]:
    """Collect the addresses mentioned in an illegal-entry log.

    Blank lines are skipped.  A line matching ``ILLEGAL_RANGE_RE`` contributes
    every address from its first group through its second group, inclusive.
    Any other line contributes each ``HEX_RE`` match that parses as hex;
    matches that fail to parse are ignored.  Returns an empty set when ``path``
    is ``None`` or is not a regular file.
    """
    if not _is_regular_file(path):
        return set()
    illegal: Set[int] = set()
    for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines():
        entry = raw_line.strip()
        if not entry:
            continue
        range_match = ILLEGAL_RANGE_RE.match(entry)
        if range_match is not None:
            first = _parse_hex_token(range_match.group(1))
            last = _parse_hex_token(range_match.group(2))
            # The end bound is inclusive; a reversed range adds nothing.
            illegal.update(range(first, last + 1))
            continue
        for token in HEX_RE.findall(entry):
            try:
                illegal.add(_parse_hex_token(token))
            except ValueError:
                continue
    return illegal


def load_coverage_addresses(path: Optional[Path]) -> Set[int]:
    """Collect the addresses listed in the first column of a coverage CSV.

    Each line is split on commas and its first field is parsed as hexadecimal.
    Lines whose first field is empty or does not parse (such as a header row)
    are skipped.  Returns an empty set when ``path`` is ``None`` or is not a
    regular file.
    """
    if not _is_regular_file(path):
        return set()
    covered: Set[int] = set()
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for line in handle:
            first_field = line.strip().split(",", 1)[0]
            if not first_field:
                continue
            try:
                covered.add(int(first_field, 16))
            except ValueError:
                continue
    return covered


def is_continuation_byte(addr: int, gt_instrs: Set[int], max_offset: int = 3) -> bool:
    """Tell whether ``addr`` lies just after a ground-truth instruction start.

    Returns ``True`` when ``addr`` is not itself in ``gt_instrs`` but one of the
    ``max_offset`` addresses immediately below it is.  The default of 3 keeps
    the original fixed look-back distance.
    """
    if addr in gt_instrs:
        return False
    # Probe the few candidate start addresses instead of scanning the whole set.
    return any(addr - delta in gt_instrs for delta in range(1, max_offset + 1))


def is_in_ranges(addr: int, ranges: Sequence[Tuple[int, int]]) -> bool:
    """Tell whether ``addr`` falls inside any inclusive ``(start, end)`` pair."""
    return any(low <= addr <= high for low, high in ranges)


def neighbor_ground_truth(addr: int, gt_instrs: Set[int], max_distance: int = 8) -> bool:
    """Tell whether a ground-truth address lies within ``max_distance`` of ``addr``.

    The distance is absolute and inclusive, so ``addr`` itself counts when it is
    in ``gt_instrs``.  A negative ``max_distance`` never matches.
    """
    if max_distance < 0:
        return False
    window = 2 * max_distance + 1
    if window < len(gt_instrs):
        # Small window, large set: probing each address in the window is
        # cheaper than visiting every ground-truth entry.
        return any(
            candidate in gt_instrs
            for candidate in range(addr - max_distance, addr + max_distance + 1)
        )
    return any(abs(addr - known) <= max_distance for known in gt_instrs)
+LL_COMMENT_RE = re.compile(r"^\s*;\s*(0x[0-9a-fA-F]+):(.*)$") +ROOT_LABEL_RE = re.compile(r"^([A-Za-z$._0-9-]+):") +ROOT_BLOCK_LABEL_RE = re.compile(r"^bb\.0x([0-9a-fA-F]+)") +ROOT_SYMBOLIC_BLOCK_RE = re.compile(r"^(bb\.[A-Za-z$._0-9-]+?)(?:\.0x([0-9a-fA-F]+))?(?:$|[._].*)") +ROOT_CASE_RE = re.compile(r"^\s*i64\s+(\d+),\s+label\s+%([A-Za-z$._0-9-]+)") +SWITCH_CASE_RE = re.compile(r"^\s*i\d+\s+(-?\d+),\s+label\s+%[A-Za-z$._0-9-]+") +ANON_BLOCK_LABEL_RE = re.compile(r"^\s*;\s*