From e44700d3216660995601034a01cf7b2fd059009c Mon Sep 17 00:00:00 2001 From: iskindar Date: Wed, 29 Apr 2026 22:22:58 +0800 Subject: [PATCH 1/7] feat: prototype fork-based dynamic branch workers --- qemu/linux-user/ptc.c | 18 ++ qemu/linux-user/ptc.h | 4 + .../tools/runnable-lift/CodeGenerator.cpp | 215 +++++++++++++++--- runnable/tools/runnable-lift/CodeGenerator.h | 27 ++- .../tools/runnable-lift/JumpTargetManager.cpp | 38 +++- .../tools/runnable-lift/JumpTargetManager.h | 1 + runnable/tools/runnable-lift/Main.cpp | 32 ++- .../tools/runnable-lift/ParallelOptions.h | 22 ++ 8 files changed, 319 insertions(+), 38 deletions(-) create mode 100644 runnable/tools/runnable-lift/ParallelOptions.h diff --git a/qemu/linux-user/ptc.c b/qemu/linux-user/ptc.c index f1a0f1c7c..28a550fdf 100644 --- a/qemu/linux-user/ptc.c +++ b/qemu/linux-user/ptc.c @@ -309,6 +309,8 @@ int ptc_load(void *handle, PTCInterface *output, const char *ptc_filename, result.disassemble = &ptc_disassemble; result.do_syscall2 = &ptc_do_syscall2; result.storeCPUState = &ptc_storeCPUState; + result.dropCPUState = &ptc_dropCPUState; + result.queueDepth = &ptc_queueDepth; result.getBranchCPUeip = &ptc_getBranchCPUeip; result.deletCPULINEState = &ptc_deletCPULINEState; result.recoverStack = &ptc_recoverStack; @@ -1444,6 +1446,22 @@ uint32_t ptc_storeCPUState(void) { return 1; } +uint32_t ptc_dropCPUState(void){ + BranchState datatmp; + + if(isEmpty()) + return 0; + + datatmp = deletArchCPUStateQueueLine(); + free(datatmp.elf_data); + free(datatmp.elf_stack); + return 1; +} + +uint32_t ptc_queueDepth(void){ + return numsArchCPUStateQueueLine(); +} + void ptc_recoverStack(void){ CPUArchState *env = (CPUArchState *)cpu->env_ptr; memcpy((void *)env->regs[4],current_stack,elf_start_stack-(abi_ulong)env->regs[4]); diff --git a/qemu/linux-user/ptc.h b/qemu/linux-user/ptc.h index b1231f05e..9b4e850ff 100644 --- a/qemu/linux-user/ptc.h +++ b/qemu/linux-user/ptc.h @@ -257,6 +257,8 @@ EXPORTED(uint64_t, ptc_run_library, 
(size_t flag)); EXPORTED(void, ptc_data_start, (uint64_t start, uint64_t entry)); EXPORTED(unsigned long, ptc_do_syscall2, (void)); EXPORTED(uint32_t, ptc_storeCPUState, (void)); +EXPORTED(uint32_t, ptc_dropCPUState, (void)); +EXPORTED(uint32_t, ptc_queueDepth, (void)); EXPORTED(void, ptc_getBranchCPUeip,(void)); EXPORTED(uint32_t, ptc_deletCPULINEState,(void)); EXPORTED(void,ptc_recoverStack,(void)); @@ -290,6 +292,8 @@ typedef struct { ptc_do_syscall2_ptr_t do_syscall2; ptc_storeCPUState_ptr_t storeCPUState; + ptc_dropCPUState_ptr_t dropCPUState; + ptc_queueDepth_ptr_t queueDepth; ptc_getBranchCPUeip_ptr_t getBranchCPUeip; ptc_deletCPULINEState_ptr_t deletCPULINEState; ptc_recoverStack_ptr_t recoverStack; diff --git a/runnable/tools/runnable-lift/CodeGenerator.cpp b/runnable/tools/runnable-lift/CodeGenerator.cpp index 3a9c9c087..e4579a98f 100644 --- a/runnable/tools/runnable-lift/CodeGenerator.cpp +++ b/runnable/tools/runnable-lift/CodeGenerator.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -64,6 +66,37 @@ using namespace llvm; using std::make_pair; using std::string; +namespace { + +static std::string hexValue(uint64_t Value) { + std::ostringstream Stream; + Stream << std::hex << Value; + return Stream.str(); +} + +static std::string findRepoRootFromCwd() { + char Buffer[4096]; + if (getcwd(Buffer, sizeof(Buffer)) == nullptr) + return ""; + std::string Current(Buffer); + while (!Current.empty()) { + std::string Probe = Current + "/scripts/merge_dynamic_runnable_fragments.py"; + if (access(Probe.c_str(), F_OK) == 0) + return Current; + size_t Slash = Current.find_last_of('/'); + if (Slash == std::string::npos) + break; + if (Slash == 0) { + Current = "/"; + break; + } + Current = Current.substr(0, Slash); + } + return ""; +} + +} // namespace + // Register all the arguments cl::opt ExeInit("exe-init", @@ -179,13 +212,15 @@ CodeGenerator::CodeGenerator(BinaryFile &Binary, llvm::LLVMContext &TheContext, std::string 
Output, std::string Helpers, - std::string EarlyLinked) : + std::string EarlyLinked, + const ParallelOptions &Options) : TargetArchitecture(Target), Context(TheContext), TheModule((new Module("top", Context))), OutputPath(Output), Debug(new DebugHelper(Output, TheModule.get(), DebugInfo, DebugPath)), - Binary(Binary) { + Binary(Binary), + ParallelConfig(Options) { OriginalInstrMDKind = Context.getMDKindID("oi"); PTCInstrMDKind = Context.getMDKindID("pi"); @@ -1015,14 +1050,14 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { } }////?end if(!JumpTargets.haveBB) - if(EntryFlag and JumpTargets.haveBB){ + if(EntryFlag and JumpTargets.haveBB){ JumpTargets.handleSuspectDataRegion(SuspectEntryAddr,VirtualAddress); SuspectEntryAddr = 0; EntryFlag = false; } - // Obtain a new program counter to translate - std::tie(VirtualAddress, Entry) = JumpTargets.peek(); + // Obtain a new program counter to translate + std::tie(VirtualAddress, Entry) = JumpTargets.peek(); if(*ptc.isCall and BlockBRs){ if(!JumpTargets.isDataSegmAddr(ptc.regs[R_ESP])){ @@ -1036,7 +1071,7 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { *ptc.isCall = 0; } - if(!EntryFlag){ + if(!EntryFlag){ if(*ptc.exception_syscall == 0x100){ if(ExeInit){ if(ptc.regs[R_EAX]==20){ @@ -1078,9 +1113,12 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { if(it == JumpTargets.CallBranches.end()) DynamicVirtualAddress = 0; } + if(!JumpTargets.haveBB && DynamicVirtualAddress != 0 + && JumpTargets.isOutOfAddrRange(DynamicVirtualAddress)) + DynamicVirtualAddress = 0; } - if(traverseFLAG){ + if(traverseFLAG){ //handle invalid address if(!JumpTargets.isExecutableAddress(DynamicVirtualAddress) and !JumpTargets.haveBB) @@ -1092,22 +1130,31 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { } - // Some branch destination addr is 0 - if((JumpTargets.haveBB || DynamicVirtualAddress == 0 ) and - !JumpTargets.BranchTargets.empty()) - { - BlockBRs = nullptr; - // if occure a translated BB, traversing 
next branch - std::tie(jtVirtualAddress, srcBB, srcAddr) = JumpTargets.BranchTargets.front(); - errs()<<"--------------------\n"; - JumpTargets.BranchTargets.erase(JumpTargets.BranchTargets.begin()); - ptc.deletCPULINEState(); - DynamicVirtualAddress = jtVirtualAddress; - BaseData.clear(); - } - - if(DynamicVirtualAddress){ - auto tmpBB = JumpTargets.registerJT(DynamicVirtualAddress,JTReason::GlobalData); + // Some branch destination addr is 0 + if((JumpTargets.haveBB || DynamicVirtualAddress == 0 ) and + !JumpTargets.BranchTargets.empty()) + { + BlockBRs = nullptr; + // if occure a translated BB, traversing next branch + std::tie(jtVirtualAddress, srcBB, srcAddr) = JumpTargets.BranchTargets.front(); + errs()<<"--------------------\n"; + if (trySpawnBranchWorker(jtVirtualAddress, JumpTargets.BranchTargets)) { + JumpTargets.haveBB = 0; + DynamicVirtualAddress = 0; + BaseData.clear(); + } else if (ParallelConfig.WorkerMode) { + DynamicVirtualAddress = jtVirtualAddress; + BaseData.clear(); + } else { + JumpTargets.BranchTargets.erase(JumpTargets.BranchTargets.begin()); + ptc.deletCPULINEState(); + DynamicVirtualAddress = jtVirtualAddress; + BaseData.clear(); + } + } + + if(DynamicVirtualAddress){ + auto tmpBB = JumpTargets.registerJT(DynamicVirtualAddress,JTReason::GlobalData); //JumpTargets.isContainIndirectInst(DynamicVirtualAddress,tmpVA,tmpBB); if(JumpTargets.haveBB){ // If have translated BB, give Entry an arbitrary value @@ -1122,14 +1169,14 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { srcBB = nullptr; JumpTargets.haveBB = 0; } - if(BlockBRs != nullptr and !EntryFlag){ - auto branchLabeledcontent = Translator.branchcontent(); - JumpTargets.harvestbranchBasicBlock(VirtualAddress, + if(BlockBRs != nullptr and !EntryFlag){ + auto branchLabeledcontent = Translator.branchcontent(); + JumpTargets.harvestbranchBasicBlock(VirtualAddress, tmpVA, BlockBRs, - Translator.branchsize(), - branchLabeledcontent); - } + Translator.branchsize(), + 
branchLabeledcontent); + } std::cerr<print(Output, false); } + if (!ParallelConfig.WorkerMode) + mergeForkWorkerFragments(); +} + +void CodeGenerator::switchToWorkerOutput(uint64_t SeedPC) { + if (!ParallelConfig.FragmentDir.empty()) { + std::string Command = "mkdir -p \"" + ParallelConfig.FragmentDir + "\""; + ::system(Command.c_str()); + } + std::ostringstream Path; + Path << ParallelConfig.FragmentDir << "/worker_" << hexValue(SeedPC) << ".ll"; + OutputPath = Path.str(); + CoveragePath = OutputPath + ".coverage.csv"; + BBSummaryPath = OutputPath + ".bbsummary.csv"; + LinkingInfoPath = OutputPath + ".li.csv"; + Debug.reset(new DebugHelper(OutputPath, TheModule.get(), DebugInfo, DebugPath)); +} + +bool CodeGenerator::trySpawnBranchWorker( + uint64_t SeedPC, + std::vector> &BranchTargets) { + if (!ParallelConfig.DynamicParallel || ParallelConfig.WorkerMode) + return false; + if (ParallelConfig.WorkerCount == 0) + return false; + if (ParallelSpawnedSeeds.count(SeedPC) != 0) + return false; + + size_t ActiveWorkers = 0; + for (const auto &Worker : ParallelWorkers) + if (!Worker.Finished) + ActiveWorkers++; + if (ActiveWorkers >= ParallelConfig.WorkerCount) + return false; + + pid_t PID = fork(); + runnable_assert(PID >= 0, "failed to fork branch worker"); + if (PID == 0) { + ParallelConfig.WorkerMode = true; + switchToWorkerOutput(SeedPC); + BranchTargets.erase(BranchTargets.begin()); + ptc.deletCPULINEState(); + return false; + } + + BranchTargets.erase(BranchTargets.begin()); + ptc.dropCPUState(); + ParallelSpawnedSeeds.insert(SeedPC); + std::ostringstream WorkerOutput; + WorkerOutput << ParallelConfig.FragmentDir << "/worker_" << hexValue(SeedPC) << ".ll"; + ParallelWorkers.push_back({ static_cast(PID), SeedPC, WorkerOutput.str(), -1, false }); + ParallelWorkersSpawned++; + return true; +} + +void CodeGenerator::waitForForkWorkers() { + if (!ParallelConfig.DynamicParallel || ParallelConfig.WorkerMode) + return; + + for (auto &Worker : ParallelWorkers) { + if 
(Worker.Finished) + continue; + int Status = 0; + pid_t Waited = waitpid(Worker.Pid, &Status, 0); + if (Waited != Worker.Pid) + continue; + Worker.Finished = true; + Worker.ExitCode = WIFEXITED(Status) ? WEXITSTATUS(Status) : -1; + if (Worker.ExitCode == 0) + ParallelWorkersSucceeded++; + else + ParallelWorkersFailed++; + } +} + +void CodeGenerator::mergeForkWorkerFragments() { + if (!ParallelConfig.DynamicParallel || ParallelConfig.WorkerMode) + return; + if (ParallelWorkersSucceeded == 0) + return; + + std::string RepoRoot = findRepoRootFromCwd(); + if (RepoRoot.empty()) + return; + std::string ScriptPath = RepoRoot + "/scripts/merge_dynamic_runnable_fragments.py"; + if (access(ScriptPath.c_str(), F_OK) != 0) + return; + + std::string TempOutput = OutputPath + ".merged.ll"; + std::ostringstream EntryPC; + EntryPC << "0x" << std::hex << Binary.entryPoint(); + std::ostringstream Command; + Command << "python3 " << ScriptPath + << " --output " << TempOutput + << " --entry-pc " << EntryPC.str() + << " " << OutputPath; + for (const auto &Worker : ParallelWorkers) { + if (Worker.ExitCode != 0) + continue; + Command << " " << Worker.OutputPath; + } + int RC = ::system(Command.str().c_str()); + if (RC != 0) + return; + rename(TempOutput.c_str(), OutputPath.c_str()); } diff --git a/runnable/tools/runnable-lift/CodeGenerator.h b/runnable/tools/runnable-lift/CodeGenerator.h index f35665f4b..1cb10099f 100644 --- a/runnable/tools/runnable-lift/CodeGenerator.h +++ b/runnable/tools/runnable-lift/CodeGenerator.h @@ -8,7 +8,9 @@ // Standard includes #include #include +#include #include +#include // LLVM includes #include "llvm/ADT/ArrayRef.h" @@ -18,6 +20,7 @@ // Local includes #include "BinaryFile.h" +#include "ParallelOptions.h" // Forward declarations namespace llvm { @@ -38,6 +41,14 @@ class ObjectFile; class DebugHelper; +struct ParallelWorkerState { + int Pid = -1; + uint64_t SeedPC = 0; + std::string OutputPath; + int ExitCode = -1; + bool Finished = false; +}; + /// 
Translator from binary code to LLVM IR. class CodeGenerator { public: @@ -54,7 +65,8 @@ class CodeGenerator { llvm::LLVMContext &TheContext, std::string Output, std::string Helpers, - std::string EarlyLinked); + std::string EarlyLinked, + const ParallelOptions &Options); ~CodeGenerator(); @@ -97,6 +109,12 @@ class CodeGenerator { /// \param Name name of the imported function llvm::Function *importHelperFunctionDeclaration(llvm::StringRef Name); + void switchToWorkerOutput(uint64_t SeedPC); + bool trySpawnBranchWorker(uint64_t SeedPC, + std::vector> &BranchTargets); + void waitForForkWorkers(); + void mergeForkWorkerFragments(); + private: Architecture TargetArchitecture; llvm::LLVMContext &Context; @@ -112,6 +130,13 @@ class CodeGenerator { unsigned DbgMDKind; std::string FunctionListPath; + ParallelOptions ParallelConfig; + std::vector ParallelWorkers; + std::set ParallelSpawnedSeeds; + uint64_t ParallelFrontierCandidates = 0; + uint64_t ParallelWorkersSpawned = 0; + uint64_t ParallelWorkersSucceeded = 0; + uint64_t ParallelWorkersFailed = 0; }; #endif // CODEGENERATOR_H diff --git a/runnable/tools/runnable-lift/JumpTargetManager.cpp b/runnable/tools/runnable-lift/JumpTargetManager.cpp index cf51c65c4..17e519bda 100644 --- a/runnable/tools/runnable-lift/JumpTargetManager.cpp +++ b/runnable/tools/runnable-lift/JumpTargetManager.cpp @@ -61,6 +61,20 @@ cl::opt FAST("fast", cl::opt SUPERFAST("super-fast", cl::desc("fast rewriting"), cl::cat(MainCategory)); +cl::opt AddrRangeMin("addr-range-min", + cl::desc("Lower bound (inclusive) of the address range" + " to translate. When non-zero, execution leaving" + " [addr-range-min, addr-range-max) is stopped."), + cl::value_desc("address"), + cl::init(0), + cl::cat(MainCategory)); +cl::opt AddrRangeMax("addr-range-max", + cl::desc("Upper bound (exclusive) of the address range" + " to translate. 
Must be set together with" + " --addr-range-min."), + cl::value_desc("address"), + cl::init(0), + cl::cat(MainCategory)); cl::opt INFO("info", cl::desc("print statistics information"), cl::cat(MainCategory)); @@ -2935,9 +2949,19 @@ bool JumpTargetManager::isIllegalStaticAddr(uint64_t pc){ // return true; //} + if(isOutOfAddrRange(pc)) + return true; + return false; } +bool JumpTargetManager::isOutOfAddrRange(uint64_t pc){ + if(AddrRangeMin == 0 && AddrRangeMax == 0) + return false; + + return (pc < AddrRangeMin || pc >= AddrRangeMax); +} + void JumpTargetManager::harvestNextAddrofBr(){ // *ptc.CFIAddr represents next block addr. auto BlockNext = *ptc.CFIAddr; @@ -3929,7 +3953,7 @@ void JumpTargetManager::harvestBTBasicBlock(llvm::BasicBlock *thisBlock, if(std::get<0>(item) == destAddr) return; } - if(!haveTranslatedPC(destAddr, 0)){ + if(!haveTranslatedPC(destAddr, 0) && !isOutOfAddrRange(destAddr)){ ptc.storeCPUState(); /* Recording not execute branch destination relationship with current BasicBlock */ // thisBlock = nullptr; @@ -4675,8 +4699,10 @@ void JumpTargetManager::harvestCallBasicBlock(llvm::BasicBlock *thisBlock,uint64 * So,this Block will not contain a call instruction, that has been splited * but we still record this relationship, because when we backtracking, * we will check splited Block. 
*/ - BranchTargets.push_back(std::make_tuple(*ptc.CallNext,thisBlock,thisAddr)); - errs()< IllegalStaticAddrs; bool isIllegalStaticAddr(uint64_t pc); + bool isOutOfAddrRange(uint64_t pc); void TestSuspectDataRegion(std::string path); StaticAddrsMap SuspectDataRegion; diff --git a/runnable/tools/runnable-lift/Main.cpp b/runnable/tools/runnable-lift/Main.cpp index f501dac31..40466c9eb 100644 --- a/runnable/tools/runnable-lift/Main.cpp +++ b/runnable/tools/runnable-lift/Main.cpp @@ -38,6 +38,7 @@ extern "C" { // Local includes #include "BinaryFile.h" #include "CodeGenerator.h" +#include "ParallelOptions.h" #include "PTCInterface.h" PTCInterface ptc = {}; ///< The interface with the PTC library. @@ -78,6 +79,26 @@ opt OutputPath(Positional, Required, desc("")); opt ExecutableArgs("exe-args", value_desc("arguments"), cat(MainCategory)); +opt DynamicParallel("dynamic-parallel", + desc("enable dynamic branch-driven parallel lift"), + cat(MainCategory), + init(false)); +opt ParallelWorkers("parallel-workers", + desc("maximum number of worker subprocesses"), + cat(MainCategory), + init(1)); +opt ParallelWorkerMode("parallel-worker-mode", + desc("internal worker subprocess mode"), + cat(MainCategory), + init(false)); +opt ParallelSeedPC("parallel-seed-pc", + desc("worker seed PC"), + cat(MainCategory), + init(0)); +opt ParallelFragmentDir("parallel-fragment-dir", + desc("fragment output directory"), + cat(MainCategory)); + } // namespace static std::string LibTinycodePath; @@ -203,12 +224,21 @@ int main(int argc, const char *argv[]) { // Translate everything Architecture TargetArchitecture; llvm::LLVMContext RevambGlobalContext; + ParallelOptions Options; + Options.DynamicParallel = DynamicParallel; + Options.WorkerMode = ParallelWorkerMode; + Options.WorkerCount = ParallelWorkers; + Options.SeedPC = ParallelSeedPC; + Options.FragmentDir = ParallelFragmentDir; + Options.InputPath = InputPath; + Options.ExecutableArgs = ExecutableArgs; CodeGenerator Generator(TheBinary, 
TargetArchitecture, RevambGlobalContext, std::string(OutputPath), LibHelpersPath, - EarlyLinkedPath); + EarlyLinkedPath, + Options); Generator.translate(EntryPointAddress); Generator.serialize(); diff --git a/runnable/tools/runnable-lift/ParallelOptions.h b/runnable/tools/runnable-lift/ParallelOptions.h new file mode 100644 index 000000000..02b13488b --- /dev/null +++ b/runnable/tools/runnable-lift/ParallelOptions.h @@ -0,0 +1,22 @@ +#ifndef RUNNABLE_LIFT_PARALLELOPTIONS_H +#define RUNNABLE_LIFT_PARALLELOPTIONS_H + +// +// This file is distributed under the MIT License. See LICENSE.md for details. +// + +// Standard includes +#include +#include + +struct ParallelOptions { + bool DynamicParallel = false; + bool WorkerMode = false; + unsigned WorkerCount = 1; + uint64_t SeedPC = 0; + std::string FragmentDir; + std::string InputPath; + std::string ExecutableArgs; +}; + +#endif // RUNNABLE_LIFT_PARALLELOPTIONS_H From 88bf9e170aa19ebe3329ae2c816bef9d0bac5821 Mon Sep 17 00:00:00 2001 From: iskindar Date: Fri, 1 May 2026 22:03:19 +0800 Subject: [PATCH 2/7] fix: isolate dynamic parallel workers from inherited branch state --- .../tools/runnable-lift/CodeGenerator.cpp | 203 ++++++++++++++---- runnable/tools/runnable-lift/CodeGenerator.h | 6 + .../tools/runnable-lift/JumpTargetManager.cpp | 12 ++ 3 files changed, 185 insertions(+), 36 deletions(-) diff --git a/runnable/tools/runnable-lift/CodeGenerator.cpp b/runnable/tools/runnable-lift/CodeGenerator.cpp index e4579a98f..e0f95e1c2 100644 --- a/runnable/tools/runnable-lift/CodeGenerator.cpp +++ b/runnable/tools/runnable-lift/CodeGenerator.cpp @@ -7,6 +7,8 @@ // // Standard includes +#include +#include #include #include #include @@ -217,6 +219,8 @@ CodeGenerator::CodeGenerator(BinaryFile &Binary, TargetArchitecture(Target), Context(TheContext), TheModule((new Module("top", Context))), + HelpersPath(Helpers), + EarlyLinkedPath(EarlyLinked), OutputPath(Output), Debug(new DebugHelper(Output, TheModule.get(), DebugInfo, 
DebugPath)), Binary(Binary), @@ -224,7 +228,7 @@ CodeGenerator::CodeGenerator(BinaryFile &Binary, OriginalInstrMDKind = Context.getMDKindID("oi"); PTCInstrMDKind = Context.getMDKindID("pi"); - HelpersModule = parseIR(Helpers, Context); + HelpersModule = parseIR(HelpersPath, Context); for (auto &F : HelpersModule->functions()) { // Remove 'optnone' Function attribute from QEMU helpers. // QEMU helpers are compiled with -O0 in libtinycode because the LLVM IR @@ -236,7 +240,7 @@ CodeGenerator::CodeGenerator(BinaryFile &Binary, F.removeFnAttr(Attribute::OptimizeNone); F.setDSOLocal(false); } - EarlyLinkedModule = parseIR(EarlyLinked, Context); + EarlyLinkedModule = parseIR(EarlyLinkedPath, Context); if (CoveragePath.size() == 0) CoveragePath = Output + ".coverage.csv"; @@ -842,6 +846,10 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { bool StaticAddrFlag = false; uint32_t EntryFlag = 0; uint64_t SuspectEntryAddr = 0; + bool PreferBranchFrontier = false; + uint64_t LastHaveBBVA = 0; + size_t RepeatedHaveBBCount = 0; + const size_t ParallelHaveBBRepeatLimit = 4096; std::vector BlockPCs1; std::vector &BlockPCs = BlockPCs1; std::map BaseData1; @@ -850,6 +858,7 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { jjj++; BlockBRs = nullptr; BlockPCs.clear(); + DynamicVirtualAddress = 0; if(!JumpTargets.haveBB){ Builder.SetInsertPoint(Entry); BlockBRs = Builder.GetInsertBlock(); @@ -869,6 +878,32 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { if(traverseFLAG && JumpTargets.haveBB){ ptc_instruction_list_malloc(InstructionList.get()); errs()<<"Nop execute!\n"; + if (ParallelConfig.DynamicParallel) { + if (LastHaveBBVA == VirtualAddress) + RepeatedHaveBBCount++; + else { + LastHaveBBVA = VirtualAddress; + RepeatedHaveBBCount = 1; + } + if (RepeatedHaveBBCount > ParallelHaveBBRepeatLimit) { + if (ParallelConfig.WorkerMode) { + errs() << "parallel worker loop guard stop repeated haveBB pc=0x" + << Twine::utohexstr(VirtualAddress) + << " repeats=" << 
RepeatedHaveBBCount << "\n"; + break; + } else { + errs() << "parallel loop guard skip repeated haveBB pc=0x" + << Twine::utohexstr(VirtualAddress) + << " repeats=" << RepeatedHaveBBCount << "\n"; + JumpTargets.haveBB = 0; + DynamicVirtualAddress = 0; + Entry = nullptr; + PreferBranchFrontier = !JumpTargets.BranchTargets.empty(); + } + } + } + } else { + RepeatedHaveBBCount = 0; } if(!JumpTargets.haveBB){ @@ -1056,8 +1091,17 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { EntryFlag = false; } - // Obtain a new program counter to translate - std::tie(VirtualAddress, Entry) = JumpTargets.peek(); + // Prefer draining saved branch-frontier states before resuming the + // general unexplored/static worklist. This preserves the serial + // queue discipline more closely when dynamic workers are active. + if (PreferBranchFrontier + && !ParallelConfig.WorkerMode + && !JumpTargets.BranchTargets.empty()) { + VirtualAddress = 0; + Entry = nullptr; + } else { + std::tie(VirtualAddress, Entry) = JumpTargets.peek(); + } if(*ptc.isCall and BlockBRs){ if(!JumpTargets.isDataSegmAddr(ptc.regs[R_ESP])){ @@ -1068,6 +1112,8 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { errs()<<*((unsigned long *)ptc.regs[4])<<"<--store callnext\n"; errs()<<*ptc.CallNext<<"\n"; JumpTargets.harvestCallBasicBlock(BlockBRs,tmpVA); + if (ParallelConfig.WorkerMode) + JumpTargets.BranchTargets.clear(); *ptc.isCall = 0; } @@ -1087,9 +1133,16 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { JumpTargets.haveBB = 0; BlockBRs = nullptr; std::tie(jtVirtualAddress, srcBB, srcAddr) = JumpTargets.BranchTargets.front(); - JumpTargets.BranchTargets.erase(JumpTargets.BranchTargets.begin()); - ptc.deletCPULINEState(); - DynamicVirtualAddress = jtVirtualAddress; + if (trySpawnBranchWorker(jtVirtualAddress, JumpTargets.BranchTargets)) { + DynamicVirtualAddress = 0; + BaseData.clear(); + PreferBranchFrontier = false; + } else { + 
JumpTargets.BranchTargets.erase(JumpTargets.BranchTargets.begin()); + ptc.deletCPULINEState(); + DynamicVirtualAddress = jtVirtualAddress; + PreferBranchFrontier = true; + } errs()<<"syscall--------------------\n"; } } @@ -1142,6 +1195,7 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { JumpTargets.haveBB = 0; DynamicVirtualAddress = 0; BaseData.clear(); + PreferBranchFrontier = false; } else if (ParallelConfig.WorkerMode) { DynamicVirtualAddress = jtVirtualAddress; BaseData.clear(); @@ -1150,6 +1204,7 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { ptc.deletCPULINEState(); DynamicVirtualAddress = jtVirtualAddress; BaseData.clear(); + PreferBranchFrontier = true; } } @@ -1176,12 +1231,16 @@ void CodeGenerator::translate(uint64_t VirtualAddress) { BlockBRs, Translator.branchsize(), branchLabeledcontent); + if (ParallelConfig.WorkerMode) + JumpTargets.BranchTargets.clear(); } std::cerr<> &BranchTargets) { @@ -1468,6 +1612,8 @@ bool CodeGenerator::trySpawnBranchWorker( if (ParallelSpawnedSeeds.count(SeedPC) != 0) return false; + pollFinishedForkWorkers(false); + size_t ActiveWorkers = 0; for (const auto &Worker : ParallelWorkers) if (!Worker.Finished) @@ -1478,13 +1624,14 @@ bool CodeGenerator::trySpawnBranchWorker( pid_t PID = fork(); runnable_assert(PID >= 0, "failed to fork branch worker"); if (PID == 0) { - ParallelConfig.WorkerMode = true; - switchToWorkerOutput(SeedPC); - BranchTargets.erase(BranchTargets.begin()); - ptc.deletCPULINEState(); - return false; + int Result = runFreshBranchWorker(SeedPC); + errs() << "parallel worker seed=0x" << Twine::utohexstr(SeedPC) + << " exit=" << Result << "\n"; + ::_exit(Result); } + errs() << "parallel parent spawn seed=0x" << Twine::utohexstr(SeedPC) + << " qdepth-before-drop=" << ptc.queueDepth() << "\n"; BranchTargets.erase(BranchTargets.begin()); ptc.dropCPUState(); ParallelSpawnedSeeds.insert(SeedPC); @@ -1496,23 +1643,7 @@ bool CodeGenerator::trySpawnBranchWorker( } void 
CodeGenerator::waitForForkWorkers() { - if (!ParallelConfig.DynamicParallel || ParallelConfig.WorkerMode) - return; - - for (auto &Worker : ParallelWorkers) { - if (Worker.Finished) - continue; - int Status = 0; - pid_t Waited = waitpid(Worker.Pid, &Status, 0); - if (Waited != Worker.Pid) - continue; - Worker.Finished = true; - Worker.ExitCode = WIFEXITED(Status) ? WEXITSTATUS(Status) : -1; - if (Worker.ExitCode == 0) - ParallelWorkersSucceeded++; - else - ParallelWorkersFailed++; - } + pollFinishedForkWorkers(true); } void CodeGenerator::mergeForkWorkerFragments() { diff --git a/runnable/tools/runnable-lift/CodeGenerator.h b/runnable/tools/runnable-lift/CodeGenerator.h index 1cb10099f..4aee2243c 100644 --- a/runnable/tools/runnable-lift/CodeGenerator.h +++ b/runnable/tools/runnable-lift/CodeGenerator.h @@ -109,7 +109,11 @@ class CodeGenerator { /// \param Name name of the imported function llvm::Function *importHelperFunctionDeclaration(llvm::StringRef Name); + std::string workerOutputPath(uint64_t SeedPC) const; + void configureOutputArtifacts(const std::string &Output); void switchToWorkerOutput(uint64_t SeedPC); + int runFreshBranchWorker(uint64_t SeedPC); + void pollFinishedForkWorkers(bool Block); bool trySpawnBranchWorker(uint64_t SeedPC, std::vector> &BranchTargets); void waitForForkWorkers(); @@ -121,6 +125,8 @@ class CodeGenerator { std::unique_ptr TheModule; std::unique_ptr HelpersModule; std::unique_ptr EarlyLinkedModule; + std::string HelpersPath; + std::string EarlyLinkedPath; std::string OutputPath; std::unique_ptr Debug; BinaryFile &Binary; diff --git a/runnable/tools/runnable-lift/JumpTargetManager.cpp b/runnable/tools/runnable-lift/JumpTargetManager.cpp index 17e519bda..e55d495fe 100644 --- a/runnable/tools/runnable-lift/JumpTargetManager.cpp +++ b/runnable/tools/runnable-lift/JumpTargetManager.cpp @@ -9,6 +9,7 @@ // Standard includes #include "runnable/Support/Assert.h" +#include #include #include #include @@ -52,6 +53,11 @@ namespace { Logger<> 
JTCountLog("jtcount"); +static bool inParallelWorkerMode() { + const char *Value = std::getenv("RUNNABLE_PARALLEL_WORKER_MODE"); + return Value != nullptr && Value[0] == '1'; +} + cl::opt Statistics("Statistics", cl::desc("Count rewriting information"), cl::cat(MainCategory)); @@ -3949,6 +3955,8 @@ void JumpTargetManager::handleIndirectJmp(llvm::BasicBlock *thisBlock, void JumpTargetManager::harvestBTBasicBlock(llvm::BasicBlock *thisBlock, uint64_t thisAddr, uint64_t destAddr){ + if (inParallelWorkerMode()) + return; for(auto item : BranchTargets){ if(std::get<0>(item) == destAddr) return; @@ -4675,6 +4683,8 @@ void JumpTargetManager::harvestCallBasicBlock(llvm::BasicBlock *thisBlock,uint64 } if(!haveTranslatedPC(*ptc.CallNext, 0)) StaticAddrs[*ptc.CallNext] = 2; + if (inParallelWorkerMode()) + return; for(auto item : BranchTargets){ if(std::get<0>(item) == *ptc.CallNext) return; @@ -4750,6 +4760,8 @@ void JumpTargetManager::harvestbranchBasicBlock(uint64_t nextAddr, CondBranches[thisAddr] = 1; } for (auto destAddrSrcBB : branchJT){ + if (inParallelWorkerMode()) + break; if(!haveTranslatedPC(destAddrSrcBB.first, nextAddr) && !isIllegalStaticAddr(destAddrSrcBB.first)){ bool isRecord = false; From a5e9e7f56f0adafdf8938a1ba01f34f7c0860a26 Mon Sep 17 00:00:00 2001 From: iskindar Date: Fri, 1 May 2026 22:31:53 +0800 Subject: [PATCH 3/7] docs: add dynamic parallel lift prototype usage --- README.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/README.md b/README.md index b0d0115d1..d9334f77e 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,45 @@ $ runnable-lift hello hello.ll 2>hello.log $ runnable translate hello ``` +## Dynamic Parallel Lift (Prototype) + +The `codex/dynamic-parallel-lift` branch adds an experimental dynamic +branch-driven parallel mode to `runnable-lift`. 
+ +User-facing flags: + +- `-dynamic-parallel`: enable dynamic branch-driven worker spawning +- `-parallel-workers=<N>`: cap the number of worker subprocesses +- `-parallel-fragment-dir=<dir>`: directory for worker `.ll` fragments and logs + +Example: + +``` +$ mkdir -p /tmp/runnable-fragments +$ runnable-lift hello hello.ll \ + -dynamic-parallel \ + -parallel-workers=4 \ + -parallel-fragment-dir=/tmp/runnable-fragments \ + 2>hello.parallel.log +``` + +Artifacts: + +- coordinator output: `hello.ll` +- worker fragments: `/tmp/runnable-fragments/worker_<seed-pc-hex>.ll` +- worker stdout/stderr logs: + - `/tmp/runnable-fragments/worker_<seed-pc-hex>.ll.stdout.log` + - `/tmp/runnable-fragments/worker_<seed-pc-hex>.ll.stderr.log` + +Notes: + +- `-parallel-worker-mode` and `-parallel-seed-pc` are internal flags used by + worker subprocesses and should not be passed manually. +- This branch is still a prototype. The worker-fragment auto-merge helper + expected by `runnable-lift` is not upstreamed in this repository yet, so a + plain checkout of this branch will emit worker fragments but will not + automatically merge them back into the final top-level `.ll` output. 
+ ## Experimental Evaluation From c70b1c97bf6740c235439581611921f63113ee2f Mon Sep 17 00:00:00 2001 From: iskindar Date: Fri, 1 May 2026 23:20:04 +0800 Subject: [PATCH 4/7] feat: upstream dynamic parallel merge helper --- README.md | 8 +- runnable/CMakeLists.txt | 5 +- .../scripts/_merge_dynamic_fragments_lib.py | 1816 +++++++++++++++++ .../merge_dynamic_runnable_fragments.py | 99 + .../tools/runnable-lift/CodeGenerator.cpp | 24 +- test/test_merge_dynamic_runnable_fragments.py | 110 + 6 files changed, 2052 insertions(+), 10 deletions(-) create mode 100644 runnable/scripts/_merge_dynamic_fragments_lib.py create mode 100644 runnable/scripts/merge_dynamic_runnable_fragments.py create mode 100644 test/test_merge_dynamic_runnable_fragments.py diff --git a/README.md b/README.md index d9334f77e..6de442302 100644 --- a/README.md +++ b/README.md @@ -107,10 +107,10 @@ Notes: - `-parallel-worker-mode` and `-parallel-seed-pc` are internal flags used by worker subprocesses and should not be passed manually. -- This branch is still a prototype. The worker-fragment auto-merge helper - expected by `runnable-lift` is not upstreamed in this repository yet, so a - plain checkout of this branch will emit worker fragments but will not - automatically merge them back into the final top-level `.ll` output. +- Successful worker fragments are merged back into the final top-level `.ll` + output with the repository helper `runnable/scripts/merge_dynamic_runnable_fragments.py`. +- This branch is still experimental. If fragment merge fails, the coordinator + output and worker `.ll` fragments are still left on disk for manual inspection. 
## Experimental Evaluation diff --git a/runnable/CMakeLists.txt b/runnable/CMakeLists.txt index d72f5fe9c..29f842e62 100644 --- a/runnable/CMakeLists.txt +++ b/runnable/CMakeLists.txt @@ -159,10 +159,14 @@ configure_file(include/runnable/Runtime/commonconstants.h "${CMAKE_BINARY_DIR}/c configure_file(runtime/early-linked.c "${CMAKE_BINARY_DIR}/early-linked.c" COPYONLY) configure_file(scripts/runnable "${CMAKE_BINARY_DIR}/runnable" COPYONLY) configure_file(scripts/runnable-merge-dynamic "${CMAKE_BINARY_DIR}/runnable-merge-dynamic" COPYONLY) +configure_file(scripts/merge_dynamic_runnable_fragments.py "${CMAKE_BINARY_DIR}/merge_dynamic_runnable_fragments.py" COPYONLY) +configure_file(scripts/_merge_dynamic_fragments_lib.py "${CMAKE_BINARY_DIR}/_merge_dynamic_fragments_lib.py" COPYONLY) install(PROGRAMS scripts/runnable scripts/runnable-merge-dynamic DESTINATION bin) +install(PROGRAMS scripts/merge_dynamic_runnable_fragments.py DESTINATION share/runnable) install(FILES runtime/support.c DESTINATION share/runnable) install(FILES runtime/support.h DESTINATION share/runnable) install(FILES include/runnable/Runtime/commonconstants.h DESTINATION share/runnable) +install(FILES scripts/_merge_dynamic_fragments_lib.py DESTINATION share/runnable) # Remove -rdynamic set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) @@ -171,4 +175,3 @@ set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS) install(EXPORT runnable NAMESPACE runnable:: DESTINATION share/runnable/cmake) - diff --git a/runnable/scripts/_merge_dynamic_fragments_lib.py b/runnable/scripts/_merge_dynamic_fragments_lib.py new file mode 100644 index 000000000..e23daee5c --- /dev/null +++ b/runnable/scripts/_merge_dynamic_fragments_lib.py @@ -0,0 +1,1816 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import json +import os +import re +import shlex +import struct +import subprocess +import sys +import threading +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import asdict, dataclass +from 
pathlib import Path, PurePosixPath +from typing import Dict, Iterable, List, Optional, Tuple + +LL_COMMENT_RE = re.compile(r"^\s*;\s*(0x[0-9a-fA-F]+):(.*)$") +ROOT_LABEL_RE = re.compile(r"^([A-Za-z$._0-9-]+):") +ROOT_BLOCK_LABEL_RE = re.compile(r"^bb\.0x([0-9a-fA-F]+)") +ROOT_SYMBOLIC_BLOCK_RE = re.compile(r"^(bb\.[A-Za-z$._0-9-]+?)(?:\.0x([0-9a-fA-F]+))?(?:$|[._].*)") +ROOT_CASE_RE = re.compile(r"^\s*i64\s+(\d+),\s+label\s+%([A-Za-z$._0-9-]+)") +SWITCH_CASE_RE = re.compile(r"^\s*i\d+\s+(-?\d+),\s+label\s+%[A-Za-z$._0-9-]+") +ANON_BLOCK_LABEL_RE = re.compile(r"^\s*;\s*