From fe0329ba325a69526fc7bf37b643063f42b0b69c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 28 May 2026 00:43:19 +0200 Subject: [PATCH] JIT: factor shared native-register bookkeeping Signed-off-by: Paul Guyot --- libs/jit/src/CMakeLists.txt | 10 + libs/jit/src/jit_aarch64.erl | 548 ++++-------- libs/jit/src/jit_arm32.erl | 923 +++++++++---------- libs/jit/src/jit_armv6m.erl | 988 ++++++++++----------- libs/jit/src/jit_backend_regs_impl.hrl | 137 +++ libs/jit/src/jit_regs.erl | 84 +- libs/jit/src/jit_riscv32.erl | 10 +- libs/jit/src/jit_riscv64.erl | 12 +- libs/jit/src/jit_riscv_impl.hrl | 853 ++++++++---------- libs/jit/src/jit_wasm32.erl | 150 ++-- libs/jit/src/jit_x86_64.erl | 598 +++++-------- libs/jit/src/jit_xtensa.erl | 1119 ++++++++++++------------ tests/libs/jit/jit_aarch64_tests.erl | 26 +- tests/libs/jit/jit_armv6m_tests.erl | 22 +- tests/libs/jit/jit_riscv32_tests.erl | 12 +- tests/libs/jit/jit_riscv64_tests.erl | 12 +- tests/libs/jit/jit_wasm32_tests.erl | 15 + 17 files changed, 2626 insertions(+), 2893 deletions(-) create mode 100644 libs/jit/src/jit_backend_regs_impl.hrl diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 375a79f5ae..bca9324278 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -122,6 +122,16 @@ foreach(riscv_module jit_riscv32 jit_riscv64) ) endforeach() +# The register-based backends include the shared native-register bookkeeping in +# jit_backend_regs_impl.hrl, so changes to that header must force recompilation. 
+foreach(regs_module jit_x86_64 jit_aarch64 jit_arm32 jit_armv6m jit_xtensa jit_riscv32 jit_riscv64) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${regs_module}.beam + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit_backend_regs_impl.hrl + APPEND + ) +endforeach() + set(JIT_VERSION ${ATOMVM_BASE_VERSION}) install( diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 02dcdc561d..b8f96d746b 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -156,8 +156,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), non_neg_integer()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), regs :: jit_regs:regs() @@ -289,11 +287,9 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, labels = #{}, variant = Variant, - regs = jit_regs:new() + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0) }. %%----------------------------------------------------------------------------- @@ -338,70 +334,14 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> Stream1 = StreamModule:append(Stream0, jit_aarch64_asm:brk(0)), State#state{stream = Stream1}. -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently used native registers. This is used for -%% debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of used registers -%%----------------------------------------------------------------------------- --spec used_regs(state()) -> [aarch64_register()]. -used_regs(#state{used_regs = Used}) -> mask_to_list(Used). 
- -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently available native scratch registers. This -%% is used for debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of available registers -%%----------------------------------------------------------------------------- --spec available_regs(state()) -> [aarch64_register()]. -available_regs(#state{available_regs = Available}) -> mask_to_list(Available). - -%%----------------------------------------------------------------------------- -%% @doc Free native registers. The passed list of registers can contain -%% registers, pointer to registers or other values that are ignored. -%% @end -%% @param State current backend state -%% @param Regs list of registers or other values -%% @return The updated backend state -%%----------------------------------------------------------------------------- --spec free_native_registers(state(), [value()]) -> state(). -free_native_registers(State, []) -> - State; -free_native_registers(State, [Reg | Rest]) -> - State1 = free_native_register(State, Reg), - free_native_registers(State1, Rest). - --spec free_native_register(state(), value()) -> state(). -free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Reg -) when - is_atom(Reg) --> - Bit = reg_bit(Reg), - State#state{ - available_regs = Available0 bor Bit, - used_regs = Used0 band (bnot Bit) - }; -free_native_register(State, {ptr, Reg}) -> - free_native_register(State, Reg); -free_native_register(State, _Other) -> - State. - -%%----------------------------------------------------------------------------- -%% @doc Assert that all native scratch registers are available. This is used -%% for debugging and not in production. 
-%% @end -%% @param State current backend state -%% @return ok -%%----------------------------------------------------------------------------- --spec assert_all_native_free(state()) -> ok. -assert_all_native_free(State) -> - 0 = State#state.used_regs, - ?AVAILABLE_REGS_MASK = State#state.available_regs, - ok. +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1, +%% first_avail/1, mask_to_list/1, args_regs/1, prepare_call_scratch/1) is shared +%% across the register-based backends and flows through jit_regs. +-define(FIRST_AVAIL_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6]). +-define(MASK_TO_LIST_REGS, ?FIRST_AVAIL_REGS). +-define(JITSTATE_ARG_REG, ?JITSTATE_REG). +-include("jit_backend_regs_impl.hrl"). %%----------------------------------------------------------------------------- %% @doc Emit the jump table at the beginning of the module. Branches will be @@ -568,15 +508,8 @@ call_primitive_last( % We need a register for the function pointer that should not be used as a parameter % Since we're not returning, we can use all scratch registers except % registers used for parameters - ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - ArgsRegs = args_regs(Args), - ArgsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), - ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), - ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), - Temp = first_avail(ScratchMask), - TempBit = reg_bit(Temp), - AvailableRegs1 = ScratchMask band (bnot TempBit), - UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), + #{temp := Temp, available_mask := AvailableRegs1, used_mask := UsedRegs} = + prepare_call_scratch(Args), PrepCall = case Primitive of 0 -> @@ -588,9 +521,9 @@ call_primitive_last( State1 = set_args( State0#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs, - regs = 
jit_regs:invalidate_reg(State0#state.regs, Temp) + regs = jit_regs:set_masks( + jit_regs:invalidate_reg(State0#state.regs, Temp), AvailableRegs1, UsedRegs + ) }, Args ), @@ -599,9 +532,9 @@ call_primitive_last( Stream3 = StreamModule:append(Stream2, Call), State1#state{ stream = Stream3, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State1#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State1#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) }. %%----------------------------------------------------------------------------- @@ -618,8 +551,7 @@ return_if_not_equal_to_ctx( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State, {free, Reg} ) -> @@ -635,12 +567,9 @@ return_if_not_equal_to_ctx( I2 = jit_aarch64_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), Bit = reg_bit(Reg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), State#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs1 + regs = jit_regs:free_reg(Regs0, Bit) }. %%----------------------------------------------------------------------------- @@ -698,10 +627,11 @@ jump_to_continuation( stream_module = StreamModule, stream = Stream0, offset = BaseOffset, - available_regs = Available + regs = Regs0 } = State, {free, OffsetReg} ) -> + Available = jit_regs:available_regs(Regs0), TempReg = first_avail(Available), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) @@ -720,9 +650,9 @@ jump_to_continuation( % Free all registers since this is a tail jump State#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(Regs0), ?AVAILABLE_REGS_MASK, 0 + ) }. 
%% @private @@ -780,9 +710,10 @@ if_block( Stream2, Replacements ), - State4 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State4#state{regs = MergedRegs}; + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}; if_block( #state{stream_module = StreamModule, stream = Stream0} = State0, Cond, @@ -797,9 +728,10 @@ if_block( BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), NewBranchInstr = rewrite_branch_instruction(CC, BranchOffset), Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}. %%----------------------------------------------------------------------------- %% @doc Emit an if else block, i.e. emit a test of a condition and @@ -836,8 +768,6 @@ if_else_block( %% Build the else block StateElse = State2#state{ stream = Stream4, - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, regs = State1#state.regs }, State3 = BlockFalseFn(StateElse), @@ -847,9 +777,10 @@ if_else_block( FinalJumpOffset = OffsetFinal - ElseJumpOffset, NewElseJumpInstr = jit_aarch64_asm:b(FinalJumpOffset), Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), - State4 = merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs), - MergedRegs = jit_regs:merge(State2#state.regs, State3#state.regs), - State4#state{regs = MergedRegs}. 
+ MergedRegs = jit_regs:merge( + State2#state.regs, State3#state.regs, ?AVAILABLE_REGS_MASK + ), + State3#state{stream = Stream6, regs = MergedRegs}. %% @private -spec if_block_cond(state(), condition()) -> @@ -1110,10 +1041,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {RegOrTuple, '&', Val, '!=', 0} ) -> + Available = jit_regs:available_regs(Regs0), Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1144,10 +1076,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), % AND with mask OffsetBefore = StreamModule:offset(Stream0), @@ -1186,23 +1119,15 @@ if_block_cond( %% @private -spec if_block_free_reg(aarch64_register() | {free, aarch64_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, used_regs = UR0} = State0, + #state{regs = Regs0} = State0, Bit = reg_bit(Reg), - AvR1 = AvR0 bor Bit, - UR1 = UR0 band (bnot Bit), State0#state{ - available_regs = AvR1, - used_regs = UR1 + regs = jit_regs:free_reg(Regs0, Bit) }; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. %% @private --spec merge_used_regs(state(), non_neg_integer()) -> state(). -merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively @@ -1227,8 +1152,6 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, Reg, @@ -1236,6 +1159,7 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Available = jit_regs:available_regs(Regs0), ResultReg = first_avail(Available), Bit = reg_bit(ResultReg), I = jit_aarch64_asm:lsr(ResultReg, Reg, Shift), @@ -1244,9 +1168,7 @@ shift_right( { State#state{ stream = Stream1, - available_regs = Available band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ResultReg }. @@ -1266,8 +1188,6 @@ shift_right_arith( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, Reg, @@ -1275,6 +1195,7 @@ shift_right_arith( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Available = jit_regs:available_regs(Regs0), ResultReg = first_avail(Available), Bit = reg_bit(ResultReg), I = jit_aarch64_asm:asr(ResultReg, Reg, Shift), @@ -1283,9 +1204,7 @@ shift_right_arith( { State#state{ stream = Stream1, - available_regs = Available band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ResultReg }. 
@@ -1324,12 +1243,13 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State0, FuncPtrTuple, Args ) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), + UsedRegs0 = jit_regs:used_regs(Regs0), FreeRegs = lists:flatmap( fun ({free, ?IP0_REG}) -> []; @@ -1385,14 +1305,12 @@ call_func_ptr( ResultBit = reg_bit(ResultReg), AvailableRegs2 = AvailableRegs1 band (bnot ResultBit), AvailableRegs3 = AvailableRegs2 band ?AVAILABLE_REGS_MASK, - Regs1 = jit_regs:invalidate_all(State0#state.regs), + Regs1 = jit_regs:invalidate_all(Regs0), UsedRegs2 = UsedRegs1 bor ResultBit, { State1#state{ stream = Stream6, - available_regs = AvailableRegs3, - used_regs = UsedRegs2, - regs = Regs1 + regs = jit_regs:set_masks(Regs1, AvailableRegs3, UsedRegs2) }, ResultReg }. @@ -1423,8 +1341,9 @@ pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) -> %% @private -spec set_args(state(), [arg()]) -> state(). set_args( - #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args + #state{stream = Stream0, stream_module = StreamModule, regs = Regs0} = State0, Args ) -> + UsedRegs = jit_regs:used_regs(Regs0), ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), @@ -1453,8 +1372,11 @@ set_args( ), State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), - used_regs = ParamMask bor NewUsedMask + regs = jit_regs:set_masks( + Regs0, + ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + ParamMask bor NewUsedMask + ) }. 
%% @private @@ -1626,10 +1548,11 @@ move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) -> Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; move_to_vm_register_emit( - #state{available_regs = Available, regs = Regs0} = State0, Src, {y_reg, Y} + #state{regs = Regs0} = State0, Src, {y_reg, Y} ) when is_atom(Src) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:str(Src, {Temp, Y * ?WORD_SIZE}), @@ -1639,71 +1562,41 @@ move_to_vm_register_emit( % Source is an integer move_to_vm_register_emit(State, 0, Dest) -> move_to_vm_register_emit(State, xzr, Dest); -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(#state{regs = Regs0} = State0, N, Dest) when is_integer(N) -> - Temp = first_avail(AR0), - TempBit = reg_bit(Temp), - AT = AR0 band (bnot TempBit), - I1 = jit_aarch64_asm:mov(Temp, N), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = AT}, Temp, Dest - ), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), - State1#state{available_regs = AR0, regs = Regs1}; + with_temp(State0, Dest, fun(Temp) -> + {jit_aarch64_asm:mov(Temp, N), jit_regs:set_contents(Regs0, Temp, {imm, N})} + end); % Source is a VM register -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> - Temp = first_avail(AR0), - TempBit = reg_bit(Temp), - AT = AR0 band (bnot TempBit), - I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(?MAX_REG)), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = AT}, Temp, Dest - ), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, ?MAX_REG}), - State1#state{available_regs = AR0, regs = 
Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> - Temp = first_avail(AR0), - TempBit = reg_bit(Temp), - AT = AR0 band (bnot TempBit), - I1 = jit_aarch64_asm:ldr(Temp, ?X_REG(X)), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = AT}, Temp, Dest - ), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> - Temp = first_avail(AR0), - TempBit = reg_bit(Temp), - AT = AR0 band (bnot TempBit), - I1 = jit_aarch64_asm:ldr(Temp, {Reg, 0}), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = AT}, Temp, Dest - ), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> - Temp = first_avail(AR0), - TempBit = reg_bit(Temp), - AT = AR0 band (bnot TempBit), - I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), - I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * ?WORD_SIZE}), - Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = AT}, Temp, Dest - ), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), - State1#state{available_regs = AR0, regs = Regs1}; +move_to_vm_register_emit(#state{regs = Regs0} = State0, {x_reg, extra}, Dest) -> + with_temp(State0, Dest, fun(Temp) -> + { + jit_aarch64_asm:ldr(Temp, ?X_REG(?MAX_REG)), + jit_regs:set_contents(Regs0, Temp, {x_reg, ?MAX_REG}) + } + end); +move_to_vm_register_emit(#state{regs = Regs0} = State0, {x_reg, X}, Dest) -> + with_temp(State0, Dest, fun(Temp) -> + 
{jit_aarch64_asm:ldr(Temp, ?X_REG(X)), jit_regs:set_contents(Regs0, Temp, {x_reg, X})} + end); +move_to_vm_register_emit(#state{regs = Regs0} = State0, {ptr, Reg}, Dest) -> + with_temp(State0, Dest, fun(Temp) -> + {jit_aarch64_asm:ldr(Temp, {Reg, 0}), jit_regs:invalidate_reg(Regs0, Temp)} + end); +move_to_vm_register_emit(#state{regs = Regs0} = State0, {y_reg, Y}, Dest) -> + with_temp(State0, Dest, fun(Temp) -> + I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), + I2 = jit_aarch64_asm:ldr(Temp, {Temp, Y * ?WORD_SIZE}), + {<>, jit_regs:set_contents(Regs0, Temp, {y_reg, Y})} + end); % term_to_float move_to_vm_register_emit( - #state{stream_module = StreamModule, available_regs = Available, stream = Stream0} = State0, + #state{stream_module = StreamModule, regs = Regs0, stream = Stream0} = State0, {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}), I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), @@ -1714,6 +1607,29 @@ move_to_vm_register_emit( Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), State1#state{stream = Stream1, regs = Regs1}. +-spec with_temp( + state(), + vm_register(), + fun((aarch64_register()) -> {binary(), jit_regs:regs()}) +) -> state(). +with_temp( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Dest, EmitFn +) -> + AR0 = jit_regs:available_regs(Regs0), + Temp = first_avail(AR0), + TempBit = reg_bit(Temp), + {Code, Regs1} = EmitFn(Temp), + Stream1 = StreamModule:append(Stream0, Code), + State1 = move_to_vm_register_emit( + State0#state{ + stream = Stream1, + regs = jit_regs:set_available_regs(Regs1, AR0 band (bnot TempBit)) + }, + Temp, + Dest + ), + State1#state{regs = jit_regs:set_available_regs(State1#state.regs, AR0)}. + %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. 
%% @end @@ -1730,12 +1646,13 @@ move_to_vm_register_emit( vm_register() | aarch64_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}), I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)), @@ -1744,12 +1661,13 @@ move_array_element( Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, {Reg, Index * ?WORD_SIZE}), I2 = jit_aarch64_asm:str(Temp, {Dest, 0}), @@ -1757,12 +1675,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Available = jit_regs:available_regs(Regs0), Temp1 = first_avail(Available), Bit1 = reg_bit(Temp1), Avail1 = Available band (bnot Bit1), @@ -1777,12 +1696,13 @@ move_array_element( Regs3 = jit_regs:invalidate_reg(Regs2, Temp2), State#state{stream = Stream1, regs = Regs3}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available, regs = Regs0} = + 
#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Reg, {Reg, Index * ?WORD_SIZE}), @@ -1804,8 +1724,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -1815,23 +1733,17 @@ move_array_element( I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I2 = jit_aarch64_asm:str(IndexReg, ?X_REG(X)), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), Regs2 = jit_regs:invalidate_reg(Regs1, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs2 + regs = jit_regs:free_reg(Regs2, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -1841,35 +1753,28 @@ move_array_element( I1 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I2 = jit_aarch64_asm:str(IndexReg, {PtrReg, 0}), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs1 + regs = jit_regs:free_reg(Regs1, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when ?IS_GPR(IndexReg) -> + AvailableRegs0 = 
jit_regs:available_regs(Regs0), Temp = first_avail(AvailableRegs0), I1 = jit_aarch64_asm:ldr(Temp, ?Y_REGS), I2 = jit_aarch64_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 3}), I3 = jit_aarch64_asm:str(IndexReg, {Temp, Y * ?WORD_SIZE}), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), @@ -1877,10 +1782,8 @@ move_array_element( Regs2 = jit_regs:invalidate_reg(Regs1, Temp), Regs3 = jit_regs:invalidate_reg(Regs2, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs3 + regs = jit_regs:free_reg(Regs3, Bit) }. %%----------------------------------------------------------------------------- @@ -1912,13 +1815,12 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, Index ) -> + Available = jit_regs:available_regs(Regs0), ElemReg = first_avail(Available), Bit = reg_bit(ElemReg), I1 = jit_aarch64_asm:ldr(ElemReg, {Reg, Index * ?WORD_SIZE}), @@ -1927,9 +1829,7 @@ get_array_element( { State#state{ stream = Stream1, - available_regs = Available band (bnot Bit), - used_regs = UsedRegs0 bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ElemReg }. 
@@ -1998,13 +1898,14 @@ move_to_array_element( ) when is_integer(IndexVal) andalso is_integer(Offset) -> move_to_array_element(State, Value, BaseReg, IndexVal + Offset); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset), I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}), @@ -2019,7 +1920,7 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - Temp = first_avail(State1#state.available_regs), + Temp = first_avail(jit_regs:available_regs(State1#state.regs)), I1 = jit_aarch64_asm:add(Temp, IndexReg, Offset), I2 = jit_aarch64_asm:str(ValueReg, {BaseReg, Temp, lsl, 3}), Stream1 = (State1#state.stream_module):append(State1#state.stream, <>), @@ -2044,16 +1945,17 @@ move_to_native_register(#state{regs = Regs} = State, Value) -> case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of {ok, CachedReg} -> Bit = reg_bit(CachedReg), - case State#state.used_regs band Bit of + CurUsed = jit_regs:used_regs(Regs), + CurAvail = jit_regs:available_regs(Regs), + case CurUsed band Bit of 0 -> - case State#state.available_regs band Bit of + case CurAvail band Bit of 0 -> move_to_native_register_emit(State, Value, Contents); _ -> { State#state{ - used_regs = State#state.used_regs bor Bit, - available_regs = State#state.available_regs band (bnot Bit) + regs = jit_regs:alloc_reg(Regs, Bit) }, CachedReg } @@ -2069,13 +1971,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, cp, Contents ) -> + Available = 
jit_regs:available_regs(Regs0), Reg = first_avail(Available), Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?CP), @@ -2084,9 +1985,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Available band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2103,8 +2002,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, Imm, @@ -2112,6 +2009,7 @@ move_to_native_register_emit( ) when is_integer(Imm) -> + Available = jit_regs:available_regs(Regs0), Reg = first_avail(Available), Bit = reg_bit(Reg), I1 = jit_aarch64_asm:mov(Reg, Imm), @@ -2120,9 +2018,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Available band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2130,13 +2026,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, {x_reg, extra}, Contents ) -> + Available = jit_regs:available_regs(Regs0), Reg = first_avail(Available), Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(?MAX_REG)), @@ -2145,9 +2040,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Available band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2155,8 +2048,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, {x_reg, X}, @@ -2164,6 +2055,7 @@ move_to_native_register_emit( ) when X < ?MAX_REG -> + Available = jit_regs:available_regs(Regs0), Reg = first_avail(Available), Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?X_REG(X)), @@ -2172,9 +2064,7 @@ 
move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Available band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2182,13 +2072,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, {y_reg, Y}, Contents ) -> + Available = jit_regs:available_regs(Regs0), Reg = first_avail(Available), Bit = reg_bit(Reg), I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), @@ -2199,9 +2088,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - available_regs = Available band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }. @@ -2280,12 +2167,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, Reg ) when is_atom(Reg) -> + Available = jit_regs:available_regs(Regs0), SaveReg = first_avail(Available), Bit = reg_bit(SaveReg), I1 = jit_aarch64_asm:mov(SaveReg, Reg), @@ -2295,9 +2181,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Available band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, SaveReg }; @@ -2305,12 +2189,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used, regs = Regs0 } = State, {ptr, Reg} ) when is_atom(Reg) -> + Available = jit_regs:available_regs(Regs0), SaveReg = first_avail(Available), Bit = reg_bit(SaveReg), I1 = jit_aarch64_asm:ldr(SaveReg, {Reg, 0}), @@ -2319,9 +2202,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Available band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, SaveReg }; @@ -2337,10 +2218,10 @@ copy_to_native_register(State, Reg) -> 
%%----------------------------------------------------------------------------- -spec move_to_cp(state(), vm_register()) -> state(). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y} ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), I2 = jit_aarch64_asm:ldr(Reg, {Reg, Y * ?WORD_SIZE}), @@ -2359,10 +2240,10 @@ move_to_cp( %%----------------------------------------------------------------------------- -spec increment_sp(state(), integer()) -> state(). increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Offset ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), I1 = jit_aarch64_asm:ldr(Reg, ?Y_REGS), I2 = jit_aarch64_asm:add(Reg, Reg, Offset * ?WORD_SIZE), @@ -2385,13 +2266,13 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, labels = Labels, regs = Regs0 } = State, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Offset = StreamModule:offset(Stream0), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), @@ -2430,11 +2311,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), @@ -2476,11 +2357,10 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = 
jit_aarch64_asm:ldr(Reg, ?JITSTATE_MODULE), @@ -2491,9 +2371,7 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UsedRegs0 bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }. @@ -2507,7 +2385,7 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, StreamModule:append(Stream0, I) catch error:{unencodable_immediate, Val} -> - Temp = first_avail(State#state.available_regs), + Temp = first_avail(jit_regs:available_regs(State#state.regs)), I1 = jit_aarch64_asm:mov(Temp, Val), I2 = jit_aarch64_asm:Op(Reg, Reg, Temp), StreamModule:append(Stream0, <>) @@ -2550,21 +2428,29 @@ and_(#state{regs = Regs0} = State, {free, Reg}, Val) -> Regs1 = jit_regs:invalidate_reg(Regs0, Reg), {NewState#state{regs = Regs1}, Reg}; and_( - #state{available_regs = Avail, used_regs = UR, regs = Regs0} = State, + #state{regs = Regs0} = State, Reg, Val ) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), NewState = op_imm( - State#state{available_regs = Avail band (bnot Bit), used_regs = UR bor Bit}, + State#state{ + regs = jit_regs:alloc_reg(Regs0, Bit) + }, and_, ResultReg, Reg, Val ), Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), - {NewState#state{regs = Regs1}, ResultReg}. + NewRegs = jit_regs:set_masks( + Regs1, + jit_regs:available_regs(NewState#state.regs), + jit_regs:used_regs(NewState#state.regs) + ), + {NewState#state{regs = NewRegs}, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Perform bitwise OR of a register with an immediate value. 
@@ -2647,7 +2533,8 @@ mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> +mul(#state{regs = Regs0} = State, Reg, 3) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 1), I2 = jit_aarch64_asm:add(Reg, Temp, Reg), @@ -2656,7 +2543,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> +mul(#state{regs = Regs0} = State, Reg, 5) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 2), I2 = jit_aarch64_asm:add(Reg, Temp, Reg), @@ -2666,7 +2554,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> +mul(#state{regs = Regs0} = State, Reg, 7) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 3), I2 = jit_aarch64_asm:sub(Reg, Temp, Reg), @@ -2675,7 +2564,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> +mul(#state{regs = Regs0} = State, Reg, 9) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 3), I2 = jit_aarch64_asm:add(Reg, Temp, Reg), @@ -2685,7 +2575,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 15) -> +mul(#state{regs = Regs0} = State, Reg, 15) -> + Avail = 
jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_aarch64_asm:lsl(Temp, Reg, 4), I2 = jit_aarch64_asm:sub(Reg, Temp, Reg), @@ -2699,11 +2590,12 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_aarch64_asm:mov(Temp, Val), I2 = jit_aarch64_asm:mul(Reg, Reg, Temp), @@ -2731,11 +2623,12 @@ div_( -spec rem_(state(), aarch64_register(), aarch64_register()) -> {state(), aarch64_register()}. rem_( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DividendReg, DivisorReg ) -> + Avail = jit_regs:available_regs(Regs0), %% rem = dividend - (dividend / divisor) * divisor %% Use msub: Rd = Ra - (Rn * Rm) %% First sdiv into a temp, then msub @@ -2756,9 +2649,10 @@ rem_( %%----------------------------------------------------------------------------- -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0 ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), % Load reduction count @@ -2787,9 +2681,8 @@ decrement_reductions_and_maybe_schedule_next( Stream4 = StreamModule:replace( Stream3, BNEOffset, <> ), - State3 = merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs), %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. 
- State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. + State2#state{stream = Stream4, regs = jit_regs:invalidate_all(State1#state.regs)}. %%----------------------------------------------------------------------------- %% @doc Emit a call to a label with automatic scheduling. Decrements reductions @@ -2822,10 +2715,11 @@ call_only_or_schedule_next( stream = Stream0, branches = Branches, labels = Labels, - available_regs = Avail + regs = Regs0 } = State0, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % Load reduction count I1 = jit_aarch64_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), @@ -2984,78 +2878,6 @@ reg_bit(r15) -> ?REG_BIT_R15; reg_bit(r16) -> ?REG_BIT_R16; reg_bit(r17) -> ?REG_BIT_R17. -%% first_avail returns the first available register from a bitmask. -%% Order: [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6] -first_avail(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> r7; -first_avail(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> r8; -first_avail(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> r9; -first_avail(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> r10; -first_avail(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> r11; -first_avail(Mask) when Mask band ?REG_BIT_R12 =/= 0 -> r12; -first_avail(Mask) when Mask band ?REG_BIT_R13 =/= 0 -> r13; -first_avail(Mask) when Mask band ?REG_BIT_R14 =/= 0 -> r14; -first_avail(Mask) when Mask band ?REG_BIT_R15 =/= 0 -> r15; -first_avail(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> r3; -first_avail(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> r4; -first_avail(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> r5; -first_avail(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> r6. - -%% Convert bitmask to list, matching the order -mask_to_list(0) -> []; -mask_to_list(Mask) -> mask_to_list_r7(Mask). - -mask_to_list_r7(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> [r7 | mask_to_list_r8(Mask)]; -mask_to_list_r7(Mask) -> mask_to_list_r8(Mask). 
-mask_to_list_r8(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> [r8 | mask_to_list_r9(Mask)]; -mask_to_list_r8(Mask) -> mask_to_list_r9(Mask). -mask_to_list_r9(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> [r9 | mask_to_list_r10(Mask)]; -mask_to_list_r9(Mask) -> mask_to_list_r10(Mask). -mask_to_list_r10(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> [r10 | mask_to_list_r11(Mask)]; -mask_to_list_r10(Mask) -> mask_to_list_r11(Mask). -mask_to_list_r11(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> [r11 | mask_to_list_r12(Mask)]; -mask_to_list_r11(Mask) -> mask_to_list_r12(Mask). -mask_to_list_r12(Mask) when Mask band ?REG_BIT_R12 =/= 0 -> [r12 | mask_to_list_r13(Mask)]; -mask_to_list_r12(Mask) -> mask_to_list_r13(Mask). -mask_to_list_r13(Mask) when Mask band ?REG_BIT_R13 =/= 0 -> [r13 | mask_to_list_r14(Mask)]; -mask_to_list_r13(Mask) -> mask_to_list_r14(Mask). -mask_to_list_r14(Mask) when Mask band ?REG_BIT_R14 =/= 0 -> [r14 | mask_to_list_r15(Mask)]; -mask_to_list_r14(Mask) -> mask_to_list_r15(Mask). -mask_to_list_r15(Mask) when Mask band ?REG_BIT_R15 =/= 0 -> [r15 | mask_to_list_r3(Mask)]; -mask_to_list_r15(Mask) -> mask_to_list_r3(Mask). -mask_to_list_r3(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> [r3 | mask_to_list_r4(Mask)]; -mask_to_list_r3(Mask) -> mask_to_list_r4(Mask). -mask_to_list_r4(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> [r4 | mask_to_list_r5(Mask)]; -mask_to_list_r4(Mask) -> mask_to_list_r5(Mask). -mask_to_list_r5(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> [r5 | mask_to_list_r6(Mask)]; -mask_to_list_r5(Mask) -> mask_to_list_r6(Mask). -mask_to_list_r6(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> [r6]; -mask_to_list_r6(_Mask) -> []. - -%% @private --spec args_regs([arg()]) -> [aarch64_register() | imm]. 
-args_regs(Args) -> - lists:map( - fun - ({free, {ptr, Reg}}) -> Reg; - ({free, Reg}) when is_atom(Reg) -> Reg; - ({free, Imm}) when is_integer(Imm) -> imm; - (offset) -> imm; - (ctx) -> ?CTX_REG; - (jit_state) -> ?JITSTATE_REG; - (Reg) when is_atom(Reg) -> Reg; - (Imm) when is_integer(Imm) -> imm; - ({ptr, Reg}) -> Reg; - ({x_reg, _}) -> ?CTX_REG; - ({y_reg, _}) -> ?CTX_REG; - ({fp_reg, _}) -> ?CTX_REG; - ({free, {x_reg, _}}) -> ?CTX_REG; - ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG; - ({avm_int64_t, _}) -> imm - end, - Args - ). - %%----------------------------------------------------------------------------- %% @doc Add a label at the current offset %% @end diff --git a/libs/jit/src/jit_arm32.erl b/libs/jit/src/jit_arm32.erl index 281d7fb276..324df7ea56 100644 --- a/libs/jit/src/jit_arm32.erl +++ b/libs/jit/src/jit_arm32.erl @@ -143,8 +143,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), non_neg_integer()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), literal_pool :: [{non_neg_integer(), arm32_register(), non_neg_integer()}], @@ -292,12 +290,10 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, labels = #{}, variant = Variant, literal_pool = [], - regs = jit_regs:new() + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0) }. %%----------------------------------------------------------------------------- @@ -342,69 +338,14 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> Stream1 = StreamModule:append(Stream0, jit_arm32_asm:bkpt(0)), State#state{stream = Stream1}. 
-%%----------------------------------------------------------------------------- -%% @doc Return the list of currently used native registers. This is used for -%% debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of used registers -%%----------------------------------------------------------------------------- --spec used_regs(state()) -> [arm32_register()]. -used_regs(#state{used_regs = Used}) -> mask_to_list(Used). - -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently available native scratch registers. This -%% is used for debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of available registers -%%----------------------------------------------------------------------------- --spec available_regs(state()) -> [arm32_register()]. -available_regs(#state{available_regs = Available}) -> mask_to_list(Available). - -%%----------------------------------------------------------------------------- -%% @doc Free native registers. The passed list of registers can contain -%% registers, pointer to registers or other values that are ignored. -%% @end -%% @param State current backend state -%% @param Regs list of registers or other values -%% @return The updated backend state -%%----------------------------------------------------------------------------- --spec free_native_registers(state(), [value()]) -> state(). -free_native_registers(State, []) -> - State; -free_native_registers(State, [Reg | Rest]) -> - State1 = free_native_register(State, Reg), - free_native_registers(State1, Rest). - --spec free_native_register(state(), value()) -> state(). 
-free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Reg -) when - is_atom(Reg) --> - Bit = reg_bit(Reg), - State#state{ - available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) - }; -free_native_register(State, {ptr, Reg}) -> - free_native_register(State, Reg); -free_native_register(State, _Other) -> - State. - -%%----------------------------------------------------------------------------- -%% @doc Assert that all native scratch registers are available. This is used -%% for debugging and not in production. -%% @end -%% @param State current backend state -%% @return ok -%%----------------------------------------------------------------------------- --spec assert_all_native_free(state()) -> ok. -assert_all_native_free(State) -> - 0 = State#state.used_regs, - ?AVAILABLE_REGS_MASK = State#state.available_regs, - ok. +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1, +%% first_avail/1, mask_to_list/1, args_regs/1, prepare_call_scratch/1) is shared +%% across the register-based backends and flows through jit_regs. +-define(FIRST_AVAIL_REGS, ?AVAILABLE_REGS). +-define(MASK_TO_LIST_REGS, ?FIRST_AVAIL_REGS). +-define(JITSTATE_ARG_REG, jit_state). +-include("jit_backend_regs_impl.hrl"). %%----------------------------------------------------------------------------- %% @doc Emit the jump table at the beginning of the module. 
Branches will be @@ -586,31 +527,28 @@ call_primitive( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used - } = State, - Primitive, - Args -) when Available =/= 0 -> - % Use an available register for loading the function pointer - TempReg = first_avail(Available), - TempBit = reg_bit(TempReg), - PrepCall = load_primitive_ptr(Primitive, TempReg), - Stream1 = StreamModule:append(Stream0, PrepCall), - Regs0 = jit_regs:invalidate_reg(State#state.regs, TempReg), - StateCall = State#state{ - stream = Stream1, - available_regs = Available band (bnot TempBit), - used_regs = Used bor TempBit, regs = Regs0 - }, - call_func_ptr(StateCall, {free, TempReg}, Args); -call_primitive( - #state{available_regs = 0} = State, + } = State, Primitive, Args ) -> - call_func_ptr(State, {primitive, Primitive}, Args). + Available = jit_regs:available_regs(Regs0), + case Available of + 0 -> + call_func_ptr(State, {primitive, Primitive}, Args); + _ -> + % Use an available register for loading the function pointer + TempReg = first_avail(Available), + TempBit = reg_bit(TempReg), + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + Regs1 = jit_regs:invalidate_reg(Regs0, TempReg), + StateCall = State#state{ + stream = Stream1, + regs = jit_regs:alloc_reg(Regs1, TempBit) + }, + call_func_ptr(StateCall, {free, TempReg}, Args) + end. %%----------------------------------------------------------------------------- %% @doc Emit a jump (call without return) to a primitive with arguments. 
This @@ -634,15 +572,8 @@ call_primitive_last( % We need a register for the function pointer that should not be used as a parameter % Since we're not returning, we can use all scratch registers except % registers used for parameters - ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - ArgsRegs = args_regs(Args), - ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), - ArgsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), - ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), - Temp = first_avail(ScratchMask), - TempBit = reg_bit(Temp), - AvailableRegs1 = ScratchMask band (bnot TempBit), - UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), + #{temp := Temp, available_mask := AvailableRegs1, used_mask := UsedRegs} = + prepare_call_scratch(Args), PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), @@ -650,7 +581,8 @@ call_primitive_last( % not whatever value the cache may have recorded for it. Regs1 = jit_regs:invalidate_reg(State0#state.regs, Temp), State1 = State0#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs, regs = Regs1 + stream = Stream1, + regs = jit_regs:set_masks(Regs1, AvailableRegs1, UsedRegs) }, % Preprocess offset special arg @@ -694,9 +626,9 @@ call_primitive_last( tail_call_with_jit_state_registers_only(State2, Temp) end, State5 = State4#state{ - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State4#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State4#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) }, flush_literal_pool(State5). 
@@ -750,8 +682,6 @@ return_if_not_equal_to_ctx( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, {free, Reg} @@ -770,9 +700,7 @@ return_if_not_equal_to_ctx( RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs0 bor RegBit, - used_regs = UsedRegs0 band (bnot RegBit), - regs = Regs0 + regs = jit_regs:free_reg(Regs0, RegBit) }. %%----------------------------------------------------------------------------- @@ -820,11 +748,12 @@ jump_to_continuation( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, + regs = Regs0, offset = BaseOffset } = State0, {free, OffsetReg} ) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), % ARM32 PIC implementation: % 1. Use mov(al, Temp, pc) to read PC (gives current instruction address + 8) @@ -867,9 +796,9 @@ jump_to_continuation( % Free all registers as this is a terminal instruction State2 = State1#state{ stream = Stream2, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State1#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State1#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) }, flush_literal_pool(State2). 
@@ -926,9 +855,10 @@ if_block( Stream2, Replacements ), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}; + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}; if_block( #state{stream_module = StreamModule, stream = Stream0} = State0, Cond, @@ -943,9 +873,10 @@ if_block( BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), NewBranchInstr = jit_arm32_asm:b(CC, BranchOffset), Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}. %%----------------------------------------------------------------------------- %% @doc Emit an if else block, i.e. emit a test of a condition and @@ -983,8 +914,6 @@ if_else_block( %% Build the else block StateElse = State2#state{ stream = Stream4, - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, regs = State1#state.regs }, State3 = BlockFalseFn(StateElse), @@ -994,9 +923,10 @@ if_else_block( FinalJumpOffset = OffsetFinal - ElseJumpOffset, NewElseJumpInstr = jit_arm32_asm:b(al, FinalJumpOffset), Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), - State4 = merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs), - MergedRegs = jit_regs:merge(State2#state.regs, State3#state.regs), - State4#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State2#state.regs, State3#state.regs, ?AVAILABLE_REGS_MASK + ), + State3#state{stream = Stream6, regs = MergedRegs}. 
-spec if_block_cond(state(), condition()) -> {state(), jit_arm32_asm:cc(), non_neg_integer()}. @@ -1021,9 +951,10 @@ if_block_cond( {State2, le, byte_size(I1)}; %% Handle {Val, '<', Reg} for values > 255, need to load into temp register if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {Val, '<', RegOrTuple} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1078,9 +1009,10 @@ if_block_cond( State2 = State1#state{stream = Stream1}, {State2, CC, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '<', Val} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1101,9 +1033,10 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, available_regs = Available} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {Val, '<', RegOrTuple} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1208,9 +1141,10 @@ if_block_cond( State3 = if_block_free_reg({free, RegB}, State2), {State3, CC, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '==', Val} -) when is_integer(Val), Available =/= 0 -> +) when 
is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Offset0 = StreamModule:offset(Stream0), Reg = @@ -1230,9 +1164,10 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '!=', Val} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Offset0 = StreamModule:offset(Stream0), Reg = @@ -1254,8 +1189,7 @@ if_block_cond( if_block_cond( #state{ stream_module = StreamModule, - stream = Stream0, - available_regs = _Available + stream = Stream0 } = State0, {'(bool)', RegOrTuple, '==', false} ) -> @@ -1298,10 +1232,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {RegOrTuple, '&', Val, '!=', 0} -) when Available =/= 0 -> +) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1332,10 +1267,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {Reg, '&', 16#F, '!=', 16#F} -) when ?IS_GPR(Reg), Available =/= 0 -> +) when ?IS_GPR(Reg) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG I1 = jit_arm32_asm:and_(al, Temp, Reg, 16#F), @@ -1367,10 +1303,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {Reg, '&', Mask, '!=', Val} -) when ?IS_GPR(Reg), Available =/= 0 -> +) when ?IS_GPR(Reg) -> + Available = jit_regs:available_regs(Regs0), + Used = jit_regs:used_regs(Regs0), Temp = 
first_avail(Available), TempBit = reg_bit(Temp), AT = Available band (bnot TempBit), @@ -1379,7 +1317,9 @@ if_block_cond( I1 = jit_arm32_asm:mov(al, Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), + {State2, Temp} = and_( + State1#state{regs = jit_regs:set_masks(Regs0, AT, Used)}, {free, Temp}, Mask + ), Stream2 = State2#state.stream, % Compare with value I2 = jit_arm32_asm:cmp(al, Temp, Val), @@ -1390,7 +1330,10 @@ if_block_cond( Stream4 = StreamModule:append(Stream3, <<16#FFFFFFFF:32>>), Regs3 = jit_regs:invalidate_reg(State2#state.regs, Temp), State3 = State2#state{ - stream = Stream4, available_regs = State2#state.available_regs bor TempBit, regs = Regs3 + stream = Stream4, + regs = jit_regs:set_available_regs( + Regs3, jit_regs:available_regs(State2#state.regs) bor TempBit + ) }, {State3, CC, OffsetAfter - OffsetBefore}; if_block_cond( @@ -1417,23 +1360,14 @@ if_block_cond( -spec if_block_free_reg(arm32_register() | {free, arm32_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, used_regs = UR0} = State0, + #state{regs = Regs0} = State0, Bit = reg_bit(Reg), - AvR1 = AvR0 bor Bit, - UR1 = UR0 band (bnot Bit), State0#state{ - available_regs = AvR1, - used_regs = UR1 + regs = jit_regs:free_reg(Regs0, Bit) }; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. --spec merge_used_regs(state(), non_neg_integer()) -> state(). -merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. 
- %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively %% dividing it by 2^Shift @@ -1457,8 +1391,6 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1466,6 +1398,7 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I = jit_arm32_asm:lsr(al, ResultReg, Reg, Shift), @@ -1474,9 +1407,7 @@ shift_right( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ResultReg }. @@ -1496,8 +1427,6 @@ shift_right_arith( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1505,6 +1434,7 @@ shift_right_arith( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I = jit_arm32_asm:asr(al, ResultReg, Reg, Shift), @@ -1513,9 +1443,7 @@ shift_right_arith( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ResultReg }. @@ -1553,12 +1481,13 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0Mask, - used_regs = UsedRegs0Mask + regs = Regs0 } = State0, FuncPtrTuple, Args ) -> + AvailableRegs0Mask = jit_regs:available_regs(Regs0), + UsedRegs0Mask = jit_regs:used_regs(Regs0), AvailableRegs0 = mask_to_list(AvailableRegs0Mask), UsedRegs0 = mask_to_list(UsedRegs0Mask), FreeRegs = lists:flatmap( @@ -1612,9 +1541,12 @@ call_func_ptr( % and the currently available registers to push values to the stack. 
SetArgsPushStackAvailableArgs = (UsedRegs1 -- (RegArgsRegs ++ StackArgsRegs)) ++ AvailableRegs0, State1 = State0#state{ - available_regs = jit_regs:regs_to_mask(SetArgsPushStackAvailableArgs, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask( - ?AVAILABLE_REGS -- SetArgsPushStackAvailableArgs, fun reg_bit/1 + regs = jit_regs:set_masks( + Regs0, + jit_regs:regs_to_mask(SetArgsPushStackAvailableArgs, fun reg_bit/1), + jit_regs:regs_to_mask( + ?AVAILABLE_REGS -- SetArgsPushStackAvailableArgs, fun reg_bit/1 + ) ), stream = Stream1 }, @@ -1625,7 +1557,7 @@ call_func_ptr( [Arg5, Args6] -> set_stack_args(State1, Arg5, Args6) end, - SetArgsRegsOnlyAvailableArgs = mask_to_list(State2#state.available_regs), + SetArgsRegsOnlyAvailableArgs = mask_to_list(jit_regs:available_regs(State2#state.regs)), ParameterRegs = parameter_regs(RegArgs0), {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} = case FuncPtrTuple of @@ -1678,8 +1610,11 @@ call_func_ptr( end, State3 = State2#state{ - available_regs = jit_regs:regs_to_mask(SetArgsAvailableRegs, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask(?AVAILABLE_REGS -- SetArgsAvailableRegs, fun reg_bit/1), + regs = jit_regs:set_masks( + State2#state.regs, + jit_regs:regs_to_mask(SetArgsAvailableRegs, fun reg_bit/1), + jit_regs:regs_to_mask(?AVAILABLE_REGS -- SetArgsAvailableRegs, fun reg_bit/1) + ), stream = Stream3 }, @@ -1751,9 +1686,11 @@ call_func_ptr( { State4#state{ stream = Stream8, - available_regs = jit_regs:regs_to_mask(AvailableRegs3, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask(UsedRegs2, fun reg_bit/1), - regs = Regs1 + regs = jit_regs:set_masks( + Regs1, + jit_regs:regs_to_mask(AvailableRegs3, fun reg_bit/1), + jit_regs:regs_to_mask(UsedRegs2, fun reg_bit/1) + ) }, ResultReg }. @@ -1848,11 +1785,12 @@ set_registers_args(State0, Args, StackOffset) -> set_registers_args(State0, Args, ParamRegs, StackOffset). 
set_registers_args( - #state{used_regs = UsedRegsMask} = State0, + #state{regs = Regs0} = State0, Args, ParamRegs, StackOffset ) -> + UsedRegsMask = jit_regs:used_regs(Regs0), ArgsRegs = args_regs(Args), UsedRegsList = mask_to_list(UsedRegsMask), AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegsList, @@ -1873,8 +1811,11 @@ set_registers_args( FinalUsed = ParamRegs ++ (NewUsedRegsList -- ParamRegs), State1#state{ stream = Stream1, - available_regs = jit_regs:regs_to_mask(FinalAvail, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask(FinalUsed, fun reg_bit/1) + regs = jit_regs:set_masks( + State1#state.regs, + jit_regs:regs_to_mask(FinalAvail, fun reg_bit/1), + jit_regs:regs_to_mask(FinalUsed, fun reg_bit/1) + ) }. parameter_regs(Args) -> @@ -1991,11 +1932,12 @@ set_registers_args1( Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_registers_args1( - #state{available_regs = AvailRegs} = State, + #state{regs = Regs0} = State, {y_reg, X}, Reg, _StackOffset ) -> + AvailRegs = jit_regs:available_regs(Regs0), ldr_y_reg(State, Reg, X, AvailRegs); set_registers_args1( #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset @@ -2049,10 +1991,11 @@ move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) -> Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, Src, {y_reg, Y} + #state{regs = Regs0} = State0, Src, {y_reg, Y} ) when is_atom(Src) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp1)), Code = str_y_reg(Src, Y, Temp1, AT), @@ -2067,10 +2010,11 @@ move_to_vm_register_emit( State0#state{stream = Stream1, regs = Regs2}; % Source is an integer to y_reg (optimized: ldr first, then movs) move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, N, {y_reg, Y} + 
#state{regs = Regs0} = State0, N, {y_reg, Y} ) when is_integer(N), N >= 0, N =< 255 -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail1 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail1), @@ -2087,63 +2031,75 @@ move_to_vm_register_emit( end, State0#state{stream = Stream1, regs = Regs2}; % Source is an integer (0-255 for movs, negative values need different handling) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N), N >= 0, N =< 255 -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_arm32_asm:mov(al, Temp, N), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; + with_temp( + State0, + fun(StateAT, Temp, _AT) -> + I1 = jit_arm32_asm:mov(al, Temp, N), + Stream1 = (StateAT#state.stream_module):append(StateAT#state.stream, I1), + move_to_vm_register(StateAT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); %% Handle large values using simple literal pool (branch-over pattern) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), - State2 = move_to_vm_register(State1, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State2#state.regs, Temp), - State2#state{available_regs = AR0, regs = Regs1}; + with_temp( + State0, + fun(StateAT, Temp, _AT) -> + State1 = mov_immediate(StateAT, Temp, N), + move_to_vm_register(State1, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) 
end + ); % Source is a VM register -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_arm32_asm:ldr(al, Temp, ?X_REG(?MAX_REG)), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_arm32_asm:ldr(al, Temp, ?X_REG(X)), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_arm32_asm:ldr(al, Temp, {Reg, 0}), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - State0a = ldr_y_reg(State0#state{available_regs = AT}, Temp, Y, AT), - State1 = move_to_vm_register(State0a, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; +move_to_vm_register_emit(State0, {x_reg, extra}, Dest) -> + with_temp( + State0, + fun(StateAT, Temp, _AT) -> + I1 = 
jit_arm32_asm:ldr(al, Temp, ?X_REG(?MAX_REG)), + Stream1 = (StateAT#state.stream_module):append(StateAT#state.stream, I1), + move_to_vm_register(StateAT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, ?MAX_REG}) end + ); +move_to_vm_register_emit(State0, {x_reg, X}, Dest) -> + with_temp( + State0, + fun(StateAT, Temp, _AT) -> + I1 = jit_arm32_asm:ldr(al, Temp, ?X_REG(X)), + Stream1 = (StateAT#state.stream_module):append(StateAT#state.stream, I1), + move_to_vm_register(StateAT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, X}) end + ); +move_to_vm_register_emit(State0, {ptr, Reg}, Dest) -> + with_temp( + State0, + fun(StateAT, Temp, _AT) -> + I1 = jit_arm32_asm:ldr(al, Temp, {Reg, 0}), + Stream1 = (StateAT#state.stream_module):append(StateAT#state.stream, I1), + move_to_vm_register(StateAT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:invalidate_reg(Regs, Temp) end + ); +move_to_vm_register_emit(State0, {y_reg, Y}, Dest) -> + with_temp( + State0, + fun(StateAT, Temp, AT) -> + State0a = ldr_y_reg(StateAT, Temp, Y, AT), + move_to_vm_register(State0a, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {y_reg, Y}) end + ); % term_to_float move_to_vm_register_emit( #state{ stream_module = StreamModule, - available_regs = Avail, + regs = Regs0, stream = Stream0, variant = Variant } = @@ -2151,6 +2107,7 @@ move_to_vm_register_emit( {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_arm32_asm:ldr(al, Temp1, ?FP_REGS), @@ -2172,6 +2129,19 @@ move_to_vm_register_emit( Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp1), Temp2), State1#state{stream = Stream1, regs = Regs1}. 
+-spec with_temp( + state(), + fun((state(), arm32_register(), non_neg_integer()) -> state()), + fun((jit_regs:regs(), arm32_register()) -> jit_regs:regs()) +) -> state(). +with_temp(#state{regs = Regs0} = State0, EmitFun, ContentsFun) -> + AR0 = jit_regs:available_regs(Regs0), + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), + State1 = EmitFun(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, AT), + Regs1 = jit_regs:set_available_regs(ContentsFun(State1#state.regs, Temp), AR0), + State1#state{regs = Regs1}. + %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. %% @end @@ -2188,12 +2158,13 @@ move_to_vm_register_emit( vm_register() | arm32_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 4095 -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:ldr(al, Temp, {Reg, Index * 4}), I2 = jit_arm32_asm:str(al, Temp, ?X_REG(X)), @@ -2202,12 +2173,13 @@ move_array_element( Regs2 = jit_regs:invalidate_reg(Regs1, Temp), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, available_regs = Avail} = + #state{stream_module = StreamModule, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), % For large offsets, use max offset (4092) in ldr + remainder in temp register @@ -2228,12 +2200,13 @@ move_array_element( Regs2 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs1, Temp1), Temp2), 
State1#state{stream = Stream2, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 4095 -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:ldr(al, Temp, {Reg, Index * 4}), I2 = jit_arm32_asm:str(al, Temp, {Dest, 0}), @@ -2241,12 +2214,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, available_regs = Avail} = + #state{stream_module = StreamModule, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % For large offsets, use max offset (4092) in ldr + remainder in temp register Offset = Index * 4, @@ -2262,12 +2236,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), State1#state{stream = Stream2, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 4095 -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail1 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail1), @@ -2281,13 +2256,14 @@ move_array_element( State#state{stream = Stream1, regs = Regs2}; move_array_element( #state{ - stream_module = StreamModule, available_regs = Avail + stream_module = StreamModule, regs = Regs0 } = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail1 = 
Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail1), @@ -2307,12 +2283,13 @@ move_array_element( Regs2 = jit_regs:invalidate_vm_loc(Regs1, {y_reg, Y}), State1#state{stream = Stream2, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), I1 = jit_arm32_asm:ldr(al, Reg, {Reg, Index * 4}), @@ -2333,8 +2310,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2345,23 +2320,17 @@ move_array_element( I2 = jit_arm32_asm:ldr(al, IndexReg, {Reg, IndexReg}), I3 = jit_arm32_asm:str(al, IndexReg, ?X_REG(X)), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), Regs2 = jit_regs:invalidate_reg(Regs1, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs2 + regs = jit_regs:free_reg(Regs2, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2372,28 +2341,23 @@ move_array_element( I2 = jit_arm32_asm:ldr(al, IndexReg, {Reg, IndexReg}), I3 = jit_arm32_asm:str(al, IndexReg, {PtrReg, 0}), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs1 + regs = 
jit_regs:free_reg(Regs1, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when is_atom(IndexReg) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), I1 = jit_arm32_asm:lsl(al, IndexReg, IndexReg, 2), I2 = jit_arm32_asm:ldr(al, IndexReg, {Reg, IndexReg}), Temp = first_avail(AvailableRegs0), @@ -2402,18 +2366,14 @@ move_array_element( Code = str_y_reg(IndexReg, Y, Temp, AT), I3 = Code, Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), IndexReg), Regs2 = jit_regs:invalidate_vm_loc(Regs1, {y_reg, Y}), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs2 + regs = jit_regs:free_reg(Regs2, Bit) }. %% @doc move reg[x] to a vm or native register @@ -2435,11 +2395,12 @@ get_array_element( get_array_element( #state{ stream_module = StreamModule, - available_regs = Avail + regs = Regs0 } = State, {free, Reg}, Index ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % For large offsets, split into ldr immediate (max 4092) + remainder in temp register Offset = Index * 4, @@ -2455,13 +2416,12 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, Index ) when Index * 4 =< 4095 -> + Avail = jit_regs:available_regs(Regs0), ElemReg = first_avail(Avail), ElemBit = reg_bit(ElemReg), I1 = jit_arm32_asm:ldr(al, ElemReg, {Reg, Index * 4}), @@ -2470,21 +2430,19 @@ get_array_element( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ElemBit), - used_regs = UsedRegs0 bor ElemBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ElemBit) }, ElemReg }; get_array_element( 
#state{ stream_module = StreamModule, - available_regs = Avail, - used_regs = UsedRegs0 + regs = Regs0 } = State, Reg, Index ) -> + Avail = jit_regs:available_regs(Regs0), ElemReg = first_avail(Avail), ElemBit = reg_bit(ElemReg), Avail1 = Avail band (bnot ElemBit), @@ -2502,9 +2460,7 @@ get_array_element( { State1#state{ stream = Stream2, - available_regs = Avail1, - used_regs = UsedRegs0 bor ElemBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ElemBit) }, ElemReg }. @@ -2523,11 +2479,12 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, available_regs = Avail} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, ValueReg, Reg, Index ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % For large offsets, split into str immediate (max 4092) + remainder in temp register Offset = Index * 4, @@ -2541,12 +2498,13 @@ move_to_array_element( Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), State1#state{stream = Stream2, regs = Regs1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:mov(al, Temp, IndexReg), I2 = jit_arm32_asm:lsl(al, Temp, Temp, 2), @@ -2573,12 +2531,13 @@ move_to_array_element( ) when is_integer(IndexReg) andalso is_integer(Offset) -> move_to_array_element(State, Value, BaseReg, IndexReg + Offset); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = 
State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:add(al, Temp, IndexReg, Offset), I2 = jit_arm32_asm:lsl(al, Temp, Temp, 2), @@ -2593,7 +2552,7 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - Temp = first_avail(State1#state.available_regs), + Temp = first_avail(jit_regs:available_regs(State1#state.regs)), I1 = jit_arm32_asm:add(al, Temp, IndexReg, Offset), I2 = jit_arm32_asm:lsl(al, Temp, Temp, 2), I3 = jit_arm32_asm:str(al, ValueReg, {BaseReg, Temp}), @@ -2611,16 +2570,17 @@ move_to_native_register(#state{regs = Regs} = State, Value) -> case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of {ok, CachedReg} -> Bit = reg_bit(CachedReg), - case State#state.used_regs band Bit of + Used = jit_regs:used_regs(Regs), + case Used band Bit of 0 -> - case State#state.available_regs band Bit of + Avail = jit_regs:available_regs(Regs), + case Avail band Bit of 0 -> move_to_native_register_emit(State, Value, Contents); _ -> { State#state{ - used_regs = State#state.used_regs bor Bit, - available_regs = State#state.available_regs band (bnot Bit) + regs = jit_regs:alloc_reg(Regs, Bit) }, CachedReg } @@ -2636,13 +2596,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, cp, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_arm32_asm:ldr(al, Reg, ?CP), @@ -2651,9 +2610,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2667,56 +2624,40 @@ move_to_native_register_emit( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), 
{State#state{stream = Stream1, regs = Regs1}, Reg}; move_to_native_register_emit( - #state{ - available_regs = Avail, - used_regs = Used, - regs = Regs0 - } = State0, + #state{regs = Regs0} = State0, Imm, Contents ) when is_integer(Imm) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), - State1 = State0#state{ - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 - }, + Regs2 = jit_regs:alloc_reg(Regs1, Bit), + State1 = State0#state{regs = Regs2}, {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, extra}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_arm32_asm:ldr(al, Reg, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I1), Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), - { - State#state{ - stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 - }, - Reg - }; + Regs2 = jit_regs:alloc_reg(Regs1, Bit), + {State#state{stream = Stream1, regs = Regs2}, Reg}; move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, X}, @@ -2724,52 +2665,42 @@ move_to_native_register_emit( ) when X < ?MAX_REG -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_arm32_asm:ldr(al, Reg, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), - { - State#state{ - stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 - }, - Reg - }; + Regs2 = jit_regs:alloc_reg(Regs1, Bit), + {State#state{stream = Stream1, regs = Regs2}, Reg}; 
move_to_native_register_emit( - #state{ - available_regs = Avail, - used_regs = Used - } = State, + #state{regs = Regs0} = State, {y_reg, Y}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), AvailT = Avail band (bnot Bit), State1 = ldr_y_reg( - State#state{available_regs = AvailT, used_regs = Used bor Bit}, + State#state{regs = jit_regs:alloc_reg(Regs0, Bit)}, Reg, Y, AvailT ), Regs1 = jit_regs:set_contents(State1#state.regs, Reg, Contents), - { - State1#state{regs = Regs1}, - Reg - }; + {State1#state{regs = Regs1}, Reg}; move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used + regs = Regs0 } = State, {fp_reg, F}, _Contents ) -> + Avail = jit_regs:available_regs(Regs0), + Used = jit_regs:used_regs(Regs0), RegA = first_avail(Avail), BitA = reg_bit(RegA), Avail1 = Avail band (bnot BitA), @@ -2781,10 +2712,8 @@ move_to_native_register_emit( I3 = jit_arm32_asm:ldr(al, RegB, {RegB, F * 8 + 4}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - { - State#state{stream = Stream1, available_regs = AvailT, used_regs = Used bor BitA bor BitB}, - {fp, RegA, RegB} - }. + Regs1 = jit_regs:set_masks(Regs0, AvailT, Used bor BitA bor BitB), + {State#state{stream = Stream1, regs = Regs1}, {fp, RegA, RegB}}. -spec move_to_native_register(state(), value(), arm32_register()) -> state(). 
move_to_native_register( @@ -2826,14 +2755,15 @@ move_to_native_register( Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{available_regs = AT} = State, + #state{regs = Regs0} = State, {y_reg, Y}, RegDst ) -> + AT = jit_regs:available_regs(Regs0), State1 = ldr_y_reg(State, RegDst, Y, AT), - #state{regs = Regs0} = State1, - Regs1 = jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}), - State1#state{regs = Regs1}; + #state{regs = Regs1} = State1, + Regs2 = jit_regs:set_contents(Regs1, RegDst, {y_reg, Y}), + State1#state{regs = Regs2}; move_to_native_register( #state{ stream_module = StreamModule, @@ -2854,24 +2784,22 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, Reg ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = jit_arm32_asm:mov(al, SaveReg, Reg), Stream1 = StreamModule:append(Stream0, I1), SrcContents = jit_regs:get_contents(Regs0, Reg), Regs1 = jit_regs:set_contents(Regs0, SaveReg, SrcContents), + Regs2 = jit_regs:alloc_reg(Regs1, SaveBit), { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = Regs2 }, SaveReg }; @@ -2879,23 +2807,21 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {ptr, Reg} ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = jit_arm32_asm:ldr(al, SaveReg, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), Regs1 = jit_regs:invalidate_reg(Regs0, SaveReg), + Regs2 = jit_regs:alloc_reg(Regs1, SaveBit), { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = 
Regs1 + regs = Regs2 }, SaveReg }; @@ -2903,9 +2829,10 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{available_regs = Avail} = State, + #state{regs = Regs0} = State, {y_reg, Y} ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), AvailT = Avail band (bnot reg_bit(Reg)), State1 = ldr_y_reg(State, Reg, Y, AvailT), @@ -2914,10 +2841,11 @@ move_to_cp( State1#state{stream = Stream1}. increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Offset ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), I1 = jit_arm32_asm:ldr(al, Reg, ?Y_REGS), I2 = jit_arm32_asm:add(al, Reg, Reg, Offset * 4), @@ -2932,10 +2860,11 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, offset = JumpTableOffset, - available_regs = Avail + regs = Regs0 } = State, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), % Calculate jump table entry offset @@ -2970,11 +2899,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), OffsetRef = make_ref(), @@ -3013,11 +2942,10 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), Avail1 = Avail band (bnot RegBit), @@ -3030,12 +2958,11 @@ get_module_index( Stream1 = StreamModule:append(Stream0, Code), Regs1 = jit_regs:invalidate_reg(Regs0, TempJitState), Regs2 = jit_regs:set_contents(Regs1, Reg, 
module_index), + Regs3 = jit_regs:alloc_reg(Regs2, RegBit), { State#state{ stream = Stream1, - available_regs = Avail1, - used_regs = UsedRegs0 bor RegBit, - regs = Regs2 + regs = Regs3 }, Reg }. @@ -3066,81 +2993,98 @@ and_( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), {State0#state{stream = Stream1, regs = Regs1}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = Avail} = State0, - {free, Reg}, - Val -) when Avail =/= 0 andalso Val < 0 andalso Val >= -256 -> - Temp = first_avail(Avail), - TempBit = reg_bit(Temp), - AT = Avail band (bnot TempBit), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), - Stream1 = State1#state.stream, - I = jit_arm32_asm:bic(al, Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), - {State1#state{available_regs = AT bor TempBit, stream = Stream2, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = Avail} = State0, - {free, Reg}, - Val -) when Avail =/= 0 -> - Temp = first_avail(Avail), - TempBit = reg_bit(Temp), - AT = Avail band (bnot TempBit), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_arm32_asm:and_(al, Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), - {State1#state{available_regs = AT bor TempBit, stream = Stream2, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = 0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> - % No available registers, use r0 as temp and save it to r12 - Stream0 = State0#state.stream, - % Save r0 to r12 - Save = jit_arm32_asm:mov(al, ?IP_REG, r0), - Stream1 = StreamModule:append(Stream0, Save), - % Load immediate value into r0 - State1 = 
mov_immediate(State0#state{stream = Stream1}, r0, bnot (Val)), - Stream2 = State1#state.stream, - % Perform BIC operation - I = jit_arm32_asm:bic(al, Reg, Reg, r0), - Stream3 = StreamModule:append(Stream2, I), - % Restore r0 from r12 - Restore = jit_arm32_asm:mov(al, r0, ?IP_REG), - Stream4 = StreamModule:append(Stream3, Restore), - Regs1 = jit_regs:invalidate_reg(State0#state.regs, Reg), - {State0#state{stream = Stream4, regs = Regs1}, Reg}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + % No available registers, use r0 as temp and save it to r12 + Stream0 = State0#state.stream, + % Save r0 to r12 + Save = jit_arm32_asm:mov(al, ?IP_REG, r0), + Stream1 = StreamModule:append(Stream0, Save), + % Load immediate value into r0 + State1 = mov_immediate(State0#state{stream = Stream1}, r0, bnot (Val)), + Stream2 = State1#state.stream, + % Perform BIC operation + I = jit_arm32_asm:bic(al, Reg, Reg, r0), + Stream3 = StreamModule:append(Stream2, I), + % Restore r0 from r12 + Restore = jit_arm32_asm:mov(al, r0, ?IP_REG), + Stream4 = StreamModule:append(Stream3, Restore), + Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + {State0#state{stream = Stream4, regs = Regs1}, Reg}; + _ -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), + AT = Avail band (bnot TempBit), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, bnot (Val) + ), + Stream1 = State1#state.stream, + I = jit_arm32_asm:bic(al, Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, AT bor TempBit) + }, + Reg + } + end; and_( - #state{stream_module = StreamModule, available_regs = 0, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val ) -> - % No available registers, use r0 as temp and save it to r12 - Stream0 = 
State0#state.stream, - % Save r0 to r12 - Save = jit_arm32_asm:mov(al, ?IP_REG, r0), - Stream1 = StreamModule:append(Stream0, Save), - % Load immediate value into r0 - State1 = mov_immediate(State0#state{stream = Stream1}, r0, Val), - Stream2 = State1#state.stream, - % Perform AND operation - I = jit_arm32_asm:and_(al, Reg, Reg, r0), - Stream3 = StreamModule:append(Stream2, I), - % Restore r0 from r12 - Restore = jit_arm32_asm:mov(al, r0, ?IP_REG), - Stream4 = StreamModule:append(Stream3, Restore), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), - {State0#state{stream = Stream4, regs = Regs1}, Reg}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + % No available registers, use r0 as temp and save it to r12 + Stream0 = State0#state.stream, + % Save r0 to r12 + Save = jit_arm32_asm:mov(al, ?IP_REG, r0), + Stream1 = StreamModule:append(Stream0, Save), + % Load immediate value into r0 + State1 = mov_immediate(State0#state{stream = Stream1}, r0, Val), + Stream2 = State1#state.stream, + % Perform AND operation + I = jit_arm32_asm:and_(al, Reg, Reg, r0), + Stream3 = StreamModule:append(Stream2, I), + % Restore r0 from r12 + Restore = jit_arm32_asm:mov(al, r0, ?IP_REG), + Stream4 = StreamModule:append(Stream3, Restore), + Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + {State0#state{stream = Stream4, regs = Regs1}, Reg}; + _ -> + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), + AT = Avail band (bnot TempBit), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_arm32_asm:and_(al, Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, AT bor TempBit) + }, + Reg + } + end; and_( - #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR, regs = Regs0} = + 
#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, ?TERM_PRIMARY_CLEAR_MASK ) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I1 = jit_arm32_asm:lsr(al, ResultReg, Reg, 2), @@ -3150,9 +3094,7 @@ and_( { State0#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. @@ -3165,18 +3107,21 @@ or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; or_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_arm32_asm:orr(al, Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
xor_( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Reg, SrcReg @@ -3188,18 +3133,21 @@ xor_( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; xor_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_arm32_asm:eor(al, Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
add(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Reg, Val) when (Val >= 0 andalso Val =< 255) orelse is_atom(Val) @@ -3208,15 +3156,18 @@ add(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; -add(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, Reg, Val) -> +add(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_arm32_asm:add(al, Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
mov_immediate( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val @@ -3346,21 +3297,25 @@ sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I1), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{stream = Stream1, regs = Regs1}; -sub(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, Reg, Val) -> +sub(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_arm32_asm:sub(al, Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> +mul(#state{regs = Regs0} = State, Reg, 3) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:lsl(al, Temp, Reg, 1), I2 = jit_arm32_asm:add(al, Reg, Temp, Reg), @@ -3369,7 +3324,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> +mul(#state{regs = Regs0} = State, Reg, 5) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:lsl(al, Temp, Reg, 2), I2 = jit_arm32_asm:add(al, Reg, Temp, Reg), @@ -3379,7 +3335,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> +mul(#state{regs = Regs0} = State, Reg, 7) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:lsl(al, Temp, Reg, 3), I2 = jit_arm32_asm:sub(al, Reg, Temp, Reg), @@ -3388,7 +3345,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> +mul(#state{regs = Regs0} = State, Reg, 9) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:lsl(al, Temp, Reg, 3), I2 = jit_arm32_asm:add(al, Reg, Temp, Reg), @@ -3398,7 +3356,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 15) -> +mul(#state{regs = Regs0} = State, Reg, 15) -> + Avail = 
jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:lsl(al, Temp, Reg, 4), I2 = jit_arm32_asm:sub(al, Reg, Temp, Reg), @@ -3412,20 +3371,26 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempBit = reg_bit(Temp), AT = Avail band (bnot TempBit), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_arm32_asm:mul(al, Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp), Reg), State1#state{ - stream = Stream2, available_regs = State1#state.available_regs bor TempBit, regs = Regs1 + stream = Stream2, + regs = jit_regs:set_available_regs( + Regs1, jit_regs:available_regs(State1#state.regs) bor TempBit + ) }; mul( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg @@ -3442,9 +3407,10 @@ mul( -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). 
decrement_reductions_and_maybe_schedule_next( #state{ - stream_module = StreamModule, stream = Stream0, available_regs = Avail + stream_module = StreamModule, stream = Stream0, regs = Regs0 } = State0 ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), % Load jit_state pointer from stack @@ -3485,9 +3451,8 @@ decrement_reductions_and_maybe_schedule_next( Stream5 = StreamModule:replace( Stream4, BNEOffset, <> ), - State3 = merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs), %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. - State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. + State2#state{stream = Stream5, regs = jit_regs:invalidate_all(State1#state.regs)}. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> @@ -3499,10 +3464,11 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), % Load jit_state pointer from stack @@ -3554,11 +3520,12 @@ call_primitive_with_cp(State0, Primitive, Args) -> set_cp(State0) -> % get module index (dynamically) { - #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State1, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs1} = State1, Reg } = get_module_index( State0 ), + AvailRegs = jit_regs:available_regs(Regs1), % Get a temporary register from available registers TempReg = first_avail(AvailRegs), @@ -3614,10 +3581,11 @@ rewrite_cp_offset( end. 
set_bs( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, TermReg ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_arm32_asm:str(al, TermReg, ?BS), I2 = jit_arm32_asm:mov(al, Temp, 0), @@ -3759,69 +3727,6 @@ reg_bit(r13) -> ?REG_BIT_R13; reg_bit(r14) -> ?REG_BIT_R14; reg_bit(r15) -> ?REG_BIT_R15. -%% first_avail returns the first available register from a bitmask. -%% Order matches ?AVAILABLE_REGS = [r11, r10, r9, r8, r7, r6, r5, r4, r3, r1] -first_avail(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> r11; -first_avail(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> r10; -first_avail(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> r9; -first_avail(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> r8; -first_avail(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> r7; -first_avail(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> r6; -first_avail(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> r5; -first_avail(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> r4; -first_avail(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> r3; -first_avail(Mask) when Mask band ?REG_BIT_R1 =/= 0 -> r1. - -%% Convert bitmask to list, matching the order of ?AVAILABLE_REGS. -mask_to_list(0) -> []; -mask_to_list(Mask) -> mask_to_list_r11(Mask). - -mask_to_list_r11(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> [r11 | mask_to_list_r10(Mask)]; -mask_to_list_r11(Mask) -> mask_to_list_r10(Mask). -mask_to_list_r10(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> [r10 | mask_to_list_r9(Mask)]; -mask_to_list_r10(Mask) -> mask_to_list_r9(Mask). -mask_to_list_r9(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> [r9 | mask_to_list_r8(Mask)]; -mask_to_list_r9(Mask) -> mask_to_list_r8(Mask). -mask_to_list_r8(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> [r8 | mask_to_list_r7(Mask)]; -mask_to_list_r8(Mask) -> mask_to_list_r7(Mask). 
-mask_to_list_r7(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> [r7 | mask_to_list_r6(Mask)]; -mask_to_list_r7(Mask) -> mask_to_list_r6(Mask). -mask_to_list_r6(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> [r6 | mask_to_list_r5(Mask)]; -mask_to_list_r6(Mask) -> mask_to_list_r5(Mask). -mask_to_list_r5(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> [r5 | mask_to_list_r4(Mask)]; -mask_to_list_r5(Mask) -> mask_to_list_r4(Mask). -mask_to_list_r4(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> [r4 | mask_to_list_r3(Mask)]; -mask_to_list_r4(Mask) -> mask_to_list_r3(Mask). -mask_to_list_r3(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> [r3 | mask_to_list_r1(Mask)]; -mask_to_list_r3(Mask) -> mask_to_list_r1(Mask). -mask_to_list_r1(Mask) when Mask band ?REG_BIT_R1 =/= 0 -> [r1]; -mask_to_list_r1(_Mask) -> []. - -args_regs(Args) -> - lists:map( - fun - ({free, {ptr, Reg}}) -> Reg; - ({free, Reg}) when is_atom(Reg) -> Reg; - ({free, Imm}) when is_integer(Imm) -> imm; - (offset) -> imm; - (ctx) -> ?CTX_REG; - (jit_state) -> jit_state; - (jit_state_tail_call) -> jit_state; - (stack) -> stack; - (Reg) when is_atom(Reg) -> Reg; - (Imm) when is_integer(Imm) -> imm; - ({ptr, Reg}) -> Reg; - ({x_reg, _}) -> ?CTX_REG; - ({y_reg, _}) -> ?CTX_REG; - ({fp_reg, _}) -> ?CTX_REG; - ({free, {x_reg, _}}) -> ?CTX_REG; - ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG; - ({avm_int64_t, _}) -> imm - end, - Args - ). - %%----------------------------------------------------------------------------- %% @doc Add a label at the current offset. Eventually align it with a nop. 
%% @end diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b3555042b1..5b74cc9409 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -163,8 +163,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), branch_type()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}], @@ -306,12 +304,10 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, labels = #{}, variant = Variant, literal_pool = [], - regs = jit_regs:new(), + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0), thumb2 = (Variant band ?JIT_VARIANT_THUMB2) =/= 0 }. @@ -357,69 +353,14 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:bkpt(0)), State#state{stream = Stream1}. -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently used native registers. This is used for -%% debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of used registers -%%----------------------------------------------------------------------------- --spec used_regs(state()) -> [armv6m_register()]. -used_regs(#state{used_regs = Used}) -> mask_to_list(Used). - -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently available native scratch registers. This -%% is used for debugging and not in production. 
-%% @end -%% @param State current backend state -%% @return The list of available registers -%%----------------------------------------------------------------------------- --spec available_regs(state()) -> [armv6m_register()]. -available_regs(#state{available_regs = Available}) -> mask_to_list(Available). - -%%----------------------------------------------------------------------------- -%% @doc Free native registers. The passed list of registers can contain -%% registers, pointer to registers or other values that are ignored. -%% @end -%% @param State current backend state -%% @param Regs list of registers or other values -%% @return The updated backend state -%%----------------------------------------------------------------------------- --spec free_native_registers(state(), [value()]) -> state(). -free_native_registers(State, []) -> - State; -free_native_registers(State, [Reg | Rest]) -> - State1 = free_native_register(State, Reg), - free_native_registers(State1, Rest). - --spec free_native_register(state(), value()) -> state(). -free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Reg -) when - is_atom(Reg) --> - Bit = reg_bit(Reg), - State#state{ - available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) - }; -free_native_register(State, {ptr, Reg}) -> - free_native_register(State, Reg); -free_native_register(State, _Other) -> - State. - -%%----------------------------------------------------------------------------- -%% @doc Assert that all native scratch registers are available. This is used -%% for debugging and not in production. -%% @end -%% @param State current backend state -%% @return ok -%%----------------------------------------------------------------------------- --spec assert_all_native_free(state()) -> ok. -assert_all_native_free(State) -> - 0 = State#state.used_regs, - ?AVAILABLE_REGS_MASK = State#state.available_regs, - ok. 
+%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1, +%% first_avail/1, mask_to_list/1, args_regs/1, prepare_call_scratch/1) is shared +%% across the register-based backends and flows through jit_regs. +-define(FIRST_AVAIL_REGS, [r7, r6, r5, r4, r3, r1]). +-define(MASK_TO_LIST_REGS, ?FIRST_AVAIL_REGS). +-define(JITSTATE_ARG_REG, jit_state). +-include("jit_backend_regs_impl.hrl"). %%----------------------------------------------------------------------------- %% @doc Emit the jump table at the beginning of the module. Branches will be @@ -677,33 +618,39 @@ load_primitive_ptr(Primitive, TargetReg) -> %%----------------------------------------------------------------------------- -spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), armv6m_register()}. call_primitive( + #state{regs = Regs0} = State, + Primitive, + Args +) -> + Available = jit_regs:available_regs(Regs0), + call_primitive_impl(Available, State, Primitive, Args). 
+ +call_primitive_impl(0, State, Primitive, Args) -> + call_func_ptr(State, {primitive, Primitive}, Args); +call_primitive_impl( + Available, #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used + regs = Regs0 } = State, Primitive, Args -) when Available =/= 0 -> +) -> % Use a low register for LDR since ARM Thumb LDR only works with low registers TempReg = first_avail(Available), TempBit = reg_bit(TempReg), PrepCall = load_primitive_ptr(Primitive, TempReg), Stream1 = StreamModule:append(Stream0, PrepCall), + Regs1 = jit_regs:alloc_reg( + jit_regs:invalidate_reg(Regs0, TempReg), + TempBit + ), StateCall = State#state{ stream = Stream1, - available_regs = Available band (bnot TempBit), - used_regs = Used bor TempBit, - regs = jit_regs:invalidate_reg(State#state.regs, TempReg) + regs = Regs1 }, - call_func_ptr(StateCall, {free, TempReg}, Args); -call_primitive( - #state{available_regs = 0} = State, - Primitive, - Args -) -> - call_func_ptr(State, {primitive, Primitive}, Args). + call_func_ptr(StateCall, {free, TempReg}, Args). %%----------------------------------------------------------------------------- %% @doc Emit a jump (call without return) to a primitive with arguments. 
This @@ -726,23 +673,18 @@ call_primitive_last( % We need a register for the function pointer that should not be used as a parameter % Since we're not returning, we can use all scratch registers except % registers used for parameters - ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - ArgsRegs = args_regs(Args), - ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), - ArgsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), - ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), - Temp = first_avail(ScratchMask), - TempBit = reg_bit(Temp), - AvailableRegs1 = ScratchMask band (bnot TempBit), - UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), + #{temp := Temp, available_mask := AvailableRegs1, used_mask := UsedRegs} = + prepare_call_scratch(Args), PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), State1 = State0#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs, - regs = jit_regs:invalidate_reg(State0#state.regs, Temp) + regs = jit_regs:set_masks( + jit_regs:invalidate_reg(State0#state.regs, Temp), + AvailableRegs1, + UsedRegs + ) }, % Preprocess offset special arg @@ -786,9 +728,11 @@ call_primitive_last( tail_call_with_jit_state_registers_only(State2, Temp) end, State5 = State4#state{ - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State4#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State4#state.regs), + ?AVAILABLE_REGS_MASK, + 0 + ) }, flush_literal_pool(State5). 
@@ -843,8 +787,6 @@ return_if_not_equal_to_ctx( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, {free, Reg} @@ -863,9 +805,7 @@ return_if_not_equal_to_ctx( RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs0 bor RegBit, - used_regs = UsedRegs0 band (bnot RegBit), - regs = Regs0 + regs = jit_regs:free_reg(Regs0, RegBit) }. %%----------------------------------------------------------------------------- @@ -911,11 +851,12 @@ jump_to_continuation( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, + regs = Regs0, offset = BaseOffset } = State0, {free, OffsetReg} ) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), % ARM v6-M PIC implementation using one temp register: % 1. Use ADR to get PC into temp register @@ -961,9 +902,11 @@ jump_to_continuation( % Free all registers as this is a terminal instruction State2 = State1#state{ stream = Stream2, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State1#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State1#state.regs), + ?AVAILABLE_REGS_MASK, + 0 + ) }, flush_literal_pool(State2). 
@@ -979,8 +922,10 @@ branch_to_offset_code(_State, Offset, TargetOffset) when Rel = TargetOffset - Offset, jit_armv6m_asm:b(Rel); branch_to_offset_code( - #state{available_regs = Available}, Offset, TargetOffset -) when Available =/= 0 -> + #state{regs = Regs}, Offset, TargetOffset +) -> + Available = jit_regs:available_regs(Regs), + Available =/= 0 orelse error(no_available_registers), TempReg = first_avail(Available), % Far branch: use register-based sequence, need temporary register if @@ -1014,41 +959,10 @@ branch_to_label_code( BrEntry = {Offset, b_w}, State1 = State0#state{branches = Branches#{Label => [BrEntry | maps:get(Label, Branches, [])]}}, {State1, CodeBlock}; -branch_to_label_code( - #state{available_regs = Available, branches = Branches} = State0, Offset, Label, false -) when Available =/= 0 -> - TempReg = first_avail(Available), - SequenceSize = - if - Offset rem 4 =:= 0 -> - % Aligned - I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), - I2 = jit_armv6m_asm:add(TempReg, pc), - I3 = jit_armv6m_asm:bx(TempReg), - % Unaligned : need nop - I4 = jit_armv6m_asm:nop(), - % Placeholder offset - I5 = <<0:32/little>>, - Seq = <>, - byte_size(Seq); - true -> - % Unaligned - I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), - I2 = jit_armv6m_asm:add(TempReg, pc), - I3 = jit_armv6m_asm:bx(TempReg), - % Placeholder offset - I4 = <<0:32/little>>, - Seq = <>, - byte_size(Seq) - end, - % Add relocation entry - CodeBlock = binary:copy(<<16#FF>>, SequenceSize), - BrEntry = {Offset, {far_branch, SequenceSize, TempReg}}, - State1 = State0#state{branches = Branches#{Label => [BrEntry | maps:get(Label, Branches, [])]}}, - {State1, CodeBlock}; -branch_to_label_code( - #state{available_regs = 0, branches = Branches} = State0, Offset, Label, false -) -> +branch_to_label_code(#state{regs = Regs} = State0, Offset, Label, false) -> + branch_to_label_code_impl(jit_regs:available_regs(Regs), State0, Offset, Label). 
+ +branch_to_label_code_impl(0, #state{branches = Branches} = State0, Offset, Label) -> SequenceSize = if Offset rem 4 =/= 0 -> @@ -1088,8 +1002,36 @@ branch_to_label_code( BrEntry = {Offset, {far_branch, SequenceSize, ?IP_REG}}, State1 = State0#state{branches = Branches#{Label => [BrEntry | maps:get(Label, Branches, [])]}}, {State1, CodeBlock}; -branch_to_label_code(#state{available_regs = 0}, _Offset, _Label, _LabelLookup) -> - error({no_available_registers, _LabelLookup}). +branch_to_label_code_impl(Available, #state{branches = Branches} = State0, Offset, Label) -> + TempReg = first_avail(Available), + SequenceSize = + if + Offset rem 4 =:= 0 -> + % Aligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + % Unaligned : need nop + I4 = jit_armv6m_asm:nop(), + % Placeholder offset + I5 = <<0:32/little>>, + Seq = <>, + byte_size(Seq); + true -> + % Unaligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + % Placeholder offset + I4 = <<0:32/little>>, + Seq = <>, + byte_size(Seq) + end, + % Add relocation entry + CodeBlock = binary:copy(<<16#FF>>, SequenceSize), + BrEntry = {Offset, {far_branch, SequenceSize, TempReg}}, + State1 = State0#state{branches = Branches#{Label => [BrEntry | maps:get(Label, Branches, [])]}}, + {State1, CodeBlock}. %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. 
emit a test of a condition and conditionnally @@ -1127,9 +1069,10 @@ if_block( Stream2, Replacements ), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}; + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}; if_block( #state{stream_module = StreamModule, stream = Stream0} = State0, Cond, @@ -1144,9 +1087,10 @@ if_block( BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), NewBranchInstr = rewrite_cond_branch(CC, BranchOffset), Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}. %%----------------------------------------------------------------------------- %% @doc Emit an if else block, i.e. emit a test of a condition and @@ -1184,8 +1128,6 @@ if_else_block( %% Build the else block StateElse = State2#state{ stream = Stream4, - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, regs = State1#state.regs }, State3 = BlockFalseFn(StateElse), @@ -1195,9 +1137,10 @@ if_else_block( FinalJumpOffset = OffsetFinal - ElseJumpOffset, NewElseJumpInstr = jit_armv6m_asm:b(FinalJumpOffset), Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), - State4 = merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs), - MergedRegs = jit_regs:merge(State2#state.regs, State3#state.regs), - State4#state{regs = MergedRegs}. 
+ MergedRegs = jit_regs:merge( + State2#state.regs, State3#state.regs, ?AVAILABLE_REGS_MASK + ), + State3#state{stream = Stream6, regs = MergedRegs}. %% @private %% Regenerate the conditional branch that skips an if-block, given the patched @@ -1240,9 +1183,11 @@ if_block_cond( {State2, le, byte_size(I1)}; %% Handle {Val, '<', Reg} for values > 255, need to load into temp register if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {Val, '<', RegOrTuple} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1297,9 +1242,11 @@ if_block_cond( State2 = State1#state{stream = Stream1}, {State2, CC, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '<', Val} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1320,9 +1267,11 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, available_regs = Available} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {Val, '<', RegOrTuple} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1460,9 +1409,11 @@ if_block_cond( State3 = if_block_free_reg({free, 
RegB}, State2), {State3, CC, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '==', Val} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Offset0 = StreamModule:offset(Stream0), Reg = @@ -1482,9 +1433,11 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '!=', Val} -) when is_integer(Val), Available =/= 0 -> +) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Offset0 = StreamModule:offset(Stream0), Reg = @@ -1507,10 +1460,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {'(bool)', RegOrTuple, '==', false} -) when Available =/= 0 -> +) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1531,10 +1486,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {'(bool)', RegOrTuple, '!=', false} -) when Available =/= 0 -> +) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1555,10 +1512,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + 
regs = Regs0 } = State0, {RegOrTuple, '&', Val, '!=', 0} -) when Available =/= 0 -> +) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -1591,10 +1550,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {Reg, '&', 16#F, '!=', 16#F} -) when ?IS_GPR(Reg), Available =/= 0 -> +) when ?IS_GPR(Reg) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG I1 = jit_armv6m_asm:mvns(Temp, Reg), @@ -1628,10 +1589,12 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available + regs = Regs0 } = State0, {Reg, '&', Mask, '!=', Val} -) when ?IS_GPR(Reg), Available =/= 0 -> +) when ?IS_GPR(Reg) -> + Available = jit_regs:available_regs(Regs0), + Available =/= 0 orelse error(no_available_registers), Temp = first_avail(Available), TempBit = reg_bit(Temp), AT = Available band (bnot TempBit), @@ -1640,7 +1603,9 @@ if_block_cond( I1 = jit_armv6m_asm:mov(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), + {State2, Temp} = and_( + State1#state{regs = jit_regs:set_available_regs(Regs0, AT)}, {free, Temp}, Mask + ), Stream2 = State2#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Temp, Val), @@ -1650,9 +1615,10 @@ if_block_cond( ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream4 = StreamModule:append(Stream3, <<16#FFFF:16>>), Regs3 = jit_regs:invalidate_reg(State2#state.regs, Temp), - State3 = State2#state{ - stream = Stream4, available_regs = State2#state.available_regs bor TempBit, regs = Regs3 - }, + Regs4 = jit_regs:set_available_regs( + Regs3, 
jit_regs:available_regs(State2#state.regs) bor TempBit + ), + State3 = State2#state{stream = Stream4, regs = Regs4}, {State3, CC, OffsetAfter - OffsetBefore}; if_block_cond( #state{ @@ -1678,14 +1644,9 @@ if_block_cond( -spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, used_regs = UR0} = State0, + #state{regs = Regs0} = State0, Bit = reg_bit(Reg), - AvR1 = AvR0 bor Bit, - UR1 = UR0 band (bnot Bit), - State0#state{ - available_regs = AvR1, - used_regs = UR1 - }; + State0#state{regs = jit_regs:free_reg(Regs0, Bit)}; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. @@ -1702,12 +1663,6 @@ bit_test_optimization(16#F) -> {low_bits_mask, 4}; bit_test_optimization(16#3F) -> {low_bits_mask, 6}; bit_test_optimization(_) -> no_optimization. --spec merge_used_regs(state(), non_neg_integer()) -> state(). -merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. 
- %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively %% dividing it by 2^Shift @@ -1731,8 +1686,6 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1740,16 +1693,18 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I = jit_armv6m_asm:lsrs(ResultReg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), + Regs1 = jit_regs:alloc_reg( + jit_regs:invalidate_reg(Regs0, ResultReg), + Bit + ), { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, regs = Regs1 }, ResultReg @@ -1770,8 +1725,6 @@ shift_right_arith( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1779,16 +1732,18 @@ shift_right_arith( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I = jit_armv6m_asm:asrs(ResultReg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), + Regs1 = jit_regs:alloc_reg( + jit_regs:invalidate_reg(Regs0, ResultReg), + Bit + ), { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, regs = Regs1 }, ResultReg @@ -1827,12 +1782,13 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0Mask, - used_regs = UsedRegs0Mask + regs = Regs0 } = State0, FuncPtrTuple, Args ) -> + AvailableRegs0Mask = jit_regs:available_regs(Regs0), + UsedRegs0Mask = jit_regs:used_regs(Regs0), FreeRegs = lists:flatmap( fun ({free, {ptr, Reg}}) -> [Reg]; @@ -1899,8 
+1855,11 @@ call_func_ptr( % and the currently available registers to push values to the stack. SetArgsPushStackMask = (UsedRegs1Mask band (bnot AllArgsRegsMask)) bor AvailableRegs0Mask, State1 = State0#state{ - available_regs = SetArgsPushStackMask, - used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsPushStackMask), + regs = jit_regs:set_masks( + Regs0, + SetArgsPushStackMask, + ?AVAILABLE_REGS_MASK band (bnot SetArgsPushStackMask) + ), stream = Stream1 }, State2 = @@ -1910,7 +1869,7 @@ call_func_ptr( [Arg5, Args6] -> set_stack_args(State1, Arg5, Args6) end, - SetArgsMask = State2#state.available_regs, + SetArgsMask = jit_regs:available_regs(State2#state.regs), ParameterRegs = parameter_regs(RegArgs0), ParamMask = regs_to_mask(ParameterRegs), RegArgsRegsMask = regs_to_mask(RegArgsRegs), @@ -1969,8 +1928,11 @@ call_func_ptr( end, State3 = State2#state{ - available_regs = SetArgsAvailMask, - used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsAvailMask), + regs = jit_regs:set_masks( + State2#state.regs, + SetArgsAvailMask, + ?AVAILABLE_REGS_MASK band (bnot SetArgsAvailMask) + ), stream = Stream3 }, @@ -2049,12 +2011,14 @@ call_func_ptr( ResultRegBit = reg_bit(ResultReg), AvailableRegs3Mask = (AvailableRegs1Mask band (bnot ResultRegBit)) band ?AVAILABLE_REGS_MASK, - Regs1 = jit_regs:invalidate_all(State0#state.regs), + Regs1 = jit_regs:set_masks( + jit_regs:invalidate_all(State0#state.regs), + AvailableRegs3Mask, + UsedRegs2Mask + ), { State4#state{ stream = Stream8, - available_regs = AvailableRegs3Mask, - used_regs = UsedRegs2Mask, regs = Regs1 }, ResultReg @@ -2143,11 +2107,12 @@ set_registers_args(State0, Args, StackOffset) -> set_registers_args(State0, Args, ParamRegs, StackOffset). 
set_registers_args( - #state{used_regs = UsedRegsMask} = State0, + #state{regs = Regs0} = State0, Args, ParamRegs, StackOffset ) -> + UsedRegsMask = jit_regs:used_regs(Regs0), ArgsRegs = args_regs(Args), ParamMask = regs_to_mask(ParamRegs), ArgsMask = regs_to_mask(ArgsRegs), @@ -2169,8 +2134,11 @@ set_registers_args( ), State1#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), - used_regs = ParamMask bor NewUsedMask + regs = jit_regs:set_masks( + State1#state.regs, + ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + ParamMask bor NewUsedMask + ) }. parameter_regs(Args) -> @@ -2287,11 +2255,12 @@ set_registers_args1( Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_registers_args1( - #state{available_regs = AvailRegs} = State, + #state{regs = Regs} = State, {y_reg, X}, Reg, _StackOffset ) -> + AvailRegs = jit_regs:available_regs(Regs), ldr_y_reg(State, Reg, X, AvailRegs); set_registers_args1( #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset @@ -2345,10 +2314,11 @@ move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) -> Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, Src, {y_reg, Y} + #state{regs = Regs0} = State0, Src, {y_reg, Y} ) when is_atom(Src) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp1)), Code = str_y_reg(Src, Y, Temp1, AT), @@ -2363,10 +2333,11 @@ move_to_vm_register_emit( State0#state{stream = Stream1, regs = Regs2}; % Source is an integer to y_reg (optimized: ldr first, then movs) move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, N, {y_reg, Y} + #state{regs = Regs0} = State0, N, {y_reg, Y} ) when is_integer(N), N >= 0, N =< 255 -> + Avail = jit_regs:available_regs(Regs0), 
Temp1 = first_avail(Avail), Avail1 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail1), @@ -2383,63 +2354,75 @@ move_to_vm_register_emit( end, State0#state{stream = Stream1, regs = Regs2}; % Source is an integer (0-255 for movs, negative values need different handling) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N), N >= 0, N =< 255 -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_armv6m_asm:movs(Temp, N), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), - State1#state{available_regs = AR0, regs = Regs1}; + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = jit_armv6m_asm:movs(Temp, N), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); %% Handle large values using simple literal pool (branch-over pattern) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), - State2 = move_to_vm_register(State1, Temp, Dest), - Regs1 = jit_regs:set_contents(State2#state.regs, Temp, {imm, N}), - State2#state{available_regs = AR0, regs = Regs1}; + with_temp( + State0, + fun(StateT, Temp, _AT) -> + State1 = mov_immediate(StateT, Temp, N), + move_to_vm_register(State1, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); % Source is a VM register -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) 
-> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, ?MAX_REG}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_armv6m_asm:ldr(Temp, {Reg, 0}), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - State0a = ldr_y_reg(State0#state{available_regs = AT}, Temp, Y, AT), - State1 = move_to_vm_register(State0a, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), - State1#state{available_regs = AR0, regs = Regs1}; +move_to_vm_register_emit(State0, {x_reg, extra}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), + Stream1 = 
(StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, ?MAX_REG}) end + ); +move_to_vm_register_emit(State0, {x_reg, X}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, X}) end + ); +move_to_vm_register_emit(State0, {ptr, Reg}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = jit_armv6m_asm:ldr(Temp, {Reg, 0}), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:invalidate_reg(Regs, Temp) end + ); +move_to_vm_register_emit(State0, {y_reg, Y}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, AT) -> + State0a = ldr_y_reg(StateT, Temp, Y, AT), + move_to_vm_register(State0a, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {y_reg, Y}) end + ); % term_to_float move_to_vm_register_emit( #state{ stream_module = StreamModule, - available_regs = Avail, + regs = Regs0, stream = Stream0, variant = Variant } = @@ -2447,6 +2430,7 @@ move_to_vm_register_emit( {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), @@ -2468,6 +2452,19 @@ move_to_vm_register_emit( Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp1), Temp2), State1#state{stream = Stream1, regs = Regs1}. 
+-spec with_temp( + state(), + fun((state(), armv6m_register(), non_neg_integer()) -> state()), + fun((jit_regs:regs(), armv6m_register()) -> jit_regs:regs()) +) -> state(). +with_temp(#state{regs = Regs0} = State0, EmitFun, ContentsFun) -> + AR0 = jit_regs:available_regs(Regs0), + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), + State1 = EmitFun(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, AT), + Regs1 = jit_regs:set_available_regs(ContentsFun(State1#state.regs, Temp), AR0), + State1#state{regs = Regs1}. + %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. %% @end @@ -2484,12 +2481,13 @@ move_to_vm_register_emit( vm_register() | armv6m_register() ) -> state(). move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)), @@ -2498,12 +2496,13 @@ move_array_element( Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, available_regs = Avail} = + #state{stream_module = StreamModule, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), % For large offsets, use max offset (124) in ldr + remainder in temp register @@ -2525,12 +2524,13 @@ move_array_element( Regs3 = jit_regs:set_contents(Regs2, Temp2, {x_reg, X}), State1#state{stream 
= Stream2, regs = Regs3}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), I2 = jit_armv6m_asm:str(Temp, {Dest, 0}), @@ -2538,12 +2538,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, available_regs = Avail} = + #state{stream_module = StreamModule, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % For large offsets, use max offset (124) in ldr + remainder in temp register Offset = Index * 4, @@ -2559,12 +2560,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), State1#state{stream = Stream2, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) andalso Index * 4 =< 124 -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail1 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail1), @@ -2579,13 +2581,14 @@ move_array_element( State#state{stream = Stream1, regs = Regs3}; move_array_element( #state{ - stream_module = StreamModule, available_regs = Avail + stream_module = StreamModule, regs = Regs0 } = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail1 = Avail band (bnot reg_bit(Temp1)), 
Temp2 = first_avail(Avail1), @@ -2606,12 +2609,13 @@ move_array_element( Regs3 = jit_regs:invalidate_reg(Regs2, Temp2), State1#state{stream = Stream2, regs = Regs3}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), @@ -2633,8 +2637,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2645,14 +2647,13 @@ move_array_element( I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), I3 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), - Regs2 = jit_regs:invalidate_reg(Regs1, IndexReg), + Regs2 = jit_regs:free_reg( + jit_regs:invalidate_reg(Regs1, IndexReg), + Bit + ), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, regs = Regs2 }; @@ -2660,8 +2661,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2672,13 +2671,12 @@ move_array_element( I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), I3 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg), + Regs1 = jit_regs:free_reg( + jit_regs:invalidate_reg(Regs0, IndexReg), + Bit + ), State#state{ - 
available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, regs = Regs1 }; @@ -2686,14 +2684,13 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when is_atom(IndexReg) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), Temp = first_avail(AvailableRegs0), @@ -2702,17 +2699,16 @@ move_array_element( Code = str_y_reg(IndexReg, Y, Temp, AT), I3 = Code, Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg), Regs2 = jit_regs:invalidate_vm_loc(Regs1, {y_reg, Y}), - Regs3 = jit_regs:invalidate_reg(Regs2, Temp), + Regs3 = jit_regs:free_reg( + jit_regs:invalidate_reg(Regs2, Temp), + Bit + ), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, regs = Regs3 }. 
@@ -2736,11 +2732,12 @@ get_array_element( get_array_element( #state{ stream_module = StreamModule, - available_regs = Avail + regs = Regs0 } = State, {free, Reg}, Index ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % For large offsets, split into ldr immediate (max 124) + remainder in temp register Offset = Index * 4, @@ -2757,23 +2754,23 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, Index ) when Index * 4 =< 124 -> + Avail = jit_regs:available_regs(Regs0), ElemReg = first_avail(Avail), ElemBit = reg_bit(ElemReg), I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}), Stream1 = StreamModule:append(Stream0, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, ElemReg), + Regs1 = jit_regs:alloc_reg( + jit_regs:invalidate_reg(Regs0, ElemReg), + ElemBit + ), { State#state{ stream = Stream1, - available_regs = Avail band (bnot ElemBit), - used_regs = UsedRegs0 bor ElemBit, regs = Regs1 }, ElemReg @@ -2781,12 +2778,12 @@ get_array_element( get_array_element( #state{ stream_module = StreamModule, - available_regs = Avail, - used_regs = UsedRegs0 + regs = Regs0 } = State, Reg, Index ) -> + Avail = jit_regs:available_regs(Regs0), ElemReg = first_avail(Avail), ElemBit = reg_bit(ElemReg), Avail1 = Avail band (bnot ElemBit), @@ -2801,12 +2798,13 @@ get_array_element( I2 = jit_armv6m_asm:ldr(ElemReg, {Temp, 124}), Stream2 = StreamModule:append(Stream1, <>), Regs1 = jit_regs:invalidate_reg(State1#state.regs, ElemReg), - Regs2 = jit_regs:invalidate_reg(Regs1, Temp), + Regs2 = jit_regs:alloc_reg( + jit_regs:invalidate_reg(Regs1, Temp), + ElemBit + ), { State1#state{ stream = Stream2, - available_regs = Avail1, - used_regs = UsedRegs0 bor ElemBit, regs = Regs2 }, ElemReg @@ -2826,11 +2824,12 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, 
available_regs = Avail} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, ValueReg, Reg, Index ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), % For large offsets, split into str immediate (max 124) + remainder in temp register Offset = Index * 4, @@ -2844,12 +2843,13 @@ move_to_array_element( Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), State1#state{stream = Stream2, regs = Regs1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_armv6m_asm:mov(Temp, IndexReg), I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), @@ -2876,13 +2876,14 @@ move_to_array_element( ) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset), I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), @@ -2898,7 +2899,7 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - Temp = first_avail(State1#state.available_regs), + Temp = first_avail(jit_regs:available_regs(State1#state.regs)), I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset), I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), I3 = 
jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}), @@ -2917,16 +2918,17 @@ move_to_native_register(#state{regs = Regs} = State, Value) -> case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of {ok, CachedReg} -> Bit = reg_bit(CachedReg), - case State#state.used_regs band Bit of + Used = jit_regs:used_regs(Regs), + Avail = jit_regs:available_regs(Regs), + case Used band Bit of 0 -> - case State#state.available_regs band Bit of + case Avail band Bit of 0 -> move_to_native_register_emit(State, Value, Contents); _ -> { State#state{ - used_regs = State#state.used_regs bor Bit, - available_regs = State#state.available_regs band (bnot Bit) + regs = jit_regs:alloc_reg(Regs, Bit) }, CachedReg } @@ -2942,13 +2944,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, cp, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_armv6m_asm:ldr(Reg, ?CP), @@ -2957,9 +2958,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2974,8 +2973,6 @@ move_to_native_register_emit( {State#state{stream = Stream1, regs = Regs1}, Reg}; move_to_native_register_emit( #state{ - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State0, Imm, @@ -2983,26 +2980,24 @@ move_to_native_register_emit( ) when is_integer(Imm) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), State1 = State0#state{ - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - 
available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, extra}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)), @@ -3011,9 +3006,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -3021,8 +3014,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, X}, @@ -3030,6 +3021,7 @@ move_to_native_register_emit( ) when X < ?MAX_REG -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(X)), @@ -3038,25 +3030,23 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; move_to_native_register_emit( #state{ - available_regs = Avail, - used_regs = Used + regs = Regs0 } = State, {y_reg, Y}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), AvailT = Avail band (bnot Bit), State1 = ldr_y_reg( - State#state{available_regs = AvailT, used_regs = Used bor Bit}, + State#state{regs = jit_regs:alloc_reg(Regs0, Bit)}, Reg, Y, AvailT @@ -3070,25 +3060,27 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used + regs = Regs0 } = State, {fp_reg, F}, _Contents ) -> + Avail = jit_regs:available_regs(Regs0), RegA = first_avail(Avail), BitA = reg_bit(RegA), Avail1 = Avail band (bnot BitA), RegB = first_avail(Avail1), BitB = reg_bit(RegB), - AvailT = Avail1 band (bnot BitB), I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS), I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}), I3 = 
jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), { - State#state{stream = Stream1, available_regs = AvailT, used_regs = Used bor BitA bor BitB}, + State#state{ + stream = Stream1, + regs = jit_regs:alloc_reg(Regs0, BitA bor BitB) + }, {fp, RegA, RegB} }. @@ -3132,10 +3124,11 @@ move_to_native_register( Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{available_regs = AT} = State, + #state{regs = Regs} = State, {y_reg, Y}, RegDst ) -> + AT = jit_regs:available_regs(Regs), State1 = ldr_y_reg(State, RegDst, Y, AT), #state{regs = Regs0} = State1, Regs1 = jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}), @@ -3160,12 +3153,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, Reg ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = jit_armv6m_asm:mov(SaveReg, Reg), @@ -3175,9 +3167,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, SaveBit) }, SaveReg }; @@ -3185,12 +3175,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {ptr, Reg} ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = jit_armv6m_asm:ldr(SaveReg, {Reg, 0}), @@ -3199,9 +3188,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, SaveBit) }, SaveReg }; @@ -3209,9 +3196,10 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, 
Reg). move_to_cp( - #state{available_regs = Avail} = State, + #state{regs = Regs0} = State, {y_reg, Y} ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), AvailT = Avail band (bnot reg_bit(Reg)), State1 = ldr_y_reg(State, Reg, Y, AvailT), @@ -3220,10 +3208,11 @@ move_to_cp( State1#state{stream = Stream1}. increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Offset ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), I2 = jit_armv6m_asm:adds(Reg, Offset * 4), @@ -3238,10 +3227,11 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, offset = JumpTableOffset, - available_regs = Avail + regs = Regs0 } = State, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), % Calculate jump table entry offset @@ -3284,11 +3274,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), OffsetRef = make_ref(), @@ -3333,11 +3323,10 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), Avail1 = Avail band (bnot RegBit), @@ -3353,9 +3342,7 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = Avail1, - used_regs = UsedRegs0 bor RegBit, - regs = Regs2 + regs = jit_regs:alloc_reg(Regs2, RegBit) }, Reg }. 
@@ -3386,36 +3373,83 @@ and_( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), {State0#state{stream = Stream1, regs = Regs1}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{regs = Regs0} = State0, {free, Reg}, Val +) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), + and_imm_impl(Avail, State0, Reg, Val); +and_( + #state{stream_module = StreamModule, regs = Regs0} = + State0, + Reg, + ?TERM_PRIMARY_CLEAR_MASK +) -> + Avail = jit_regs:available_regs(Regs0), + ResultReg = first_avail(Avail), + ResultBit = reg_bit(ResultReg), + I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2), + I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2), + Stream1 = StreamModule:append(State0#state.stream, <<I1/binary, I2/binary>>), + Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), + { + State0#state{ + stream = Stream1, + regs = jit_regs:alloc_reg(Regs1, ResultBit) + }, + ResultReg + }. + +and_imm_impl( + Avail, + #state{stream_module = StreamModule, regs = Regs0} = State0, + Reg, + Val ) when Avail =/= 0 andalso Val < 0 andalso Val >= -256 -> Temp = first_avail(Avail), TempBit = reg_bit(Temp), AT = Avail band (bnot TempBit), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, bnot (Val) + ), Stream1 = State1#state.stream, I = jit_armv6m_asm:bics(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = AT bor TempBit, stream = Stream2, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, - {free, Reg}, + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, AT bor TempBit) + }, + Reg + }; +and_imm_impl( + Avail, + #state{stream_module = 
StreamModule, regs = Regs0} = State0, + Reg, Val ) when Avail =/= 0 -> Temp = first_avail(Avail), TempBit = reg_bit(Temp), AT = Avail band (bnot TempBit), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_armv6m_asm:ands(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = AT bor TempBit, stream = Stream2, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = 0} = State0, - {free, Reg}, + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, AT bor TempBit) + }, + Reg + }; +and_imm_impl( + 0, + #state{stream_module = StreamModule} = State0, + Reg, Val ) when Val < 0 andalso Val >= -256 -> % No available registers, use r0 as temp and save it to r12 @@ -3434,9 +3468,10 @@ and_( Stream4 = StreamModule:append(Stream3, Restore), Regs1 = jit_regs:invalidate_reg(State0#state.regs, Reg), {State0#state{stream = Stream4, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = 0, regs = Regs0} = State0, - {free, Reg}, +and_imm_impl( + 0, + #state{stream_module = StreamModule, regs = Regs0} = State0, + Reg, Val ) -> % No available registers, use r0 as temp and save it to r12 @@ -3454,28 +3489,7 @@ and_( Restore = jit_armv6m_asm:mov(r0, ?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), - {State0#state{stream = Stream4, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR, regs = Regs0} = - State0, - Reg, - ?TERM_PRIMARY_CLEAR_MASK -) -> - ResultReg = first_avail(Avail), - ResultBit = reg_bit(ResultReg), - I1 = 
jit_armv6m_asm:lsrs(ResultReg, Reg, 2), - I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2), - Stream1 = StreamModule:append(State0#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), - { - State0#state{ - stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 - }, - ResultReg - }. + {State0#state{stream = Stream4, regs = Regs1}, Reg}. or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Reg, SrcReg) when is_atom(SrcReg) @@ -3485,20 +3499,34 @@ or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; or_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{regs = Regs0} = State0, + Reg, + Val +) -> + or_imm_impl(jit_regs:available_regs(Regs0), State0, Reg, Val). + +or_imm_impl( + Avail, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when Avail =/= 0 -> Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_armv6m_asm:orrs(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}; -or_( - #state{stream_module = StreamModule, available_regs = 0, regs = Regs0} = State0, + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }; +or_imm_impl( + 0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> @@ -3525,20 +3553,34 @@ xor_( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), 
State0#state{stream = Stream1, regs = Regs1}; xor_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{regs = Regs0} = State0, + Reg, + Val +) -> + xor_imm_impl(jit_regs:available_regs(Regs0), State0, Reg, Val). + +xor_imm_impl( + Avail, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when Avail =/= 0 -> Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_armv6m_asm:eors(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}; -xor_( - #state{stream_module = StreamModule, available_regs = 0, regs = Regs0} = State0, + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }; +xor_imm_impl( + 0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> @@ -3562,15 +3604,21 @@ add(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; -add(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, Reg, Val) -> +add(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = 
jit_armv6m_asm:adds(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }. mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= 0 andalso Val =< 255 @@ -3740,23 +3788,29 @@ sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I1), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{stream = Stream1, regs = Regs1}; -sub(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, Reg, Val) -> +sub(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_armv6m_asm:subs(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }. -spec mul(state(), armv6m_register(), integer() | armv6m_register()) -> state(). 
mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> - Temp = first_avail(Avail), +mul(#state{regs = Regs0} = State, Reg, 3) -> + Temp = first_avail(jit_regs:available_regs(Regs0)), I1 = jit_armv6m_asm:lsls(Temp, Reg, 1), I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>), @@ -3764,8 +3818,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> - Temp = first_avail(Avail), +mul(#state{regs = Regs0} = State, Reg, 5) -> + Temp = first_avail(jit_regs:available_regs(Regs0)), I1 = jit_armv6m_asm:lsls(Temp, Reg, 2), I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>), @@ -3774,8 +3828,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> - Temp = first_avail(Avail), +mul(#state{regs = Regs0} = State, Reg, 7) -> + Temp = first_avail(jit_regs:available_regs(Regs0)), I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>), @@ -3783,8 +3837,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> - Temp = first_avail(Avail), +mul(#state{regs = Regs0} = State, Reg, 9) -> + Temp = first_avail(jit_regs:available_regs(Regs0)), I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = 
(State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>), @@ -3793,8 +3847,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 15) -> - Temp = first_avail(Avail), +mul(#state{regs = Regs0} = State, Reg, 15) -> + Temp = first_avail(jit_regs:available_regs(Regs0)), I1 = jit_armv6m_asm:lsls(Temp, Reg, 4), I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>), @@ -3807,20 +3861,25 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempBit = reg_bit(Temp), AT = Avail band (bnot TempBit), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_armv6m_asm:muls(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp), Reg), + State1Avail = jit_regs:available_regs(State1#state.regs), State1#state{ - stream = Stream2, available_regs = State1#state.available_regs bor TempBit, regs = Regs1 + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, State1Avail bor TempBit) }; mul( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg @@ -3853,9 +3912,10 @@ mul( -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). 
decrement_reductions_and_maybe_schedule_next( #state{ - stream_module = StreamModule, stream = Stream0, available_regs = Avail + stream_module = StreamModule, stream = Stream0, regs = Regs0 } = State0 ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), % Load jit_state pointer from stack @@ -3911,9 +3971,8 @@ decrement_reductions_and_maybe_schedule_next( Stream5 = StreamModule:replace( Stream4, BNEOffset, <> ), - State3 = merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs), %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. - State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. + State2#state{stream = Stream5, regs = jit_regs:invalidate_all(State1#state.regs)}. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> @@ -3925,10 +3984,11 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), TempJitState = first_avail(Avail band (bnot reg_bit(Temp))), % Load jit_state pointer from stack @@ -4004,13 +4064,13 @@ call_primitive_with_cp(State0, Primitive, Args) -> set_cp(State0) -> % get module index (dynamically) { - #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State1, + #state{stream_module = StreamModule, stream = Stream0, regs = AvailRegs0} = State1, Reg } = get_module_index( State0 ), % Get a temporary register from available registers - TempReg = first_avail(AvailRegs), + TempReg = first_avail(jit_regs:available_regs(AvailRegs0)), Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 @@ -4075,10 +4135,11 @@ rewrite_cp_offset( State0#state{stream = Stream3}. 
set_bs( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, TermReg ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_armv6m_asm:str(TermReg, ?BS), I2 = jit_armv6m_asm:movs(Temp, 0), @@ -4241,57 +4302,6 @@ regs_to_mask([offset | T]) -> regs_to_mask(T); regs_to_mask([stack | T]) -> regs_to_mask(T); regs_to_mask([Reg | T]) -> reg_bit(Reg) bor regs_to_mask(T). -%% first_avail returns the first available register from a bitmask. -%% Order matches AVAILABLE_REGS = [r7, r6, r5, r4, r3, r1] -first_avail(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> r7; -first_avail(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> r6; -first_avail(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> r5; -first_avail(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> r4; -first_avail(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> r3; -first_avail(Mask) when Mask band ?REG_BIT_R1 =/= 0 -> r1. - -%% Convert bitmask to list, matching the order of AVAILABLE_REGS. -mask_to_list(0) -> []; -mask_to_list(Mask) -> mask_to_list_r7(Mask). - -mask_to_list_r7(Mask) when Mask band ?REG_BIT_R7 =/= 0 -> [r7 | mask_to_list_r6(Mask)]; -mask_to_list_r7(Mask) -> mask_to_list_r6(Mask). -mask_to_list_r6(Mask) when Mask band ?REG_BIT_R6 =/= 0 -> [r6 | mask_to_list_r5(Mask)]; -mask_to_list_r6(Mask) -> mask_to_list_r5(Mask). -mask_to_list_r5(Mask) when Mask band ?REG_BIT_R5 =/= 0 -> [r5 | mask_to_list_r4(Mask)]; -mask_to_list_r5(Mask) -> mask_to_list_r4(Mask). -mask_to_list_r4(Mask) when Mask band ?REG_BIT_R4 =/= 0 -> [r4 | mask_to_list_r3(Mask)]; -mask_to_list_r4(Mask) -> mask_to_list_r3(Mask). -mask_to_list_r3(Mask) when Mask band ?REG_BIT_R3 =/= 0 -> [r3 | mask_to_list_r1(Mask)]; -mask_to_list_r3(Mask) -> mask_to_list_r1(Mask). -mask_to_list_r1(Mask) when Mask band ?REG_BIT_R1 =/= 0 -> [r1]; -mask_to_list_r1(_Mask) -> []. 
- -args_regs(Args) -> - lists:map( - fun - ({free, {ptr, Reg}}) -> Reg; - ({free, Reg}) when is_atom(Reg) -> Reg; - ({free, Imm}) when is_integer(Imm) -> imm; - (offset) -> imm; - (ctx) -> ?CTX_REG; - (jit_state) -> jit_state; - (jit_state_tail_call) -> jit_state; - (stack) -> stack; - (Reg) when is_atom(Reg) -> Reg; - (Imm) when is_integer(Imm) -> imm; - ({ptr, Reg}) -> Reg; - ({x_reg, _}) -> ?CTX_REG; - ({y_reg, _}) -> ?CTX_REG; - ({fp_reg, _}) -> ?CTX_REG; - ({free, {x_reg, _}}) -> ?CTX_REG; - ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG; - ({avm_int64_t, _}) -> imm - end, - Args - ). - %%----------------------------------------------------------------------------- %% @doc Add a label at the current offset. Eventually align it with a nop. %% @end diff --git a/libs/jit/src/jit_backend_regs_impl.hrl b/libs/jit/src/jit_backend_regs_impl.hrl new file mode 100644 index 0000000000..0b2046e49e --- /dev/null +++ b/libs/jit/src/jit_backend_regs_impl.hrl @@ -0,0 +1,137 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +%% Shared native-register allocation bookkeeping for the register-based JIT +%% backends. 
The available/used scratch-register bitmasks live inside +%% `jit_regs:regs()' (set via `jit_regs:new/2', read via `jit_regs:available_regs/1' +%% and `jit_regs:used_regs/1'); these functions are the thin per-backend +%% wrappers that read/write the state record's `regs' field. +%% +%% Backends including this file must provide: +%% - `#state{}' record with a `regs :: jit_regs:regs()' field (no separate +%% `available_regs' / `used_regs' fields — they're inside `regs'), +%% - `reg_bit/1' : map a register atom to its bitmask, +%% - `?AVAILABLE_REGS_MASK' : the mask of all allocatable scratch registers, +%% - `?PARAMETER_REGS' : the C-ABI parameter registers, in order, +%% - `?FIRST_AVAIL_REGS' : allocatable scratch registers in allocation-priority +%% order (drives `first_avail/1'), +%% - `?MASK_TO_LIST_REGS' : every register that can appear in a used/available +%% mask, in canonical order (drives `mask_to_list/1'), +%% - `?CTX_REG' : the register holding the execution context, +%% - `?JITSTATE_ARG_REG' : what the `jit_state' argument maps to (a register +%% atom on x86_64/aarch64, the atom `jit_state' on backends that keep it +%% symbolic). +%% +%% These macros must be defined *before* this file is included. +%% +%% wasm32 does NOT include this file: it allocates dynamically-numbered WASM +%% locals, so its bit helpers use `local_bit/1' + `mask_to_locals/1' rather than +%% the register variants, and its scratch pool grows past the initial +%% `?AVAILABLE_REGS_MASK' at runtime. It defines these bookkeeping functions +%% inline. + +%% Return the list of currently used native registers (debugging only). +used_regs(#state{regs = Regs}) -> mask_to_list(jit_regs:used_regs(Regs)). + +%% Return the list of currently available native scratch registers (debugging +%% only). +available_regs(#state{regs = Regs}) -> mask_to_list(jit_regs:available_regs(Regs)). + +%% Free a list of native registers. Entries that are not registers (pointers to +%% registers, immediates, etc.) 
are handled/ignored by free_native_register/2. +free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +%% Free a single native register. A bare register atom is returned to the +%% available pool; a {ptr, Reg} frees the underlying register; anything else is +%% ignored. +free_native_register(#state{regs = Regs} = State, Reg) when is_atom(Reg) -> + State#state{regs = jit_regs:free_reg(Regs, reg_bit(Reg))}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%% Assert that all native scratch registers are available (debugging only). +assert_all_native_free(#state{regs = Regs}) -> + 0 = jit_regs:used_regs(Regs), + ?AVAILABLE_REGS_MASK = jit_regs:available_regs(Regs), + ok. + +%% Return the first allocatable scratch register set in `Mask', following the +%% backend's allocation-priority order. Crashes if `Mask' has no allocatable +%% register set; callers check availability beforehand. +first_avail(Mask) -> jit_regs:first_set(Mask, ?FIRST_AVAIL_REGS, fun reg_bit/1). + +%% Return the registers set in `Mask' in the backend's canonical order +%% (bookkeeping/debugging only). +mask_to_list(Mask) -> jit_regs:mask_to_reg_list(Mask, ?MASK_TO_LIST_REGS, fun reg_bit/1). + +%% Map a list of call arguments to the native registers (or `imm'/`stack') that +%% hold them, so the caller can compute which registers a call clobbers. +args_regs(Args) -> lists:map(fun arg_reg/1, Args). 
+ +arg_reg({free, {ptr, Reg}}) -> Reg; +arg_reg({free, Reg}) when is_atom(Reg) -> Reg; +arg_reg({free, Imm}) when is_integer(Imm) -> imm; +arg_reg(offset) -> imm; +arg_reg(ctx) -> ?CTX_REG; +arg_reg(jit_state) -> ?JITSTATE_ARG_REG; +arg_reg(jit_state_tail_call) -> ?JITSTATE_ARG_REG; +arg_reg(stack) -> stack; +arg_reg(Reg) when is_atom(Reg) -> Reg; +arg_reg(Imm) when is_integer(Imm) -> imm; +arg_reg({ptr, Reg}) -> Reg; +arg_reg({x_reg, _}) -> ?CTX_REG; +arg_reg({y_reg, _}) -> ?CTX_REG; +arg_reg({fp_reg, _}) -> ?CTX_REG; +arg_reg({free, {x_reg, _}}) -> ?CTX_REG; +arg_reg({free, {y_reg, _}}) -> ?CTX_REG; +arg_reg({free, {fp_reg, _}}) -> ?CTX_REG; +arg_reg({avm_int64_t, _}) -> imm. + +%% Reserve a single scratch register for a non-returning (tail) call that passes +%% `Args'. Since the frame is not reused, every scratch register except those +%% holding parameters/arguments is free. Returns a map with the chosen scratch +%% register (`temp'), the resulting available/used masks with that register +%% removed from the available pool (`available_mask' / `used_mask'), and the +%% intermediate parameter/argument register lists and masks (some backends thread +%% these into their argument-setup code). +prepare_call_scratch(Args) -> + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), + ArgsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), + ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), + Temp = first_avail(ScratchMask), + AvailableMask = ScratchMask band (bnot reg_bit(Temp)), + UsedMask = ?AVAILABLE_REGS_MASK band (bnot AvailableMask), + #{ + temp => Temp, + available_mask => AvailableMask, + used_mask => UsedMask, + param_regs => ParamRegs, + args_regs => ArgsRegs, + param_mask => ParamMask, + args_mask => ArgsMask + }. 
diff --git a/libs/jit/src/jit_regs.erl b/libs/jit/src/jit_regs.erl index 881ceb2ae2..e19fff19d0 100644 --- a/libs/jit/src/jit_regs.erl +++ b/libs/jit/src/jit_regs.erl @@ -36,6 +36,13 @@ -export([ new/0, + new/2, + available_regs/1, + used_regs/1, + set_available_regs/2, + set_masks/3, + alloc_reg/2, + free_reg/2, get_contents/2, get_all_contents/1, set_contents/3, @@ -46,13 +53,16 @@ invalidate_vm_loc/2, find_reg_with_contents/2, merge/2, + merge/3, stack_push/2, stack_pop/1, stack_clear/1, stack_contents/1, value_to_contents/2, vm_dest_to_contents/2, - regs_to_mask/2 + regs_to_mask/2, + first_set/3, + mask_to_reg_list/3 ]). -export_type([regs/0, contents/0]). @@ -77,7 +87,9 @@ -record(regs, { contents = #{} :: #{atom() => contents()}, stack = [] :: [atom() | contents()], - unreachable = false :: boolean() + unreachable = false :: boolean(), + available_regs = 0 :: non_neg_integer(), + used_regs = 0 :: non_neg_integer() }). -opaque regs() :: #regs{}. @@ -87,6 +99,51 @@ new() -> #regs{}. +%% @doc Create a new register tracking state with initial available/used masks. +-spec new(non_neg_integer(), non_neg_integer()) -> regs(). +new(Available, Used) -> + #regs{available_regs = Available, used_regs = Used}. + +%% @doc Get the available-registers bitmask. +-spec available_regs(regs()) -> non_neg_integer(). +available_regs(#regs{available_regs = A}) -> A. + +%% @doc Get the used-registers bitmask. +-spec used_regs(regs()) -> non_neg_integer(). +used_regs(#regs{used_regs = U}) -> U. + +%% @doc Set the available-registers bitmask. +-spec set_available_regs(regs(), non_neg_integer()) -> regs(). +set_available_regs(#regs{} = Regs, A) -> + Regs#regs{available_regs = A}. + +%% @doc Set both available and used bitmasks at once. +-spec set_masks(regs(), non_neg_integer(), non_neg_integer()) -> regs(). +set_masks(#regs{} = Regs, A, U) -> + Regs#regs{available_regs = A, used_regs = U}. 
+ +%% @doc Mark a scratch register (given as its bitmask) as allocated: remove it +%% from the available pool and add it to the used set. This is the single place +%% the allocation bit-law lives; backends pass `reg_bit(Reg)' as `Bit'. +-spec alloc_reg(regs(), non_neg_integer()) -> regs(). +alloc_reg(#regs{available_regs = A, used_regs = U} = Regs, Bit) -> + Regs#regs{available_regs = A band (bnot Bit), used_regs = U bor Bit}. + +%% @doc Mark a scratch register (given as its bitmask) as freed: return it to +%% the available pool and remove it from the used set. Inverse of alloc_reg/2. +-spec free_reg(regs(), non_neg_integer()) -> regs(). +free_reg(#regs{available_regs = A, used_regs = U} = Regs, Bit) -> + Regs#regs{available_regs = A bor Bit, used_regs = U band (bnot Bit)}. + +%% @doc Merge two regs taking the intersection of available masks and union +%% of used masks (constrained by AllRegsMask). +-spec merge(regs(), regs(), non_neg_integer()) -> regs(). +merge(R1, R2, AllRegsMask) -> + Merged0 = merge(R1, R2), + A1 = (R1#regs.available_regs band R2#regs.available_regs) band AllRegsMask, + U1 = (R1#regs.used_regs bor R2#regs.used_regs) band AllRegsMask, + Merged0#regs{available_regs = A1, used_regs = U1}. + %% @doc Get what a CPU register currently holds. -spec get_contents(regs(), atom()) -> contents(). get_contents(#regs{contents = C}, Reg) -> @@ -213,3 +270,26 @@ regs_to_mask([imm | T], RegBitFn) -> regs_to_mask(T, RegBitFn); regs_to_mask([jit_state | T], RegBitFn) -> regs_to_mask(T, RegBitFn); regs_to_mask([stack | T], RegBitFn) -> regs_to_mask(T, RegBitFn); regs_to_mask([Reg | T], RegBitFn) -> RegBitFn(Reg) bor regs_to_mask(T, RegBitFn). + +%% @doc Return the first register in `OrderedRegs' whose bit (per `RegBitFn') is +%% set in `Mask', following the list order. Crashes with `function_clause' if no +%% register matches — callers are expected to have checked that the mask is +%% non-empty, matching the previous per-backend guard-clause `first_avail/1'. 
+-spec first_set(non_neg_integer(), [atom()], fun((atom()) -> non_neg_integer())) -> atom(). +first_set(Mask, [Reg | Rest], RegBitFn) -> + case Mask band RegBitFn(Reg) of + 0 -> first_set(Mask, Rest, RegBitFn); + _ -> Reg + end. + +%% @doc Return the registers in `OrderedRegs' whose bits (per `RegBitFn') are set +%% in `Mask', preserving the order of `OrderedRegs'. +-spec mask_to_reg_list(non_neg_integer(), [atom()], fun((atom()) -> non_neg_integer())) -> + [atom()]. +mask_to_reg_list(_Mask, [], _RegBitFn) -> + []; +mask_to_reg_list(Mask, [Reg | Rest], RegBitFn) -> + case Mask band RegBitFn(Reg) of + 0 -> mask_to_reg_list(Mask, Rest, RegBitFn); + _ -> [Reg | mask_to_reg_list(Mask, Rest, RegBitFn)] + end. diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 3a3fd2687a..ada5358686 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -173,8 +173,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), tuple()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), regs :: jit_regs:regs() @@ -184,6 +182,7 @@ -type immediate() :: non_neg_integer(). -type vm_register() :: {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}. +-type native_register() :: riscv32_register(). -type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}. -type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. @@ -282,6 +281,13 @@ -include("jit_riscv_impl.hrl"). +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1) +%% is shared across the register-based backends and flows through jit_regs. 
+%% Must be included after jit_riscv_impl.hrl, which defines reg_bit/1; +%% mask_to_list/1 is defined by jit_backend_regs_impl.hrl itself. +-include("jit_backend_regs_impl.hrl"). + -spec word_size() -> 4 | 8. word_size() -> 4. diff --git a/libs/jit/src/jit_riscv64.erl b/libs/jit/src/jit_riscv64.erl index 3dda3760c0..4384117dcd 100644 --- a/libs/jit/src/jit_riscv64.erl +++ b/libs/jit/src/jit_riscv64.erl @@ -176,11 +176,9 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), tuple()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), - %% Register value tracking for optimization + %% Register value tracking and native-register mask bookkeeping regs :: jit_regs:regs() }). @@ -191,6 +189,7 @@ | {y_reg, non_neg_integer()} | {fp_reg, non_neg_integer()} | {ptr, riscv64_register()}. +-type native_register() :: riscv64_register(). -type value() :: immediate() | vm_register() | riscv64_register() | {ptr, riscv64_register()}. -type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. @@ -297,6 +296,13 @@ -include("jit_riscv_impl.hrl"). +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1) +%% is shared across the register-based backends and flows through jit_regs. +%% Must be included after jit_riscv_impl.hrl, which defines reg_bit/1; +%% mask_to_list/1 is defined by jit_backend_regs_impl.hrl itself. +-include("jit_backend_regs_impl.hrl"). + -spec word_size() -> 4 | 8. word_size() -> 8. 
diff --git a/libs/jit/src/jit_riscv_impl.hrl b/libs/jit/src/jit_riscv_impl.hrl index 389dd6f5ff..132127d498 100644 --- a/libs/jit/src/jit_riscv_impl.hrl +++ b/libs/jit/src/jit_riscv_impl.hrl @@ -46,11 +46,9 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, labels = #{}, variant = Variant, - regs = jit_regs:new() + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0) }. %%----------------------------------------------------------------------------- @@ -92,62 +90,10 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> Stream1 = StreamModule:append(Stream0, ?ASM:c_ebreak()), State#state{stream = Stream1}. -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently used native registers. This is used for -%% debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of used registers -%%----------------------------------------------------------------------------- -used_regs(#state{used_regs = Used}) -> mask_to_list(Used). - -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently available native scratch registers. This -%% is used for debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of available registers -%%----------------------------------------------------------------------------- -available_regs(#state{available_regs = Available}) -> mask_to_list(Available). - -%%----------------------------------------------------------------------------- -%% @doc Free native registers. The passed list of registers can contain -%% registers, pointer to registers or other values that are ignored. 
-%% @end -%% @param State current backend state -%% @param Regs list of registers or other values -%% @return The updated backend state -%%----------------------------------------------------------------------------- -free_native_registers(State, []) -> - State; -free_native_registers(State, [Reg | Rest]) -> - State1 = free_native_register(State, Reg), - free_native_registers(State1, Rest). - -free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Reg -) when is_atom(Reg) -> - Bit = reg_bit(Reg), - State#state{ - available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) - }; -free_native_register(State, {ptr, Reg}) -> - free_native_register(State, Reg); -free_native_register(State, _Other) -> - State. - -%%----------------------------------------------------------------------------- -%% @doc Assert that all native scratch registers are available. This is used -%% for debugging and not in production. -%% @end -%% @param State current backend state -%% @return ok -%%----------------------------------------------------------------------------- -assert_all_native_free(State) -> - 0 = State#state.used_regs, - ?AVAILABLE_REGS_MASK = State#state.available_regs, - ok. +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1) +%% is provided by the includer (jit_riscv32 / jit_riscv64) via +%% -include("jit_backend_regs_impl.hrl"). %%----------------------------------------------------------------------------- %% @doc Emit the jump table at the beginning of the module. 
Branches will be @@ -337,30 +283,28 @@ call_primitive( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used + regs = Regs0 } = State, Primitive, Args -) when Available =/= 0 -> - TempReg = first_avail(Available), - TempBit = reg_bit(TempReg), - % Load primitive function pointer - PrepCall = load_primitive_ptr(Primitive, TempReg), - Stream1 = StreamModule:append(Stream0, PrepCall), - StateCall = State#state{ - stream = Stream1, - available_regs = Available band (bnot TempBit), - used_regs = Used bor TempBit, - regs = jit_regs:invalidate_reg(State#state.regs, TempReg) - }, - call_func_ptr(StateCall, {free, TempReg}, Args); -call_primitive( - #state{available_regs = 0} = State, - Primitive, - Args ) -> - call_func_ptr(State, {primitive, Primitive}, Args). + Available = jit_regs:available_regs(Regs0), + case Available of + 0 -> + call_func_ptr(State, {primitive, Primitive}, Args); + _ -> + TempReg = first_avail(Available), + TempBit = reg_bit(TempReg), + % Load primitive function pointer + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + Regs1 = jit_regs:invalidate_reg(Regs0, TempReg), + StateCall = State#state{ + stream = Stream1, + regs = jit_regs:alloc_reg(Regs1, TempBit) + }, + call_func_ptr(StateCall, {free, TempReg}, Args) + end. %%----------------------------------------------------------------------------- %% @doc Emit a jump (call without return) to a primitive with arguments. 
This @@ -383,23 +327,16 @@ call_primitive_last( % We need a register for the function pointer that should not be used as a parameter % Since we're not returning, we can use all scratch registers except % registers used for parameters - ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - ArgsRegs = args_regs(Args), - ArgsRegsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), - ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), - ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsRegsMask bor ParamMask)), - Temp = first_avail(ScratchMask), - TempBit = reg_bit(Temp), - AvailableRegs1 = ScratchMask band (bnot TempBit), - UsedMask = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), + #{temp := Temp, available_mask := AvailableRegs1, used_mask := UsedMask} = + prepare_call_scratch(Args), PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), State1 = State0#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedMask, - regs = jit_regs:invalidate_reg(State0#state.regs, Temp) + regs = jit_regs:set_masks( + jit_regs:invalidate_reg(State0#state.regs, Temp), AvailableRegs1, UsedMask + ) }, % Preprocess offset special arg @@ -424,9 +361,9 @@ call_primitive_last( tail_call_with_jit_state_registers_only(State2, Temp) end, State4#state{ - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State4#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State4#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) }. 
%%----------------------------------------------------------------------------- @@ -463,8 +400,7 @@ return_if_not_equal_to_ctx( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State, {free, Reg} ) -> @@ -484,9 +420,7 @@ return_if_not_equal_to_ctx( RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs0 bor RegBit, - used_regs = UsedRegs0 band (bnot RegBit), - regs = State#state.regs + regs = jit_regs:free_reg(Regs0, RegBit) }. %%----------------------------------------------------------------------------- @@ -529,11 +463,12 @@ jump_to_continuation( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, + regs = Regs0, offset = BaseOffset } = State0, {free, OffsetReg} ) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) @@ -552,9 +487,9 @@ jump_to_continuation( % Free all registers since this is a tail jump State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State0#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(Regs0), ?AVAILABLE_REGS_MASK, 0 + ) }. 
branch_to_offset_code(_State, Offset, TargetOffset) when @@ -564,8 +499,10 @@ branch_to_offset_code(_State, Offset, TargetOffset) when Rel = TargetOffset - Offset, ?ASM:j(Rel); branch_to_offset_code( - #state{available_regs = Available}, Offset, TargetOffset -) when Available =/= 0 -> + #state{regs = Regs0}, Offset, TargetOffset +) -> + Available = jit_regs:available_regs(Regs0), + true = Available =/= 0, TempReg = first_avail(Available), % Far branch: use auipc + jalr sequence for PC-relative addressing % This computes: PC + Immediate and jumps to it @@ -594,10 +531,16 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), {State, CodeBlock}; branch_to_label_code( - #state{available_regs = Available, branches = Branches} = State0, Offset, Label, false -) when Available =/= 0 -> - TempReg = first_avail(Available), + #state{regs = Regs0, branches = Branches} = State0, Offset, Label, false +) -> + Available = jit_regs:available_regs(Regs0), + TempReg = + case Available of + 0 -> t6; + _ -> first_avail(Available) + end, % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) + % When no scratch register is available, use t6 (caller-saved, safe to clobber). 
% Placeholder: auipc TempReg, 0 % Placeholder: jalr zero, TempReg, 0 @@ -606,20 +549,7 @@ branch_to_label_code( BrEntry = {Offset, {far_branch, TempReg}}, State1 = State0#state{branches = Branches#{Label => [BrEntry | maps:get(Label, Branches, [])]}}, {State1, CodeBlock}; -branch_to_label_code( - #state{available_regs = 0, branches = Branches} = State0, Offset, Label, false -) -> - % RISC-V: Use t6 as scratch (caller-saved, safe to clobber) - % Far branch sequence using PC-relative auipc + jalr (8 bytes) - - % Placeholder: auipc t6, 0 - % Placeholder: jalr zero, t6, 0 - CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, - % Add relocation entry - BrEntry = {Offset, {far_branch, t6}}, - State1 = State0#state{branches = Branches#{Label => [BrEntry | maps:get(Label, Branches, [])]}}, - {State1, CodeBlock}; -branch_to_label_code(#state{available_regs = 0}, _Offset, _Label, LabelLookup) -> +branch_to_label_code(#state{}, _Offset, _Label, LabelLookup) -> error({no_available_registers, LabelLookup}). %%----------------------------------------------------------------------------- @@ -657,9 +587,10 @@ if_block( Stream2, Replacements ), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}; + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}; if_block( #state{stream_module = StreamModule, stream = Stream0} = State0, Cond, @@ -675,9 +606,10 @@ if_block( BranchOffset = OffsetAfter - BranchInstrOffset, NewBranchInstr = apply(?ASM, BranchFunc, [Reg, Operand, BranchOffset]), Stream3 = StreamModule:replace(Stream2, BranchInstrOffset, NewBranchInstr), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. 
+ MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}. %%----------------------------------------------------------------------------- %% @doc Emit an if else block, i.e. emit a test of a condition and @@ -714,8 +646,6 @@ if_else_block( %% Build the else block StateElse = State2#state{ stream = Stream4, - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, regs = State1#state.regs }, State3 = BlockFalseFn(StateElse), @@ -728,9 +658,10 @@ if_else_block( %% If this fails, the if/else blocks are too large 2 = byte_size(NewElseJumpInstr), Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), - State4 = merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs), - MergedRegs = jit_regs:merge(State2#state.regs, State3#state.regs), - State4#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State2#state.regs, State3#state.regs, ?AVAILABLE_REGS_MASK + ), + State3#state{stream = Stream6, regs = MergedRegs}. 
if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', 0} @@ -757,7 +688,7 @@ if_block_cond( end, % RISC-V: bge Reg, Val, offset (branch if Reg >= Val, i.e., NOT less than) % Load immediate into a temp register for comparison - Temp = first_avail(State0#state.available_regs), + Temp = first_avail(jit_regs:available_regs(State0#state.regs)), OffsetBefore = StreamModule:offset(Stream0), State1 = mov_immediate(State0, Temp, Val), Stream1 = State1#state.stream, @@ -769,9 +700,10 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '<', Val} ) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -790,9 +722,10 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {Val, '<', RegOrTuple} ) when is_integer(Val), Val >= 0, Val =< 255 -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -811,9 +744,10 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Temp, Reg}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {Val, '<', RegOrTuple} ) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -896,9 +830,10 @@ if_block_cond(State, 
{'(int)', RegOrTuple, '==', 0}) -> if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) -> if_block_cond(State, {RegOrTuple, '==', Val}); if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), Reg = case RegOrTuple of @@ -934,9 +869,10 @@ if_block_cond( if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> if_block_cond(State, {RegOrTuple, '!=', Val}); if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -966,9 +902,10 @@ if_block_cond( State3 = if_block_free_reg({free, RegB}, State2), {State3, {bne, RegA, RegB}, 0}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -987,9 +924,10 @@ if_block_cond( State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bne, Reg, Temp}, BranchDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -1011,10 
+949,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, {'(bool)', RegOrTuple, '==', false} ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -1034,10 +973,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, {'(bool)', RegOrTuple, '!=', false} ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -1057,10 +997,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, {RegOrTuple, '&', Val, '!=', 0} ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -1093,10 +1034,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, {Reg, '&', 16#F, '!=', 16#F} ) when ?IS_GPR(Reg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG I1 = ?ASM:not_(Temp, Reg), @@ -1128,19 +1070,25 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), + TempBit = reg_bit(Temp), %% RISC-V: AND with mask, then compare with value OffsetBefore = StreamModule:offset(Stream0), I1 = ?ASM:mv(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), - State1 = State0#state{stream = Stream1}, - {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), + State1 = State0#state{ + stream = Stream1, + regs = jit_regs:alloc_reg(Regs0, TempBit) + }, + {State2, Temp} = and_(State1, {free, Temp}, Mask), Stream2 = State2#state.stream, + 
Regs2 = State2#state.regs, + Avail2 = jit_regs:available_regs(Regs2), %% Compare Temp with Val and branch if equal (NOT != Val) case Val of 0 -> @@ -1149,7 +1097,8 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = StreamModule:append(Stream2, BranchInstr), State3 = State2#state{ - stream = Stream3, available_regs = State2#state.available_regs bor reg_bit(Temp) + stream = Stream3, + regs = jit_regs:free_reg(Regs2, TempBit) }, {State3, {beq, Temp, zero}, BranchDelta}; _ when ?IS_GPR(Val) -> @@ -1158,24 +1107,32 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = StreamModule:append(Stream2, BranchInstr), State3 = State2#state{ - stream = Stream3, available_regs = State2#state.available_regs bor reg_bit(Temp) + stream = Stream3, + regs = jit_regs:free_reg(Regs2, TempBit) }, {State3, {beq, Temp, Val}, BranchDelta}; _ -> %% Val is an immediate - need second temp register %% Reuse the mask register for the comparison value - MaskReg = first_avail(AT), - AT2 = AT band (bnot reg_bit(MaskReg)), - State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val), + MaskReg = first_avail(Avail2), + MaskRegBit = reg_bit(MaskReg), + State3 = mov_immediate( + State2#state{ + regs = jit_regs:alloc_reg(Regs2, MaskRegBit) + }, + MaskReg, + Val + ), Stream3 = State3#state.stream, + Regs3 = State3#state.regs, BranchDelta = StreamModule:offset(Stream3) - OffsetBefore, BranchInstr = <<16#FFFFFFFF:32/little>>, Stream4 = StreamModule:append(Stream3, BranchInstr), - Regs4 = jit_regs:invalidate_reg(State3#state.regs, MaskReg), + Regs4 = jit_regs:invalidate_reg(Regs3, MaskReg), + ReleaseBits = TempBit bor MaskRegBit, State4 = State3#state{ stream = Stream4, - available_regs = State3#state.available_regs bor reg_bit(Temp) bor reg_bit(MaskReg), - regs = Regs4 + regs = jit_regs:free_reg(Regs4, ReleaseBits) }, {State4, {beq, Temp, MaskReg}, BranchDelta} end; @@ -1183,10 +1140,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, 
- available_regs = AvailRegs + regs = Regs0 } = State0, {{free, Reg} = RegTuple, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> + AvailRegs = jit_regs:available_regs(Regs0), %% RISC-V: AND with mask, then compare with value OffsetBefore = StreamModule:offset(Stream0), {State1, Reg} = and_(State0, RegTuple, Mask), @@ -1212,36 +1170,40 @@ if_block_cond( _ -> %% Val is an immediate - need temp register %% Reuse the mask register for the comparison value - MaskReg = first_avail(State1#state.available_regs), - AT = State1#state.available_regs band (bnot reg_bit(MaskReg)), - State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val), + Regs1 = State1#state.regs, + Avail1 = jit_regs:available_regs(Regs1), + MaskReg = first_avail(Avail1), + MaskRegBit = reg_bit(MaskReg), + State2 = mov_immediate( + State1#state{ + regs = jit_regs:alloc_reg(Regs1, MaskRegBit) + }, + MaskReg, + Val + ), Stream2 = State2#state.stream, + Regs2 = State2#state.regs, + Used2 = jit_regs:used_regs(Regs2), BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = StreamModule:append(Stream2, BranchInstr), - Regs3 = jit_regs:invalidate_reg(State2#state.regs, MaskReg), - State3 = State2#state{stream = Stream3, available_regs = AvailRegs, regs = Regs3}, + Regs3 = jit_regs:invalidate_reg(Regs2, MaskReg), + State3 = State2#state{ + stream = Stream3, + regs = jit_regs:set_masks( + Regs3, AvailRegs, Used2 band (bnot MaskRegBit) + ) + }, State4 = if_block_free_reg(RegTuple, State3), {State4, {beq, Reg, MaskReg}, BranchDelta} end. -if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, used_regs = UR0} = State0, +if_block_free_reg({free, Reg}, #state{regs = Regs0} = State0) -> Bit = reg_bit(Reg), - AvR1 = AvR0 bor Bit, - UR1 = UR0 band (bnot Bit), - State0#state{ - available_regs = AvR1, - used_regs = UR1 - }; + State0#state{regs = jit_regs:free_reg(Regs0, Bit)}; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. 
-merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. - %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively %% dividing it by 2^Shift @@ -1263,8 +1225,6 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1272,6 +1232,7 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I = ?ASM:srli(ResultReg, Reg, Shift), @@ -1280,9 +1241,7 @@ shift_right( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. @@ -1318,8 +1277,6 @@ shift_right_arith( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1327,6 +1284,7 @@ shift_right_arith( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I = ?ASM:srai(ResultReg, Reg, Shift), @@ -1335,9 +1293,7 @@ shift_right_arith( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. 
@@ -1355,12 +1311,13 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0Mask, - used_regs = UsedRegs0Mask + regs = Regs0 } = State0, FuncPtrTuple, Args ) -> + AvailableRegs0Mask = jit_regs:available_regs(Regs0), + UsedRegs0Mask = jit_regs:used_regs(Regs0), FreeRegs = lists:flatmap( fun ({free, {ptr, Reg}}) -> [Reg]; @@ -1407,9 +1364,10 @@ call_func_ptr( % and the currently available registers SetArgsMask = (UsedRegs1Mask band (bnot RegArgsRegsMask)) bor AvailableRegs0Mask, State1 = State0#state{ - available_regs = SetArgsMask, - used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsMask), - stream = Stream1 + stream = Stream1, + regs = jit_regs:set_masks( + Regs0, SetArgsMask, ?AVAILABLE_REGS_MASK band (bnot SetArgsMask) + ) }, ParameterRegs = parameter_regs(RegArgs0), @@ -1469,9 +1427,12 @@ call_func_ptr( end, State3 = State1#state{ - available_regs = SetArgsAvailMask, - used_regs = ?AVAILABLE_REGS_MASK band (bnot SetArgsAvailMask), - stream = Stream3 + stream = Stream3, + regs = jit_regs:set_masks( + State1#state.regs, + SetArgsAvailMask, + ?AVAILABLE_REGS_MASK band (bnot SetArgsAvailMask) + ) }, StackOffset = AlignedStackBytes, @@ -1533,9 +1494,7 @@ call_func_ptr( { State4#state{ stream = Stream8, - available_regs = AvailableRegs3Mask, - used_regs = UsedRegs2Mask, - regs = Regs1 + regs = jit_regs:set_masks(Regs1, AvailableRegs3Mask, UsedRegs2Mask) }, ResultReg }. @@ -1587,11 +1546,12 @@ set_registers_args(State0, Args, StackOffset) -> set_registers_args(State0, Args, ParamRegs, StackOffset). 
set_registers_args( - #state{used_regs = UsedRegsMask} = State0, + #state{regs = Regs0} = State0, Args, ParamRegs, StackOffset ) -> + UsedRegsMask = jit_regs:used_regs(Regs0), ArgsRegs = args_regs(Args), ParamMask = regs_to_mask(ParamRegs), ArgsMask = regs_to_mask(ArgsRegs), @@ -1613,8 +1573,11 @@ set_registers_args( ), State1#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), - used_regs = ParamMask bor NewUsedMask + regs = jit_regs:set_masks( + State1#state.regs, + ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + ParamMask bor NewUsedMask + ) }. parameter_regs(Args) -> @@ -1732,11 +1695,12 @@ set_registers_args1( Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_registers_args1( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, X}, Reg, _StackOffset ) -> + AvailRegs = jit_regs:available_regs(Regs0), Code = ldr_y_reg(Reg, X, AvailRegs), Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; @@ -1792,10 +1756,11 @@ move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) -> Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, Src, {y_reg, Y} + #state{regs = Regs0} = State0, Src, {y_reg, Y} ) when is_atom(Src) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp1)), Code = str_y_reg(Src, Y, Temp1, AT), @@ -1810,10 +1775,11 @@ move_to_vm_register_emit( State0#state{stream = Stream1, regs = Regs2}; % Source is an integer to y_reg (optimized: ldr first, then movs) move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, N, {y_reg, Y} + #state{regs = Regs0} = State0, N, {y_reg, Y} ) when 
is_integer(N), N >= 0, N =< 255 -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail2 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail2), @@ -1830,77 +1796,84 @@ move_to_vm_register_emit( end, State0#state{stream = Stream1, regs = Regs2}; % Source is an integer (0-255 for movs, negative values need different handling) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N), N >= 0, N =< 255 -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = ?ASM:li(Temp, N), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), - State1#state{available_regs = AR0, regs = Regs1}; + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = ?ASM:li(Temp, N), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); %% Handle large values using simple literal pool (branch-over pattern) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), - State2 = move_to_vm_register(State1, Temp, Dest), - Regs1 = jit_regs:set_contents(State2#state.regs, Temp, {imm, N}), - State2#state{available_regs = AR0, regs = Regs1}; + with_temp( + State0, + fun(StateT, Temp, _AT) -> + State1 = mov_immediate(StateT, Temp, N), + move_to_vm_register(State1, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); % Source is a VM register 
-move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - {BaseReg, Off} = ?X_REG(?MAX_REG), - I1 = ?LOAD_WORD(Temp, BaseReg, Off), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, ?MAX_REG}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - {XReg, X_REGOffset} = ?X_REG(X), - I1 = ?LOAD_WORD(Temp, XReg, X_REGOffset), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - I1 = ?LOAD_WORD(Temp, Reg, 0), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> - Temp = first_avail(AR0), - AT = AR0 band (bnot reg_bit(Temp)), - Code = ldr_y_reg(Temp, Y, AT), - Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), - % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer - Regs0a = - case AT of - 0 -> State0#state.regs; - _ -> jit_regs:invalidate_reg(State0#state.regs, first_avail(AT)) 
+move_to_vm_register_emit(State0, {x_reg, extra}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = ?LOAD_WORD(Temp, BaseReg, Off), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) end, - State0a = State0#state{ - stream = Stream1, - available_regs = AT, - regs = Regs0a - }, - State1 = move_to_vm_register(State0a, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), - State1#state{available_regs = AR0, regs = Regs1}; + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, ?MAX_REG}) end + ); +move_to_vm_register_emit(State0, {x_reg, X}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + {XReg, X_REGOffset} = ?X_REG(X), + I1 = ?LOAD_WORD(Temp, XReg, X_REGOffset), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, X}) end + ); +move_to_vm_register_emit(State0, {ptr, Reg}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = ?LOAD_WORD(Temp, Reg, 0), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:invalidate_reg(Regs, Temp) end + ); +move_to_vm_register_emit(State0, {y_reg, Y}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, AT) -> + Code = ldr_y_reg(Temp, Y, AT), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, Code), + % ldr_y_reg clobbers first_avail(AT) as a hidden temp for the Y_REGS pointer + Regs0a = + case AT of + 0 -> StateT#state.regs; + _ -> jit_regs:invalidate_reg(StateT#state.regs, first_avail(AT)) + end, + move_to_vm_register(StateT#state{stream = Stream1, regs = Regs0a}, Temp, Dest) + end, + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, 
{y_reg, Y}) end + ); % term_to_float move_to_vm_register_emit( #state{ stream_module = StreamModule, - available_regs = Avail, + regs = Regs0, stream = Stream0, variant = Variant } = @@ -1908,6 +1881,7 @@ move_to_vm_register_emit( {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), {BaseReg, Off} = ?FP_REGS, @@ -1930,6 +1904,19 @@ move_to_vm_register_emit( Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp1), Temp2), State1#state{stream = Stream1, regs = Regs1}. +-spec with_temp( + state(), + fun((state(), native_register(), non_neg_integer()) -> state()), + fun((jit_regs:regs(), native_register()) -> jit_regs:regs()) +) -> state(). +with_temp(#state{regs = Regs0} = State0, EmitFun, ContentsFun) -> + AR0 = jit_regs:available_regs(Regs0), + Temp = first_avail(AR0), + AT = AR0 band (bnot reg_bit(Temp)), + State1 = EmitFun(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, AT), + Regs1 = jit_regs:set_available_regs(ContentsFun(State1#state.regs, Temp), AR0), + State1#state{regs = Regs1}. + %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. 
%% @end @@ -1940,12 +1927,13 @@ move_to_vm_register_emit( %% @return Updated backend state %%----------------------------------------------------------------------------- move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?LOAD_WORD(Temp, Reg, Index * ?WORD_SIZE_BYTES), {BaseReg, Off} = ?X_REG(X), @@ -1955,12 +1943,13 @@ move_array_element( Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?LOAD_WORD(Temp, Reg, Index * ?WORD_SIZE_BYTES), I2 = ?STORE_WORD(Dest, Temp, 0), @@ -1968,12 +1957,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail2 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail2), @@ -1987,12 +1977,13 @@ move_array_element( Regs3 = jit_regs:invalidate_reg(Regs2, Temp2), State#state{stream = Stream1, regs = Regs3}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + 
#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), I1 = ?LOAD_WORD(Reg, Reg, Index * ?WORD_SIZE_BYTES), @@ -2017,8 +2008,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2031,23 +2020,17 @@ move_array_element( {BaseReg, Off} = ?X_REG(X), I4 = ?STORE_WORD(BaseReg, IndexReg, Off), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), Regs2 = jit_regs:invalidate_reg(Regs1, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs2 + regs = jit_regs:free_reg(Regs2, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2059,28 +2042,23 @@ move_array_element( I3 = ?LOAD_WORD(IndexReg, IndexReg, 0), I4 = ?STORE_WORD(PtrReg, IndexReg, 0), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs1 + regs = jit_regs:free_reg(Regs1, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when is_atom(IndexReg) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), Temp = first_avail(AvailableRegs0), AT = AvailableRegs0 band (bnot reg_bit(Temp)), 
I1 = ?ASM:slli(IndexReg, IndexReg, (?WORD_SIZE_BYTES bsr 2) + 1), @@ -2089,8 +2067,6 @@ move_array_element( Code = str_y_reg(IndexReg, Y, Temp, AT), I4 = Code, Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), @@ -2098,10 +2074,8 @@ move_array_element( Regs2 = jit_regs:invalidate_reg(Regs1, Temp), Regs3 = jit_regs:invalidate_reg(Regs2, IndexReg), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs3 + regs = jit_regs:free_reg(Regs3, Bit) }. %% @doc move reg[x] to a vm or native register @@ -2122,13 +2096,12 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, Index ) -> + Avail = jit_regs:available_regs(Regs0), ElemReg = first_avail(Avail), ElemBit = reg_bit(ElemReg), I1 = ?LOAD_WORD(ElemReg, Reg, Index * ?WORD_SIZE_BYTES), @@ -2137,9 +2110,7 @@ get_array_element( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ElemBit), - used_regs = UsedRegs0 bor ElemBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ElemBit) }, ElemReg }. 
@@ -2155,12 +2126,13 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:mv(Temp, IndexReg), I2 = ?ASM:slli(Temp, Temp, (?WORD_SIZE_BYTES bsr 2) + 1), @@ -2188,13 +2160,14 @@ move_to_array_element( ) when is_integer(IndexReg) andalso is_integer(Offset) andalso ?ARRAY_OFFSET_FOLD_GUARD(Offset) -> move_to_array_element(State, Value, BaseReg, ?ARRAY_OFFSET_FOLD(IndexReg, Offset)); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:addi(Temp, IndexReg, Offset), I2 = ?ASM:slli(Temp, Temp, (?WORD_SIZE_BYTES bsr 2) + 1), @@ -2211,7 +2184,7 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), - Temp = first_avail(State1#state.available_regs), + Temp = first_avail(jit_regs:available_regs(State1#state.regs)), I1 = ?ASM:addi(Temp, IndexReg, Offset), I2 = ?ASM:slli(Temp, Temp, (?WORD_SIZE_BYTES bsr 2) + 1), I3 = ?ASM:add(Temp, BaseReg, Temp), @@ -2230,16 +2203,17 @@ move_to_native_register(#state{regs = Regs} = State, Value) -> case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of {ok, CachedReg} -> Bit = reg_bit(CachedReg), - case State#state.used_regs band Bit of + Used = jit_regs:used_regs(Regs), + Avail = jit_regs:available_regs(Regs), + 
case Used band Bit of 0 -> - case State#state.available_regs band Bit of + case Avail band Bit of 0 -> move_to_native_register_emit(State, Value, Contents); _ -> { State#state{ - used_regs = State#state.used_regs bor Bit, - available_regs = State#state.available_regs band (bnot Bit) + regs = jit_regs:alloc_reg(Regs, Bit) }, CachedReg } @@ -2255,13 +2229,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, cp, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), {BaseReg, Off} = ?CP, @@ -2271,9 +2244,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2288,8 +2259,6 @@ move_to_native_register_emit( {State#state{stream = Stream1, regs = Regs1}, Reg}; move_to_native_register_emit( #state{ - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State0, Imm, @@ -2297,26 +2266,24 @@ move_to_native_register_emit( ) when is_integer(Imm) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), State1 = State0#state{ - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, extra}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), {BaseReg, Off} = ?X_REG(?MAX_REG), @@ -2326,9 +2293,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor RegBit, - available_regs = Avail band (bnot 
RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2336,8 +2301,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, X}, @@ -2345,6 +2308,7 @@ move_to_native_register_emit( ) when X < ?MAX_REG -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), {BaseReg, Offset} = ?X_REG(X), @@ -2354,9 +2318,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2364,13 +2326,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {y_reg, Y}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), AvailT = Avail band (bnot RegBit), @@ -2386,9 +2347,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - available_regs = AvailT, - used_regs = Used bor RegBit, - regs = Regs2 + regs = jit_regs:alloc_reg(Regs2, RegBit) }, Reg }; @@ -2396,18 +2355,17 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used + regs = Regs0 } = State, {fp_reg, F}, _Contents ) -> + Avail = jit_regs:available_regs(Regs0), RegA = first_avail(Avail), RegABit = reg_bit(RegA), Avail2 = Avail band (bnot RegABit), RegB = first_avail(Avail2), RegBBit = reg_bit(RegB), - AvailT = Avail2 band (bnot RegBBit), {BaseReg, Off} = ?FP_REGS, I1 = ?LOAD_WORD(RegB, BaseReg, Off), I2 = ?ASM:lw(RegA, RegB, F * 8), @@ -2416,7 +2374,8 @@ move_to_native_register_emit( Stream1 = StreamModule:append(Stream0, Code), { State#state{ - stream = Stream1, available_regs = AvailT, used_regs = Used bor RegABit bor RegBBit + stream = Stream1, + regs = 
jit_regs:alloc_reg(Regs0, RegABit bor RegBBit) }, {fp, RegA, RegB} }. @@ -2460,11 +2419,12 @@ move_to_native_register( Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AT, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y}, RegDst ) -> + AT = jit_regs:available_regs(Regs0), Code = ldr_y_reg(RegDst, Y, AT), Stream1 = StreamModule:append(Stream0, Code), % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer @@ -2497,12 +2457,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, Reg ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = ?ASM:mv(SaveReg, Reg), @@ -2512,9 +2471,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, SaveBit) }, SaveReg }; @@ -2522,12 +2479,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {ptr, Reg} ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = ?LOAD_WORD(SaveReg, Reg, 0), @@ -2536,9 +2492,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, SaveBit) }, SaveReg }; @@ -2546,10 +2500,11 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). 
move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y} ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), AvailT = Avail band (bnot reg_bit(Reg)), I1 = ldr_y_reg(Reg, Y, AvailT), @@ -2567,10 +2522,11 @@ move_to_cp( State#state{stream = Stream1, regs = Regs1}. increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Offset ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), {BaseReg1, Off1} = ?Y_REGS, I1 = ?LOAD_WORD(Reg, BaseReg1, Off1), @@ -2586,13 +2542,13 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, labels = Labels, regs = Regs0 } = State, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), Offset = StreamModule:offset(Stream0), @@ -2629,11 +2585,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), @@ -2656,11 +2612,10 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), % Load module pointer from jit_state (which is in a1) @@ -2672,9 +2627,7 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = Avail band (bnot RegBit), - used_regs = UsedRegs0 bor RegBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }. 
@@ -2715,39 +2668,59 @@ and_( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), {State0#state{stream = Stream1, regs = Regs1}, Reg}; and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, bnot (Val) + ), Stream1 = State1#state.stream, % RISC-V doesn't have bics, use not + and I1 = ?ASM:not_(Temp, Temp), I2 = ?ASM:and_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, <>), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}, Reg}; + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }, + Reg + }; and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val -) when Avail =/= 0 -> +) -> + Avail = jit_regs:available_regs(Regs0), + true = Avail =/= 0, Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = ?ASM:and_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}, Reg}; + Regs1 = 
jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }, + Reg + }; and_( - #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR, regs = Regs0} = + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, ?TERM_PRIMARY_CLEAR_MASK ) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I = ?ASM:andi(ResultReg, Reg, -4), @@ -2756,9 +2729,7 @@ and_( { State0#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. @@ -2778,18 +2749,19 @@ or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; or_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val), Stream1 = State1#state.stream, I = ?ASM:or_(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
xor_( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Reg, SrcReg @@ -2808,18 +2780,19 @@ xor_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = Stat Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; xor_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val), Stream1 = State1#state.stream, I = ?ASM:xor_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
add(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Reg, Val) when Val >= 0 andalso Val =< 255 @@ -2835,15 +2808,16 @@ add(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; -add(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, Reg, Val) -> +add(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val), Stream1 = State1#state.stream, I = ?ASM:add(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= -16#800, Val =< 16#7FF @@ -2873,21 +2847,23 @@ sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{stream = Stream1, regs = Regs1}; -sub(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, Reg, Val) -> +sub(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val), Stream1 = State1#state.stream, I = ?ASM:sub(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Reg), Temp), + State1#state{stream = Stream2, regs = jit_regs:set_available_regs(Regs1, Avail)}. 
mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> +mul(#state{regs = Regs0} = State, Reg, 3) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:slli(Temp, Reg, 1), I2 = ?ASM:add(Reg, Temp, Reg), @@ -2896,7 +2872,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> +mul(#state{regs = Regs0} = State, Reg, 5) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:slli(Temp, Reg, 2), I2 = ?ASM:add(Reg, Temp, Reg), @@ -2906,7 +2883,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> +mul(#state{regs = Regs0} = State, Reg, 7) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:slli(Temp, Reg, 3), I2 = ?ASM:sub(Reg, Temp, Reg), @@ -2915,7 +2893,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> +mul(#state{regs = Regs0} = State, Reg, 9) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:slli(Temp, Reg, 3), I2 = ?ASM:add(Reg, Temp, Reg), @@ -2925,7 +2904,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 15) -> +mul(#state{regs = Regs0} = State, Reg, 15) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = ?ASM:slli(Temp, Reg, 4), I2 = ?ASM:sub(Reg, 
Temp, Reg), @@ -2939,21 +2919,22 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val), Stream1 = State1#state.stream, I = ?ASM:mul(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp), Reg), + AvailAfter = jit_regs:available_regs(State1#state.regs) bor reg_bit(Temp), State1#state{ stream = Stream2, - available_regs = State1#state.available_regs bor reg_bit(Temp), - regs = Regs1 + regs = jit_regs:set_available_regs(Regs1, AvailAfter) }; mul( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg @@ -2975,9 +2956,10 @@ mul( %% When reductions == 0, we schedule the next process, and resume at the continuation point. %% decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0 ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), % Load reduction count @@ -3022,9 +3004,8 @@ decrement_reductions_and_maybe_schedule_next( Stream3, BNEOffset, <> ), StreamN = Stream4, - State3 = merge_used_regs(State2#state{stream = StreamN}, State1#state.used_regs), %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. 
- State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. + State2#state{stream = StreamN, regs = jit_regs:invalidate_all(State1#state.regs)}. call_or_schedule_next(State0, Label) -> {State1, RewriteOffset, TempReg} = set_cp(State0), @@ -3035,10 +3016,11 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = RegsCO } = State0, Label ) -> + Avail = jit_regs:available_regs(RegsCO), Temp = first_avail(Avail), % Load reduction count (jit_state is in a1) I1 = ?ASM:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), @@ -3111,13 +3093,14 @@ call_primitive_with_cp(State0, Primitive, Args) -> State2 = call_primitive_last(State1, Primitive, Args), rewrite_cp_offset(State2, RewriteOffset, TempReg). -set_cp(#state{available_regs = Avail, used_regs = UsedRegs} = State0) -> +set_cp(#state{regs = RegsSC} = State0) -> + Avail = jit_regs:available_regs(RegsSC), TempReg = first_avail(Avail), TempBit = reg_bit(TempReg), % Reserve a temporary register for the offset BEFORE calling get_module_index % to avoid running out of available registers State0b = State0#state{ - available_regs = Avail band (bnot TempBit), used_regs = UsedRegs bor TempBit + regs = jit_regs:alloc_reg(RegsSC, TempBit) }, % get module index (dynamically) { @@ -3177,10 +3160,11 @@ rewrite_cp_offset( State0#state{stream = Stream1}. set_bs( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, TermReg ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), {BaseReg1, Off1} = ?BS, I1 = ?STORE_WORD(BaseReg1, TermReg, Off1), @@ -3328,6 +3312,15 @@ ldr_y_reg(DstReg, Y, 0) when Y * ?WORD_SIZE_BYTES =< ?Y_OFFSET_LIMIT -> I2 = ?LOAD_WORD(DstReg, DstReg, Y * ?WORD_SIZE_BYTES), <>. +%% Scratch-register orderings consumed by jit_backend_regs_impl.hrl. 
They must be +%% defined before that file is included by jit_riscv32 / jit_riscv64; since this +%% file is itself included earlier, defining them here suffices. first_avail uses +%% only the temporaries (t0-t6); mask_to_list additionally covers the argument +%% registers (a0-a7), which appear in used masks during calls. +-define(FIRST_AVAIL_REGS, [t6, t5, t4, t3, t2, t1, t0]). +-define(MASK_TO_LIST_REGS, [t6, t5, t4, t3, t2, t1, t0, a7, a6, a5, a4, a3, a2, a1, a0]). +-define(JITSTATE_ARG_REG, jit_state). + reg_bit(a0) -> ?REG_BIT_A0; reg_bit(a1) -> ?REG_BIT_A1; reg_bit(a2) -> ?REG_BIT_A2; @@ -3352,76 +3345,6 @@ regs_to_mask([offset | T]) -> regs_to_mask(T); regs_to_mask([stack | T]) -> regs_to_mask(T); regs_to_mask([Reg | T]) -> reg_bit(Reg) bor regs_to_mask(T). -%% first_avail returns the first available register from a bitmask. -%% Order matches AVAILABLE_REGS = [t6, t5, t4, t3, t2, t1, t0] -first_avail(Mask) when Mask band ?REG_BIT_T6 =/= 0 -> t6; -first_avail(Mask) when Mask band ?REG_BIT_T5 =/= 0 -> t5; -first_avail(Mask) when Mask band ?REG_BIT_T4 =/= 0 -> t4; -first_avail(Mask) when Mask band ?REG_BIT_T3 =/= 0 -> t3; -first_avail(Mask) when Mask band ?REG_BIT_T2 =/= 0 -> t2; -first_avail(Mask) when Mask band ?REG_BIT_T1 =/= 0 -> t1; -first_avail(Mask) when Mask band ?REG_BIT_T0 =/= 0 -> t0. - -%% Convert bitmask to list, covering all register bits. -mask_to_list(0) -> []; -mask_to_list(Mask) -> mask_to_list_t6(Mask). - -mask_to_list_t6(Mask) when Mask band ?REG_BIT_T6 =/= 0 -> [t6 | mask_to_list_t5(Mask)]; -mask_to_list_t6(Mask) -> mask_to_list_t5(Mask). -mask_to_list_t5(Mask) when Mask band ?REG_BIT_T5 =/= 0 -> [t5 | mask_to_list_t4(Mask)]; -mask_to_list_t5(Mask) -> mask_to_list_t4(Mask). -mask_to_list_t4(Mask) when Mask band ?REG_BIT_T4 =/= 0 -> [t4 | mask_to_list_t3(Mask)]; -mask_to_list_t4(Mask) -> mask_to_list_t3(Mask). 
-mask_to_list_t3(Mask) when Mask band ?REG_BIT_T3 =/= 0 -> [t3 | mask_to_list_t2(Mask)]; -mask_to_list_t3(Mask) -> mask_to_list_t2(Mask). -mask_to_list_t2(Mask) when Mask band ?REG_BIT_T2 =/= 0 -> [t2 | mask_to_list_t1(Mask)]; -mask_to_list_t2(Mask) -> mask_to_list_t1(Mask). -mask_to_list_t1(Mask) when Mask band ?REG_BIT_T1 =/= 0 -> [t1 | mask_to_list_t0(Mask)]; -mask_to_list_t1(Mask) -> mask_to_list_t0(Mask). -mask_to_list_t0(Mask) when Mask band ?REG_BIT_T0 =/= 0 -> [t0 | mask_to_list_a7(Mask)]; -mask_to_list_t0(Mask) -> mask_to_list_a7(Mask). -mask_to_list_a7(Mask) when Mask band ?REG_BIT_A7 =/= 0 -> [a7 | mask_to_list_a6(Mask)]; -mask_to_list_a7(Mask) -> mask_to_list_a6(Mask). -mask_to_list_a6(Mask) when Mask band ?REG_BIT_A6 =/= 0 -> [a6 | mask_to_list_a5(Mask)]; -mask_to_list_a6(Mask) -> mask_to_list_a5(Mask). -mask_to_list_a5(Mask) when Mask band ?REG_BIT_A5 =/= 0 -> [a5 | mask_to_list_a4(Mask)]; -mask_to_list_a5(Mask) -> mask_to_list_a4(Mask). -mask_to_list_a4(Mask) when Mask band ?REG_BIT_A4 =/= 0 -> [a4 | mask_to_list_a3(Mask)]; -mask_to_list_a4(Mask) -> mask_to_list_a3(Mask). -mask_to_list_a3(Mask) when Mask band ?REG_BIT_A3 =/= 0 -> [a3 | mask_to_list_a2(Mask)]; -mask_to_list_a3(Mask) -> mask_to_list_a2(Mask). -mask_to_list_a2(Mask) when Mask band ?REG_BIT_A2 =/= 0 -> [a2 | mask_to_list_a1(Mask)]; -mask_to_list_a2(Mask) -> mask_to_list_a1(Mask). -mask_to_list_a1(Mask) when Mask band ?REG_BIT_A1 =/= 0 -> [a1 | mask_to_list_a0(Mask)]; -mask_to_list_a1(Mask) -> mask_to_list_a0(Mask). -mask_to_list_a0(Mask) when Mask band ?REG_BIT_A0 =/= 0 -> [a0]; -mask_to_list_a0(_Mask) -> []. 
- -args_regs(Args) -> - lists:map( - fun - ({free, {ptr, Reg}}) -> Reg; - ({free, Reg}) when is_atom(Reg) -> Reg; - ({free, Imm}) when is_integer(Imm) -> imm; - (offset) -> imm; - (ctx) -> ?CTX_REG; - (jit_state) -> jit_state; - (jit_state_tail_call) -> jit_state; - (stack) -> stack; - (Reg) when is_atom(Reg) -> Reg; - (Imm) when is_integer(Imm) -> imm; - ({ptr, Reg}) -> Reg; - ({x_reg, _}) -> ?CTX_REG; - ({y_reg, _}) -> ?CTX_REG; - ({fp_reg, _}) -> ?CTX_REG; - ({free, {x_reg, _}}) -> ?CTX_REG; - ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG; - ({avm_int64_t, _}) -> imm - end, - Args - ). - %%----------------------------------------------------------------------------- %% @doc Add a label at the current offset. %% @end diff --git a/libs/jit/src/jit_wasm32.erl b/libs/jit/src/jit_wasm32.erl index f8e00effaa..6414118a78 100644 --- a/libs/jit/src/jit_wasm32.erl +++ b/libs/jit/src/jit_wasm32.erl @@ -160,8 +160,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), non_neg_integer()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), max_scratch :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), @@ -220,12 +218,10 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, max_scratch = ?NUM_SCRATCH_LOCALS, labels = #{}, variant = Variant, - regs = jit_regs:new(), + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0), func_bodies = [], current_body = <<>>, current_label = undefined, @@ -265,11 +261,13 @@ flush(#state{stream_module = StreamModule, stream = Stream0} = State) -> debugger(State) -> emit(State, jit_wasm32_asm:unreachable()). +%% Native-register allocation bookkeeping. Flow through jit_regs so the masks +%% live inside jit_regs:regs() rather than #state{}. 
-spec used_regs(state()) -> [wasm_local()]. -used_regs(#state{used_regs = Used}) -> mask_to_locals(Used). +used_regs(#state{regs = Regs}) -> mask_to_locals(jit_regs:used_regs(Regs)). -spec available_regs(state()) -> [wasm_local()]. -available_regs(#state{available_regs = Available}) -> mask_to_locals(Available). +available_regs(#state{regs = Regs}) -> mask_to_locals(jit_regs:available_regs(Regs)). -spec free_native_registers(state(), [value()]) -> state(). free_native_registers(State, []) -> @@ -279,17 +277,14 @@ free_native_registers(State, [Val | Rest]) -> free_native_registers(State1, Rest). -spec free_native_register(state(), value()) -> state(). -free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Local -) when is_atom(Local) -> +free_native_register(#state{regs = Regs} = State, Local) when is_atom(Local) -> LocalIdx = jit_wasm32_asm:local_index(Local), case LocalIdx >= ?FIRST_SCRATCH_LOCAL of true -> Bit = local_bit(LocalIdx), - State#state{ - available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) - }; + A = jit_regs:available_regs(Regs), + U = jit_regs:used_regs(Regs), + State#state{regs = jit_regs:set_masks(Regs, A bor Bit, U band (bnot Bit))}; false -> State end; @@ -299,10 +294,10 @@ free_native_register(State, _Other) -> State. -spec assert_all_native_free(state()) -> ok. -assert_all_native_free(#state{max_scratch = MS} = State) -> - 0 = State#state.used_regs, +assert_all_native_free(#state{max_scratch = MS, regs = Regs}) -> + 0 = jit_regs:used_regs(Regs), AllFree = (1 bsl MS) - 1, - AllFree = State#state.available_regs, + AllFree = jit_regs:available_regs(Regs), ok. 
%%============================================================================= @@ -362,10 +357,9 @@ call_primitive(State0, Primitive, Args) -> call_primitive_last(State0, Primitive, Args) -> State1 = emit_call_primitive(State0, Primitive, Args, none, true), AllFree = (1 bsl State1#state.max_scratch) - 1, + Regs1 = jit_regs:unreachable(State1#state.regs), State1#state{ - available_regs = AllFree, - used_regs = 0, - regs = jit_regs:unreachable(State1#state.regs) + regs = jit_regs:set_masks(Regs1, AllFree, 0) }. call_primitive_with_cp(State0, Primitive, Args) -> @@ -390,14 +384,13 @@ call_primitive_with_cp(State0, Primitive, Args) -> NewFuncBodies = [{PrevLabel, FinalizedBody} | FuncBodies], ContLabelOff = JumpTableStart + ContLabel * ?JUMP_TABLE_ENTRY_SIZE, AllFree = (1 bsl State3#state.max_scratch) - 1, + Regs1 = jit_regs:invalidate_all(State3#state.regs), State3#state{ func_bodies = NewFuncBodies, current_body = <<>>, current_label = ContLabel, labels = Labels#{ContLabel => ContLabelOff}, - available_regs = AllFree, - used_regs = 0, - regs = jit_regs:invalidate_all(State3#state.regs) + regs = jit_regs:set_masks(Regs1, AllFree, 0) }. 
%%============================================================================= @@ -405,9 +398,11 @@ call_primitive_with_cp(State0, Primitive, Args) -> %%============================================================================= return_if_not_equal_to_ctx( - #state{available_regs = Available0, used_regs = Used0} = State0, + #state{regs = Regs0} = State0, {free, Local} ) -> + Available0 = jit_regs:available_regs(Regs0), + Used0 = jit_regs:used_regs(Regs0), Code = << (jit_wasm32_asm:local_get(Local))/binary, (jit_wasm32_asm:local_get(?CTX_LOCAL))/binary, @@ -419,12 +414,10 @@ return_if_not_equal_to_ctx( (jit_wasm32_asm:end_())/binary >>, Bit = local_bit(Local), - Regs1 = jit_regs:invalidate_reg(State0#state.regs, Local), + Regs1 = jit_regs:invalidate_reg(Regs0, Local), State1 = emit(State0, Code), State1#state{ - available_regs = Available0 bor Bit, - used_regs = Used0 band (bnot Bit), - regs = Regs1 + regs = jit_regs:set_masks(Regs1, Available0 bor Bit, Used0 band (bnot Bit)) }. jump_to_label(State0, Label) -> @@ -479,10 +472,9 @@ jump_to_continuation(State0, {free, OffsetLocal}) -> >>, State1 = emit(State0, Code), AllFree = (1 bsl State1#state.max_scratch) - 1, + Regs1 = jit_regs:unreachable(State1#state.regs), State1#state{ - available_regs = AllFree, - used_regs = 0, - regs = jit_regs:unreachable(State1#state.regs) + regs = jit_regs:set_masks(Regs1, AllFree, 0) }. 
%%============================================================================= @@ -497,15 +489,17 @@ if_block(State0, {'and', CondList}, BlockFn) -> State2 = emit(State1, jit_wasm32_asm:if_(jit_wasm32_asm:blocktype_void())), State3 = BlockFn(State2), State4 = emit(State3, jit_wasm32_asm:end_()), - MergedRegs = jit_regs:merge(State1#state.regs, State4#state.regs), - merge_used_regs(State4#state{regs = MergedRegs}, State1#state.used_regs); + AllRegsMask = (1 bsl State4#state.max_scratch) - 1, + MergedRegs = jit_regs:merge(State1#state.regs, State4#state.regs, AllRegsMask), + State4#state{regs = MergedRegs}; if_block(State0, Cond, BlockFn) -> State1 = emit_condition(State0, Cond), State2 = emit(State1, jit_wasm32_asm:if_(jit_wasm32_asm:blocktype_void())), State3 = BlockFn(State2), State4 = emit(State3, jit_wasm32_asm:end_()), - MergedRegs = jit_regs:merge(State1#state.regs, State4#state.regs), - merge_used_regs(State4#state{regs = MergedRegs}, State1#state.used_regs). + AllRegsMask = (1 bsl State4#state.max_scratch) - 1, + MergedRegs = jit_regs:merge(State1#state.regs, State4#state.regs, AllRegsMask), + State4#state{regs = MergedRegs}. -spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> state(). @@ -515,13 +509,17 @@ if_else_block(State0, Cond, BlockTrueFn, BlockFalseFn) -> State3 = BlockTrueFn(State2), State4 = emit(State3, jit_wasm32_asm:else_()), StateElse = State4#state{ - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs + regs = jit_regs:set_masks( + State4#state.regs, + jit_regs:available_regs(State1#state.regs), + jit_regs:used_regs(State1#state.regs) + ) }, State5 = BlockFalseFn(StateElse), State6 = emit(State5, jit_wasm32_asm:end_()), - MergedRegs = jit_regs:merge(State3#state.regs, State5#state.regs), - merge_used_regs(State6#state{regs = MergedRegs}, State3#state.used_regs). 
+ AllRegsMask = (1 bsl State6#state.max_scratch) - 1, + MergedRegs = jit_regs:merge(State3#state.regs, State5#state.regs, AllRegsMask), + State6#state{regs = MergedRegs}. %%============================================================================= %% Arithmetic and bitwise operations @@ -1077,14 +1075,13 @@ decrement_reductions_and_maybe_schedule_next(State0) -> NewFuncBodies = [{PrevLabel, FinalizedBody} | FuncBodies], ContLabelOff = JumpTableStart + ContLabel * ?JUMP_TABLE_ENTRY_SIZE, AllFree = (1 bsl State9#state.max_scratch) - 1, + Regs1 = jit_regs:invalidate_all(State9#state.regs), State9#state{ func_bodies = NewFuncBodies, current_body = <<>>, current_label = ContLabel, labels = Labels#{ContLabel => ContLabelOff}, - available_regs = AllFree, - used_regs = 0, - regs = jit_regs:invalidate_all(State9#state.regs) + regs = jit_regs:set_masks(Regs1, AllFree, 0) }. call_or_schedule_next(State0, Label) -> @@ -1115,14 +1112,13 @@ call_or_schedule_next(State0, Label) -> NewFuncBodies = [{PrevLabel, FinalizedBody} | FuncBodies], ContLabelOff = JumpTableStart + ContLabel * ?JUMP_TABLE_ENTRY_SIZE, AllFree = (1 bsl State3#state.max_scratch) - 1, + Regs1 = jit_regs:invalidate_all(State3#state.regs), State3#state{ func_bodies = NewFuncBodies, current_body = <<>>, current_label = ContLabel, labels = Labels#{ContLabel => ContLabelOff}, - available_regs = AllFree, - used_regs = 0, - regs = jit_regs:invalidate_all(State3#state.regs) + regs = jit_regs:set_masks(Regs1, AllFree, 0) }. call_only_or_schedule_next(State0, Label) -> @@ -1176,8 +1172,10 @@ call_func_ptr(State0, FuncPtrTuple, Args) -> State4 = emit(State3, jit_wasm32_asm:call_indirect(TypeIdx, 0)), %% Store result State5 = emit(State4, jit_wasm32_asm:local_set(ResultLocal)), - Regs1 = jit_regs:invalidate_all(State0#state.regs), State6 = free_func_ptr(State5, FuncPtrTuple), + %% Invalidate contents tracking but keep the masks from State6: they + %% account for ResultLocal being allocated and the freed arguments. 
+ Regs1 = jit_regs:invalidate_all(State6#state.regs), {State6#state{regs = Regs1}, ResultLocal}. %%============================================================================= @@ -1322,9 +1320,7 @@ add_label( current_body = <<>>, current_label = ContLabel, labels = Labels#{ContLabel => ContLabelOff}, - available_regs = AllFree, - used_regs = 0, - regs = Regs1 + regs = jit_regs:set_masks(Regs1, AllFree, 0) }; add_label( #state{ @@ -1357,28 +1353,34 @@ emit(#state{current_body = Body} = State, Code) -> State#state{current_body = <<Body/binary, Code/binary>>}. %% Allocate a scratch local, extending the pool if exhausted. -alloc_local(#state{available_regs = 0, used_regs = Used, max_scratch = MaxScratch} = State) -> - LocalIdx = ?FIRST_SCRATCH_LOCAL + MaxScratch, - Bit = 1 bsl MaxScratch, - LocalAtom = index_to_local(LocalIdx), - { - State#state{ - used_regs = Used bor Bit, - max_scratch = MaxScratch + 1 - }, - LocalAtom - }; -alloc_local(#state{available_regs = Available, used_regs = Used} = State) -> - LocalIdx = first_avail_local(Available), - Bit = local_bit(LocalIdx), - LocalAtom = index_to_local(LocalIdx), - { - State#state{ - available_regs = Available band (bnot Bit), - used_regs = Used bor Bit - }, - LocalAtom - }. +alloc_local(#state{regs = Regs, max_scratch = MaxScratch} = State) -> + Available = jit_regs:available_regs(Regs), + Used = jit_regs:used_regs(Regs), + case Available of + 0 -> + LocalIdx = ?FIRST_SCRATCH_LOCAL + MaxScratch, + Bit = 1 bsl MaxScratch, + LocalAtom = index_to_local(LocalIdx), + { + State#state{ + regs = jit_regs:set_masks(Regs, Available, Used bor Bit), + max_scratch = MaxScratch + 1 + }, + LocalAtom + }; + _ -> + LocalIdx = first_avail_local(Available), + Bit = local_bit(LocalIdx), + LocalAtom = index_to_local(LocalIdx), + { + State#state{ + regs = jit_regs:set_masks( + Regs, Available band (bnot Bit), Used bor Bit + ) + }, + LocalAtom + } + end.
%% Get the first available local from bitmask first_avail_local(Mask) -> @@ -1426,12 +1428,6 @@ local_bit(LocalAtom) when is_atom(LocalAtom) -> local_bit(LocalIdx) when is_integer(LocalIdx), LocalIdx >= ?FIRST_SCRATCH_LOCAL -> 1 bsl (LocalIdx - ?FIRST_SCRATCH_LOCAL). -merge_used_regs(#state{used_regs = UR, max_scratch = MS} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - AllFree = (1 bsl MS) - 1, - MergedAvail = AllFree band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. - emit_value_to_stack(cp) -> << (jit_wasm32_asm:local_get(?CTX_LOCAL))/binary, diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 1f06c7482d..7bc907b974 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -137,8 +137,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), non_neg_integer()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), regs :: jit_regs:regs() @@ -263,11 +261,9 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, labels = #{}, variant = Variant, - regs = jit_regs:new() + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0) }. %%----------------------------------------------------------------------------- @@ -312,70 +308,14 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> Stream1 = StreamModule:append(Stream0, <<16#CC>>), State#state{stream = Stream1}. -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently used native registers. This is used for -%% debugging and not in production. 
-%% @end -%% @param State current backend state -%% @return The list of used registers -%%----------------------------------------------------------------------------- --spec used_regs(state()) -> [x86_64_register()]. -used_regs(#state{used_regs = Used}) -> mask_to_list(Used). - -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently available native scratch registers. This -%% is used for debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of available registers -%%----------------------------------------------------------------------------- --spec available_regs(state()) -> [x86_64_register()]. -available_regs(#state{available_regs = Available}) -> mask_to_list(Available). - -%%----------------------------------------------------------------------------- -%% @doc Free native registers. The passed list of registers can contain -%% registers, pointer to registers or other values that are ignored. -%% @end -%% @param State current backend state -%% @param Regs list of registers or other values -%% @return The updated backend state -%%----------------------------------------------------------------------------- --spec free_native_registers(state(), [value()]) -> state(). -free_native_registers(State, []) -> - State; -free_native_registers(State, [Reg | Rest]) -> - State1 = free_native_register(State, Reg), - free_native_registers(State1, Rest). - --spec free_native_register(state(), value()) -> state(). -free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Reg -) when - is_atom(Reg) --> - Bit = reg_bit(Reg), - State#state{ - available_regs = Available0 bor Bit, - used_regs = Used0 band (bnot Bit) - }; -free_native_register(State, {ptr, Reg}) -> - free_native_register(State, Reg); -free_native_register(State, _Other) -> - State. 
- -%%----------------------------------------------------------------------------- -%% @doc Assert that all native scratch registers are available. This is used -%% for debugging and not in production. -%% @end -%% @param State current backend state -%% @return ok -%%----------------------------------------------------------------------------- --spec assert_all_native_free(state()) -> ok. -assert_all_native_free(State) -> - 0 = State#state.used_regs, - ?AVAILABLE_REGS_MASK = State#state.available_regs, - ok. +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1, +%% first_avail/1, mask_to_list/1, args_regs/1, prepare_call_scratch/1) is shared +%% across the register-based backends and flows through jit_regs. +-define(FIRST_AVAIL_REGS, [rax, r11, r10, r9, r8, rcx]). +-define(MASK_TO_LIST_REGS, [rcx, r8, r9, r10, r11, rax]). +-define(JITSTATE_ARG_REG, ?JITSTATE_REG). +-include("jit_backend_regs_impl.hrl"). %%----------------------------------------------------------------------------- %% @doc Emit the jump table at the beginning of the module. 
Branches will be @@ -500,12 +440,12 @@ call_primitive( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs + regs = Regs0 } = State, Primitive, Args ) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), % We need a register for the function pointer that should not be used as a parameter ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), @@ -529,9 +469,10 @@ call_primitive( call_func_ptr( State#state{ stream = Stream1, - available_regs = AvailableRegs0 band (bnot TempBit), - used_regs = UsedRegs bor TempBit, - regs = jit_regs:invalidate_reg(State#state.regs, Temp) + regs = jit_regs:alloc_reg( + jit_regs:invalidate_reg(Regs0, Temp), + TempBit + ) }, {free, Temp}, Args @@ -566,9 +507,9 @@ call_primitive_last( Stream1 = StreamModule:append(Stream0, Call), State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State0#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State0#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) }; call_primitive_last( #state{ @@ -581,16 +522,15 @@ call_primitive_last( % We need a register for the function pointer that should not be used as a parameter % Since we're not returning, we can use all scratch registers except % registers used for parameters - ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - ArgsRegs = args_regs(Args), - ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), - ArgsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), - ScratchMask = - ?AVAILABLE_REGS_MASK band (bnot (ArgsMask bor ParamMask)), - Temp = first_avail(ScratchMask), - TempBit = reg_bit(Temp), - AvailableRegs1 = ScratchMask band (bnot TempBit), - UsedRegs = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), + #{ + temp := Temp, + available_mask := AvailableRegs1, + used_mask := UsedRegs, + param_regs := ParamRegs, + args_regs := ArgsRegs, + 
param_mask := ParamMask, + args_mask := ArgsMask + } = prepare_call_scratch(Args), PrepCall = case Primitive of 0 -> @@ -602,9 +542,11 @@ call_primitive_last( State1 = set_args2( State0#state{ stream = Stream1, - available_regs = AvailableRegs1, - used_regs = UsedRegs, - regs = jit_regs:invalidate_reg(State0#state.regs, Temp) + regs = jit_regs:set_masks( + jit_regs:invalidate_reg(State0#state.regs, Temp), + AvailableRegs1, + UsedRegs + ) }, Args, ParamRegs, @@ -617,9 +559,9 @@ call_primitive_last( Stream3 = StreamModule:append(Stream2, Call), State1#state{ stream = Stream3, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State1#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State1#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) }. %%----------------------------------------------------------------------------- @@ -635,8 +577,7 @@ return_if_not_equal_to_ctx( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State, {free, Reg} ) -> @@ -652,9 +593,7 @@ return_if_not_equal_to_ctx( RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs0 bor RegBit, - used_regs = UsedRegs0 band (bnot RegBit), - regs = State#state.regs + regs = jit_regs:free_reg(Regs0, RegBit) }. 
%%----------------------------------------------------------------------------- @@ -714,10 +653,11 @@ jump_to_continuation( stream_module = StreamModule, stream = Stream0, offset = BaseOffset, - available_regs = Avail + regs = Regs0 } = State, {free, OffsetReg} ) -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) @@ -738,9 +678,9 @@ jump_to_continuation( % Free all registers since this is a tail jump State#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(Regs0), ?AVAILABLE_REGS_MASK, 0 + ) }. %%----------------------------------------------------------------------------- @@ -781,11 +721,12 @@ if_block( Stream2, Replacements ), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), %% At the merge point, only keep register tracking that is consistent %% in both the taken (State2) and not-taken (State1) paths - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}; + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}; if_block( #state{stream_module = StreamModule, stream = Stream0} = State0, Cond, @@ -801,9 +742,10 @@ if_block( Stream3 = StreamModule:replace(Stream2, Offset + ReplaceDelta, << (OffsetAfter - OffsetAfterCond) >>), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}. 
%%----------------------------------------------------------------------------- %% @doc Emit an if else block, i.e. emit a test of a condition and @@ -838,8 +780,6 @@ if_else_block( >>), StateElse = State2#state{ stream = Stream4, - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, regs = State1#state.regs }, State3 = BlockFalseFn(StateElse), @@ -848,10 +788,11 @@ if_else_block( Stream6 = StreamModule:replace(Stream5, ElseJumpOffset + RelocJMPOffset, << (OffsetFinal - OffsetAfter) >>), - State4 = merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs), %% Merge register tracking from both branches (true=State2, false=State3) - MergedRegs = jit_regs:merge(State2#state.regs, State3#state.regs), - State4#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State2#state.regs, State3#state.regs, ?AVAILABLE_REGS_MASK + ), + State3#state{stream = Stream6, regs = MergedRegs}. -spec if_block_cond(state(), condition()) -> {state(), non_neg_integer()}. 
if_block_cond(#state{stream_module = StreamModule} = State0, Cond) -> @@ -884,8 +825,9 @@ if_block_cond0(State0, {Value, '<', RegOrTuple}) when ?IS_SINT32_T(Value) -> {State1, <>, byte_size(I1) + RelocJLEOffset}; % Catch-all for large values outside SINT32_T range if_block_cond0( - #state{available_regs = Avail, regs = Regs0} = State0, {Value, '<', RegOrTuple} + #state{regs = Regs0} = State0, {Value, '<', RegOrTuple} ) when is_integer(Value) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -920,8 +862,9 @@ if_block_cond0(State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) -> {State1, <>, byte_size(I1) + RelocJGEOffset}; % Catch-all for large values outside SINT32_T range if_block_cond0( - #state{available_regs = Avail, regs = Regs0} = State0, {RegOrTuple, '<', Value} + #state{regs = Regs0} = State0, {RegOrTuple, '<', Value} ) when is_integer(Value) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -988,9 +931,10 @@ if_block_cond0( State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJZOffset}; if_block_cond0( - #state{available_regs = Avail, regs = Regs0} = State0, + #state{regs = Regs0} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) orelse ?IS_GPR(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -1030,9 +974,10 @@ if_block_cond0( State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJZOffset}; if_block_cond0( - #state{available_regs = Avail, regs = Regs0} = State0, + #state{regs = Regs0} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) orelse ?IS_GPR(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Reg = case RegOrTuple of @@ -1112,7 +1057,7 @@ if_block_cond0(#state{regs = Regs0} = State0, {{free, Reg} = RegTuple, '&', Mask State1 = if_block_free_reg(RegTuple, State0#state{regs = Regs1}), {State1, <>, 
byte_size(I1) + byte_size(I2) + RelocJZOffset}; if_block_cond0(#state{regs = Regs0} = State0, {Reg, '&', Mask, '!=', Val}) when ?IS_UINT8_T(Mask) -> - Temp = first_avail(State0#state.available_regs), + Temp = first_avail(jit_regs:available_regs(Regs0)), I1 = jit_x86_64_asm:movq(Reg, Temp), I2 = jit_x86_64_asm:andb(Mask, Temp), I3 = jit_x86_64_asm:cmpb(Val, Temp), @@ -1125,21 +1070,14 @@ if_block_cond0(#state{regs = Regs0} = State0, {Reg, '&', Mask, '!=', Val}) when }. -spec if_block_free_reg(x86_64_register() | {free, x86_64_register()}, state()) -> state(). -if_block_free_reg({free, Reg}, #state{available_regs = AvR0, used_regs = UR0} = State0) -> +if_block_free_reg({free, Reg}, #state{regs = Regs0} = State0) -> Bit = reg_bit(Reg), State0#state{ - available_regs = AvR0 bor Bit, - used_regs = UR0 band (bnot Bit) + regs = jit_regs:free_reg(Regs0, Bit) }; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. --spec merge_used_regs(state(), non_neg_integer()) -> state(). -merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. - %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively %% dividing it by 2^Shift @@ -1162,8 +1100,6 @@ shift_right( shift_right( #state{ stream_module = StreamModule, - available_regs = Avail, - used_regs = UR, stream = Stream0, regs = Regs0 } = State, @@ -1172,6 +1108,7 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I1 = jit_x86_64_asm:movq(Reg, ResultReg), @@ -1181,9 +1118,7 @@ shift_right( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ResultReg }. 
@@ -1209,8 +1144,6 @@ shift_right_arith( shift_right_arith( #state{ stream_module = StreamModule, - available_regs = Avail, - used_regs = UR, stream = Stream0, regs = Regs0 } = State, @@ -1219,6 +1152,7 @@ shift_right_arith( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I1 = jit_x86_64_asm:movq(Reg, ResultReg), @@ -1228,9 +1162,7 @@ shift_right_arith( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ResultReg }. @@ -1268,12 +1200,13 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State0, FuncPtrTuple, Args ) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), + UsedRegs0 = jit_regs:used_regs(Regs0), FreeMask = lists:foldl( fun ({free, {ptr, Reg}}, Acc) -> Acc bor reg_bit(Reg); @@ -1354,9 +1287,7 @@ call_func_ptr( { State1#state{ stream = Stream9, - available_regs = AvailableRegs2, - used_regs = UsedRegs2, - regs = Regs1 + regs = jit_regs:set_masks(Regs1, AvailableRegs2, UsedRegs2) }, ResultReg }. @@ -1370,13 +1301,14 @@ set_args(State0, Args) -> set_args2(State0, Args, ParamRegs, ArgsRegs, ParamMask, ArgsMask). 
set_args2( - #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, + #state{stream = Stream0, stream_module = StreamModule, regs = Regs0} = State0, Args, ParamRegs, ArgsRegs, ParamMask, ArgsMask ) -> + UsedRegs = jit_regs:used_regs(Regs0), AvailableScratchGP = ?SCRATCH_REGS_MASK band (bnot (ParamMask bor ArgsMask bor UsedRegs)), Offset = StreamModule:offset(Stream0), @@ -1400,8 +1332,11 @@ set_args2( ), State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), - used_regs = ParamMask bor NewUsedMask + regs = jit_regs:set_masks( + Regs0, + ?AVAILABLE_REGS_MASK band (bnot (ParamMask bor NewUsedMask)), + ParamMask bor NewUsedMask + ) }. parameter_regs(Args) -> @@ -1584,7 +1519,8 @@ move_to_vm_register_emit(State, 0, {ptr, Reg}) -> I1 = jit_x86_64_asm:andq(0, {0, Reg}), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, 0, {y_reg, Y}) -> +move_to_vm_register_emit(#state{regs = Regs0} = State, 0, {y_reg, Y}) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:andq(0, {Y * 8, Temp}), @@ -1607,9 +1543,10 @@ move_to_vm_register_emit(State, N, {ptr, Reg}) when ?IS_SINT32_T(N) -> State#state.stream, jit_x86_64_asm:movq(N, {0, Reg}) ), State#state{stream = Stream1}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N, {y_reg, Y}) when +move_to_vm_register_emit(#state{regs = Regs0} = State, N, {y_reg, Y}) when ?IS_SINT32_T(N) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq(N, {Y * 8, Temp}), @@ -1617,9 +1554,10 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = 
Stream1, regs = Regs1}; % ?is_integer(Src), we need to use movabsq -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N, {x_reg, X}) when +move_to_vm_register_emit(#state{regs = Regs0} = State, N, {x_reg, X}) when X < ?MAX_REG andalso is_integer(N) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), @@ -1627,28 +1565,31 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, N}), State#state{stream = Stream1, regs = Regs1}; move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State, N, {x_reg, extra} + #state{regs = Regs0} = State, N, {x_reg, extra} ) when is_integer(N) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(?MAX_REG)), Stream1 = (State#state.stream_module):append(State#state.stream, <>), Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, N}), State#state{stream = Stream1, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N, {ptr, Reg}) when +move_to_vm_register_emit(#state{regs = Regs0} = State, N, {ptr, Reg}) when is_integer(N) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, {0, Reg}), Stream1 = (State#state.stream_module):append(State#state.stream, <>), Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, N}), State#state{stream = Stream1, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N, {y_reg, Y}) when +move_to_vm_register_emit(#state{regs = Regs0} = State, N, {y_reg, Y}) when is_integer(N) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_x86_64_asm:movq(?Y_REGS, 
Temp1), @@ -1672,9 +1613,10 @@ move_to_vm_register_emit(State, Reg, {ptr, Dest}) when is_atom(Reg) -> I1 = jit_x86_64_asm:movq(Reg, {0, Dest}), Stream1 = (State#state.stream_module):append(State#state.stream, I1), State#state{stream = Stream1}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, Reg, {y_reg, Y}) when +move_to_vm_register_emit(#state{regs = Regs0} = State, Reg, {y_reg, Y}) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq(Reg, {Y * 8, Temp}), @@ -1684,67 +1626,40 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, R State#state{stream = Stream1, regs = Regs1}; % Src is x_reg, store in temporary register and call move_to_vm_register_emit for the four cases move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, {x_reg, X}, Dest + #state{regs = Regs0} = State0, {x_reg, X}, Dest ) when X < ?MAX_REG -> - Temp = first_avail(Avail), - TempBit = reg_bit(Temp), - I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, X}), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, - Temp, - Dest - ), - State1#state{available_regs = Avail}; + with_temp(State0, Dest, fun(Temp) -> + {jit_x86_64_asm:movq(?X_REG(X), Temp), jit_regs:set_contents(Regs0, Temp, {x_reg, X})} + end); move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, {x_reg, extra}, Dest + #state{regs = Regs0} = State0, {x_reg, extra}, Dest ) -> - Temp = first_avail(Avail), - TempBit = reg_bit(Temp), - I1 = jit_x86_64_asm:movq(?X_REG(?MAX_REG), Temp), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, ?MAX_REG}), - State1 = 
move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, - Temp, - Dest - ), - State1#state{available_regs = Avail}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State0, {ptr, Reg}, Dest) -> - Temp = first_avail(Avail), - TempBit = reg_bit(Temp), - I1 = jit_x86_64_asm:movq({0, Reg}, Temp), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, - Temp, - Dest - ), - State1#state{available_regs = Avail}; -move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State0, {y_reg, Y}, Dest) -> - Temp = first_avail(Avail), - TempBit = reg_bit(Temp), - I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), - I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), - Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - Regs1 = jit_regs:set_contents(Regs0, Temp, {y_reg, Y}), - State1 = move_to_vm_register_emit( - State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, - Temp, - Dest - ), - State1#state{available_regs = Avail}; + with_temp(State0, Dest, fun(Temp) -> + { + jit_x86_64_asm:movq(?X_REG(?MAX_REG), Temp), + jit_regs:set_contents(Regs0, Temp, {x_reg, ?MAX_REG}) + } + end); +move_to_vm_register_emit(#state{regs = Regs0} = State0, {ptr, Reg}, Dest) -> + with_temp(State0, Dest, fun(Temp) -> + {jit_x86_64_asm:movq({0, Reg}, Temp), jit_regs:invalidate_reg(Regs0, Temp)} + end); +move_to_vm_register_emit(#state{regs = Regs0} = State0, {y_reg, Y}, Dest) -> + with_temp(State0, Dest, fun(Temp) -> + I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), + I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), + {<>, jit_regs:set_contents(Regs0, Temp, {y_reg, Y})} + end); % term_to_float move_to_vm_register_emit( - #state{stream_module = StreamModule, available_regs = Avail, 
stream = Stream0, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {free, {ptr, Reg, 1}}, {fp_reg, F} ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({8, Reg}, Reg), I2 = jit_x86_64_asm:movq(?FP_REGS, Temp), @@ -1755,6 +1670,29 @@ move_to_vm_register_emit( State1 = free_native_register(State0#state{regs = Regs1}, Reg), State1#state{stream = Stream1}. +-spec with_temp( + state(), + vm_register(), + fun((x86_64_register()) -> {binary(), jit_regs:regs()}) +) -> state(). +with_temp( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Dest, EmitFn +) -> + Avail = jit_regs:available_regs(Regs0), + Temp = first_avail(Avail), + TempBit = reg_bit(Temp), + {Code, Regs1} = EmitFn(Temp), + Stream1 = StreamModule:append(Stream0, Code), + State1 = move_to_vm_register_emit( + State0#state{ + stream = Stream1, + regs = jit_regs:set_available_regs(Regs1, Avail band (bnot TempBit)) + }, + Temp, + Dest + ), + State1#state{regs = jit_regs:set_available_regs(State1#state.regs, Avail)}. + %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. %% @end @@ -1771,12 +1709,13 @@ move_to_vm_register_emit( Dest :: vm_register() | x86_64_register() ) -> state(). 
move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), @@ -1785,12 +1724,13 @@ move_array_element( Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({Index * 8, Reg}, Temp), I2 = jit_x86_64_asm:movq(Temp, {0, Dest}), @@ -1798,12 +1738,13 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {y_reg, Y} ) when is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp1), @@ -1826,8 +1767,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -1843,17 +1782,13 @@ move_array_element( Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), Regs2 = jit_regs:invalidate_reg(Regs1, IndexReg), State#state{ - available_regs = AvailableRegs0 bor IndexBit, - used_regs = UsedRegs0 band (bnot IndexBit), stream = Stream1, - 
regs = Regs2 + regs = jit_regs:free_reg(Regs2, IndexBit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -1868,23 +1803,20 @@ move_array_element( Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_reg(Regs0, IndexReg), State#state{ - available_regs = AvailableRegs0 bor IndexBit, - used_regs = UsedRegs0 band (bnot IndexBit), stream = Stream1, - regs = Regs1 + regs = jit_regs:free_reg(Regs1, IndexBit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when ?IS_GPR(IndexReg) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), Temp = first_avail(AvailableRegs0), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:shlq(3, IndexReg), @@ -1899,10 +1831,8 @@ move_array_element( Regs2 = jit_regs:invalidate_reg(Regs1, Temp), Regs3 = jit_regs:invalidate_reg(Regs2, IndexReg), State#state{ - available_regs = AvailableRegs0 bor IndexBit, - used_regs = UsedRegs0 band (bnot IndexBit), stream = Stream1, - regs = Regs3 + regs = jit_regs:free_reg(Regs3, IndexBit) }. %%----------------------------------------------------------------------------- @@ -1936,13 +1866,12 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, Index ) -> + Avail = jit_regs:available_regs(Regs0), ElemReg = first_avail(Avail), Bit = reg_bit(ElemReg), I1 = jit_x86_64_asm:movq({Index * 8, Reg}, ElemReg), @@ -1951,9 +1880,7 @@ get_array_element( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UsedRegs0 bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, ElemReg }. @@ -1975,12 +1902,13 @@ get_array_element( Index :: non_neg_integer() ) -> state(). 
move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, Reg, Index ) when X < ?MAX_REG andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), @@ -1988,12 +1916,13 @@ move_to_array_element( Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {ptr, Source}, Reg, Index ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq({0, Source}, Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), @@ -2001,12 +1930,13 @@ move_to_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y}, Reg, Index ) when ?IS_GPR(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), @@ -2028,12 +1958,13 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Source, Reg, Index ) when is_integer(Source) andalso is_integer(Index) -> + Avail = 
jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Source, Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), @@ -2061,13 +1992,14 @@ move_to_array_element( ) when is_integer(Index) andalso is_integer(Offset) -> move_to_array_element(State, Source, BaseReg, Index + Offset); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, BaseReg, IndexReg, Offset ) when X < ?MAX_REG andalso ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), I2 = jit_x86_64_asm:movq(Temp, {Offset * ?WORD_SIZE, BaseReg, IndexReg, 8}), @@ -2075,13 +2007,14 @@ move_to_array_element( Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y}, BaseReg, IndexReg, Offset ) when ?IS_GPR(BaseReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), @@ -2123,16 +2056,17 @@ move_to_native_register(#state{regs = Regs} = State, Value) -> case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of {ok, CachedReg} -> Bit = reg_bit(CachedReg), - case State#state.used_regs band Bit of + Used = jit_regs:used_regs(Regs), + case Used band Bit of 0 -> - case State#state.available_regs band Bit of + Avail = jit_regs:available_regs(Regs), + case Avail band Bit of 0 -> move_to_native_register_emit(State, Value, Contents); _ -> { State#state{ - used_regs = 
State#state.used_regs bor Bit, - available_regs = State#state.available_regs band (bnot Bit) + regs = jit_regs:alloc_reg(Regs, Bit) }, CachedReg } @@ -2148,13 +2082,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, cp, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?CP, Reg), @@ -2163,9 +2096,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2183,8 +2114,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, Imm, @@ -2192,6 +2121,7 @@ move_to_native_register_emit( ) when is_integer(Imm) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = @@ -2206,9 +2136,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2216,13 +2144,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, extra}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?X_REG(?MAX_REG), Reg), @@ -2231,9 +2158,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2241,8 +2166,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } 
= State, {x_reg, X}, @@ -2250,6 +2173,7 @@ move_to_native_register_emit( ) when X < ?MAX_REG -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?X_REG(X), Reg), @@ -2258,9 +2182,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor Bit, - available_regs = Avail band (bnot Bit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }; @@ -2268,13 +2190,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {y_reg, Y}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), @@ -2285,9 +2206,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }. @@ -2321,12 +2240,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, Reg ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), Bit = reg_bit(SaveReg), I1 = jit_x86_64_asm:movq(Reg, SaveReg), @@ -2336,9 +2254,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, SaveReg }; @@ -2346,12 +2262,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {ptr, Reg} ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), Bit = reg_bit(SaveReg), I1 = jit_x86_64_asm:movq({0, Reg}, SaveReg), @@ -2360,9 +2275,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - 
available_regs = Avail band (bnot Bit), - used_regs = Used bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, SaveReg }; @@ -2370,10 +2283,11 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y} ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), I2 = jit_x86_64_asm:movq({Y * 8, Reg}, Reg), @@ -2384,10 +2298,11 @@ move_to_cp( State#state{stream = Stream1, regs = Regs1}. increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Offset ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), I1 = jit_x86_64_asm:movq(?Y_REGS, Reg), I2 = jit_x86_64_asm:addq(Offset * 8, Reg), @@ -2401,13 +2316,13 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, labels = Labels, regs = Regs0 } = State, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Offset = StreamModule:offset(Stream0), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), @@ -2440,11 +2355,11 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), @@ -2473,11 +2388,10 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), Bit = reg_bit(Reg), I1 = 
jit_x86_64_asm:movq(?JITSTATE_MODULE, Reg), @@ -2488,9 +2402,7 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UsedRegs0 bor Bit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, Bit) }, Reg }. @@ -2507,13 +2419,14 @@ and_( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), {State#state{stream = Stream1, regs = Regs1}, Reg}; and_( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Val ) when ?IS_GPR(Reg), is_integer(Val), Val < -16#80 orelse Val > 16#FFFFFFFF -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:andq(TempReg, Reg), @@ -2538,8 +2451,6 @@ and_( and_( #state{ stream_module = StreamModule, - available_regs = Avail, - used_regs = UR, stream = Stream0, regs = Regs0 } = State, @@ -2548,26 +2459,18 @@ and_( ) when ?IS_GPR(Reg), is_integer(Val), Val < -16#80 orelse Val > 16#FFFFFFFF -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I1 = jit_x86_64_asm:movabsq(Val, ResultReg), I2 = jit_x86_64_asm:andq(Reg, ResultReg), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), - { - State#state{ - stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, - regs = Regs1 - }, - ResultReg - }; + Regs2 = jit_regs:alloc_reg(Regs1, Bit), + {State#state{stream = Stream1, regs = Regs2}, ResultReg}; and_( #state{ stream_module = StreamModule, - available_regs = Avail, - used_regs = UR, stream = Stream0, regs = Regs0 } = State, @@ -2576,6 +2479,7 @@ and_( ) when ?IS_GPR(Reg) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), Bit = reg_bit(ResultReg), I1 = jit_x86_64_asm:movq(Reg, ResultReg), @@ -2586,15 +2490,8 @@ and_( end, Stream1 = 
StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_reg(Regs0, ResultReg), - { - State#state{ - stream = Stream1, - available_regs = Avail band (bnot Bit), - used_regs = UR bor Bit, - regs = Regs1 - }, - ResultReg - }. + Regs2 = jit_regs:alloc_reg(Regs1, Bit), + {State#state{stream = Stream1, regs = Regs2}, ResultReg}. or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, SrcReg) when is_atom(SrcReg) @@ -2604,11 +2501,11 @@ or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{stream = Stream1, regs = Regs1}; or_( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val ) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:orq(TempReg, Reg), @@ -2629,11 +2526,11 @@ xor_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = Stat Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{stream = Stream1, regs = Regs1}; xor_( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val ) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:xorq(TempReg, Reg), @@ -2650,12 +2547,12 @@ add( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, regs = Regs0 } = State, Reg, Val ) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), I1 = 
jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:addq(TempReg, Reg), @@ -2672,12 +2569,12 @@ sub( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, regs = Regs0 } = State, Reg, Val ) when is_integer(Val), Val < -16#80000000 orelse Val > 16#7FFFFFFF -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:subq(TempReg, Reg), @@ -2707,11 +2604,12 @@ mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( #state{ - stream_module = StreamModule, stream = Stream0, regs = Regs0, available_regs = Avail + stream_module = StreamModule, stream = Stream0, regs = Regs0 } = State, Reg, Val ) when is_integer(Val), (Val < -16#80000000 orelse Val > 16#7FFFFFFF) -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), I1 = jit_x86_64_asm:movabsq(Val, TempReg), I2 = jit_x86_64_asm:imulq(TempReg, Reg), @@ -2738,11 +2636,11 @@ mul( %% rdx is the native interface pointer and must be saved/restored. -spec div_(state(), x86_64_register(), x86_64_register()) -> {state(), rax}. div_( - #state{stream_module = StreamModule, stream = Stream0, regs = Regs0, available_regs = Avail} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DividendReg, DivisorReg ) -> + Avail = jit_regs:available_regs(Regs0), %% DivisorReg must not be rax (clobbered by dividend move) or rdx (clobbered by cqo). %% If DivisorReg is rax, move it to a temp register first. {I0, ActualDivisor, Regs1} = @@ -2775,11 +2673,11 @@ div_( %% rdx is the native interface pointer and must be saved/restored. -spec rem_(state(), x86_64_register(), x86_64_register()) -> {state(), x86_64_register()}. 
rem_( - #state{stream_module = StreamModule, stream = Stream0, regs = Regs0, available_regs = Avail} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DividendReg, DivisorReg ) -> + Avail = jit_regs:available_regs(Regs0), %% We need a temp register to save the remainder (rdx) before restoring rdx. %% This temp must not be rax (quotient) or the DivisorReg. RemTemp = first_avail( @@ -2816,21 +2714,14 @@ rem_( RemBit = reg_bit(RemTemp), Regs2 = jit_regs:invalidate_reg(Regs1, rax), Regs3 = jit_regs:invalidate_reg(Regs2, RemTemp), - { - State#state{ - stream = Stream1, - regs = Regs3, - available_regs = Avail band (bnot RemBit), - used_regs = State#state.used_regs bor RemBit - }, - RemTemp - }. + Regs4 = jit_regs:alloc_reg(Regs3, RemBit), + {State#state{stream = Stream1, regs = Regs4}, RemTemp}. -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State0 + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0 ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), Offset = StreamModule:offset(Stream0), @@ -2853,9 +2744,9 @@ decrement_reductions_and_maybe_schedule_next( (NewOffset - Offset - byte_size(I1) - byte_size(I2) - byte_size(I3)):32/little >> ), - State3 = merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs), - %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. - State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. + %% schedule_next clobbers caller-saved regs; restore the pre-call masks + %% (preserved by invalidate_all) and drop any cached contents. + State2#state{stream = Stream4, regs = jit_regs:invalidate_all(State1#state.regs)}. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). 
call_or_schedule_next(State0, Label) -> @@ -2989,59 +2880,6 @@ reg_bit(r9) -> ?REG_BIT_R9; reg_bit(r10) -> ?REG_BIT_R10; reg_bit(r11) -> ?REG_BIT_R11. -first_avail(Mask) when Mask band ?REG_BIT_RAX =/= 0 -> rax; -first_avail(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> r11; -first_avail(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> r10; -first_avail(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> r9; -first_avail(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> r8; -first_avail(Mask) when Mask band ?REG_BIT_RCX =/= 0 -> rcx. - -%% Convert bitmask to list in reverse allocation order -%% Iteration order: rcx, r8, r9, r10, r11, rax -mask_to_list(0) -> []; -mask_to_list(Mask) -> mask_to_list_rcx(Mask). - -mask_to_list_rcx(Mask) when Mask band ?REG_BIT_RCX =/= 0 -> [rcx | mask_to_list_r8(Mask)]; -mask_to_list_rcx(Mask) -> mask_to_list_r8(Mask). - -mask_to_list_r8(Mask) when Mask band ?REG_BIT_R8 =/= 0 -> [r8 | mask_to_list_r9(Mask)]; -mask_to_list_r8(Mask) -> mask_to_list_r9(Mask). - -mask_to_list_r9(Mask) when Mask band ?REG_BIT_R9 =/= 0 -> [r9 | mask_to_list_r10(Mask)]; -mask_to_list_r9(Mask) -> mask_to_list_r10(Mask). - -mask_to_list_r10(Mask) when Mask band ?REG_BIT_R10 =/= 0 -> [r10 | mask_to_list_r11(Mask)]; -mask_to_list_r10(Mask) -> mask_to_list_r11(Mask). - -mask_to_list_r11(Mask) when Mask band ?REG_BIT_R11 =/= 0 -> [r11 | mask_to_list_rax(Mask)]; -mask_to_list_r11(Mask) -> mask_to_list_rax(Mask). - -mask_to_list_rax(Mask) when Mask band ?REG_BIT_RAX =/= 0 -> [rax]; -mask_to_list_rax(_Mask) -> []. 
- -args_regs(Args) -> - lists:map( - fun - ({free, {ptr, Reg}}) -> Reg; - ({free, Reg}) when is_atom(Reg) -> Reg; - ({free, Imm}) when is_integer(Imm) -> imm; - (offset) -> imm; - (ctx) -> ?CTX_REG; - (jit_state) -> ?JITSTATE_REG; - (Reg) when is_atom(Reg) -> Reg; - (Imm) when is_integer(Imm) -> imm; - ({ptr, Reg}) -> Reg; - ({x_reg, _}) -> ?CTX_REG; - ({y_reg, _}) -> ?CTX_REG; - ({fp_reg, _}) -> ?CTX_REG; - ({free, {x_reg, _}}) -> ?CTX_REG; - ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG; - ({avm_int64_t, _}) -> imm - end, - Args - ). - %%----------------------------------------------------------------------------- %% @doc Add a label at the current offset %% @end diff --git a/libs/jit/src/jit_xtensa.erl b/libs/jit/src/jit_xtensa.erl index 5ece009636..ec3e5c2d98 100644 --- a/libs/jit/src/jit_xtensa.erl +++ b/libs/jit/src/jit_xtensa.erl @@ -166,8 +166,6 @@ offset :: non_neg_integer(), branches :: #{integer() | reference() => [{non_neg_integer(), non_neg_integer()}]}, jump_table_start :: non_neg_integer(), - available_regs :: non_neg_integer(), - used_regs :: non_neg_integer(), labels :: #{integer() | reference() => integer()}, variant :: non_neg_integer(), regs :: jit_regs:regs() @@ -313,11 +311,9 @@ new(Variant, StreamModule, Stream) -> branches = #{}, jump_table_start = 0, offset = StreamModule:offset(Stream), - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, labels = #{}, variant = Variant, - regs = jit_regs:new() + regs = jit_regs:new(?AVAILABLE_REGS_MASK, 0) }. %%----------------------------------------------------------------------------- @@ -363,67 +359,15 @@ debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> Stream1 = StreamModule:append(Stream0, jit_xtensa_asm:break(1, 15)), State#state{stream = Stream1}. -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently used native registers. This is used for -%% debugging and not in production. 
-%% @end -%% @param State current backend state -%% @return The list of used registers -%%----------------------------------------------------------------------------- --spec used_regs(state()) -> [xtensa_register()]. -used_regs(#state{used_regs = Used}) -> mask_to_list(Used). - -%%----------------------------------------------------------------------------- -%% @doc Return the list of currently available native scratch registers. This -%% is used for debugging and not in production. -%% @end -%% @param State current backend state -%% @return The list of available registers -%%----------------------------------------------------------------------------- --spec available_regs(state()) -> [xtensa_register()]. -available_regs(#state{available_regs = Available}) -> mask_to_list(Available). - -%%----------------------------------------------------------------------------- -%% @doc Free native registers. The passed list of registers can contain -%% registers, pointer to registers or other values that are ignored. -%% @end -%% @param State current backend state -%% @param Regs list of registers or other values -%% @return The updated backend state -%%----------------------------------------------------------------------------- --spec free_native_registers(state(), [value()]) -> state(). -free_native_registers(State, []) -> - State; -free_native_registers(State, [Reg | Rest]) -> - State1 = free_native_register(State, Reg), - free_native_registers(State1, Rest). - --spec free_native_register(state(), value()) -> state(). -free_native_register( - #state{available_regs = Available0, used_regs = Used0} = State, - Reg -) when is_atom(Reg) -> - Bit = reg_bit(Reg), - State#state{ - available_regs = Available0 bor Bit, used_regs = Used0 band (bnot Bit) - }; -free_native_register(State, {ptr, Reg}) -> - free_native_register(State, Reg); -free_native_register(State, _Other) -> - State. 
- -%%----------------------------------------------------------------------------- -%% @doc Assert that all native scratch registers are available. This is used -%% for debugging and not in production. -%% @end -%% @param State current backend state -%% @return ok -%%----------------------------------------------------------------------------- --spec assert_all_native_free(state()) -> ok. -assert_all_native_free(State) -> - 0 = State#state.used_regs, - ?AVAILABLE_REGS_MASK = State#state.available_regs, - ok. +%% Native-register allocation bookkeeping (used_regs/1, available_regs/1, +%% free_native_registers/2, free_native_register/2, assert_all_native_free/1, +%% first_avail/1, mask_to_list/1, args_regs/1, prepare_call_scratch/1) is shared +%% across the register-based backends and flows through jit_regs. +%% High registers (a15-a9) are clobbered by CALLX8 so prefer them to minimize saves. +-define(FIRST_AVAIL_REGS, [a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5]). +-define(MASK_TO_LIST_REGS, [a15, a14, a13, a12, a11, a10, a9, a8, a7, a6, a5, a4, a3, a2, a1, a0]). +-define(JITSTATE_ARG_REG, jit_state). +-include("jit_backend_regs_impl.hrl"). %%----------------------------------------------------------------------------- %% @doc Emit the jump table at the beginning of the module. 
Branches will be @@ -619,28 +563,26 @@ call_primitive( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, - used_regs = Used + regs = Regs0 } = State, Primitive, Args -) when Available =/= 0 -> - TempReg = first_avail(Available), - TempBit = reg_bit(TempReg), - PrepCall = load_primitive_ptr(Primitive, TempReg), - Stream1 = StreamModule:append(Stream0, PrepCall), - StateCall = State#state{ - stream = Stream1, - available_regs = Available band (bnot TempBit), - used_regs = Used bor TempBit - }, - call_func_ptr(StateCall, {free, TempReg}, Args); -call_primitive( - #state{available_regs = 0} = State, - Primitive, - Args ) -> - call_func_ptr(State, {primitive, Primitive}, Args). + Available = jit_regs:available_regs(Regs0), + case Available of + 0 -> + call_func_ptr(State, {primitive, Primitive}, Args); + _ -> + TempReg = first_avail(Available), + TempBit = reg_bit(TempReg), + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{ + stream = Stream1, + regs = jit_regs:alloc_reg(Regs0, TempBit) + }, + call_func_ptr(StateCall, {free, TempReg}, Args) + end. %%----------------------------------------------------------------------------- %% @doc Emit a jump (call without return) to a primitive with arguments. This @@ -675,20 +617,14 @@ call_primitive_last( ) -> %% Xtensa windowed ABI: CALLX8 to the primitive, move its return value %% from a10 (our view of callee's a2) into our a2, then RETW to C. 
- ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), - ArgsRegs = args_regs(Args), - ArgsRegsMask = jit_regs:regs_to_mask(ArgsRegs, fun reg_bit/1), - ParamMask = jit_regs:regs_to_mask(ParamRegs, fun reg_bit/1), - ScratchMask = ?AVAILABLE_REGS_MASK band (bnot (ArgsRegsMask bor ParamMask)), - Temp = first_avail(ScratchMask), - TempBit = reg_bit(Temp), - AvailableRegs1 = ScratchMask band (bnot TempBit), - UsedMask = ?AVAILABLE_REGS_MASK band (bnot AvailableRegs1), + #{temp := Temp, available_mask := AvailableRegs1, used_mask := UsedMask} = + prepare_call_scratch(Args), PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), State1 = State0#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedMask + stream = Stream1, + regs = jit_regs:set_masks(State0#state.regs, AvailableRegs1, UsedMask) }, Args1 = lists:map( @@ -712,9 +648,9 @@ call_primitive_last( Stream3 = StreamModule:append(Stream2, <>), State2#state{ stream = Stream3, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State2#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(State2#state.regs), ?AVAILABLE_REGS_MASK, 0 + ) } end. @@ -731,8 +667,7 @@ return_if_not_equal_to_ctx( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State, {free, Reg} ) -> @@ -753,9 +688,7 @@ return_if_not_equal_to_ctx( RegBit = reg_bit(Reg), State#state{ stream = Stream1, - available_regs = AvailableRegs0 bor RegBit, - used_regs = UsedRegs0 band (bnot RegBit), - regs = State#state.regs + regs = jit_regs:free_reg(Regs0, RegBit) }. 
%%----------------------------------------------------------------------------- @@ -800,11 +733,12 @@ jump_to_continuation( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Available, + regs = Regs0, offset = BaseOffset } = State0, {free, OffsetReg} ) -> + Available = jit_regs:available_regs(Regs0), Temp = first_avail(Available), % Calculate absolute address: native_code_base + target_offset % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) @@ -825,9 +759,9 @@ jump_to_continuation( % Free all registers since this is a tail jump State0#state{ stream = Stream1, - available_regs = ?AVAILABLE_REGS_MASK, - used_regs = 0, - regs = jit_regs:unreachable(State0#state.regs) + regs = jit_regs:set_masks( + jit_regs:unreachable(Regs0), ?AVAILABLE_REGS_MASK, 0 + ) }. branch_to_offset_code(State, Offset, TargetOffset) -> @@ -840,7 +774,8 @@ branch_to_offset_code(State, Offset, TargetOffset) -> %% Far jump: use code_base + code-relative offset via indirect jump. %% Need two scratch regs: A8_REG (a8) for code_base/target, and one %% available register for the offset immediate. - #state{jump_table_start = JumpTableStart, available_regs = Avail} = State, + #state{jump_table_start = JumpTableStart, regs = RegsBR} = State, + Avail = jit_regs:available_regs(RegsBR), CodeRelativeTarget = TargetOffset - JumpTableStart, Temp = first_avail(Avail), I1 = jit_xtensa_asm:l32i(?A8_REG, ?JITSTATE_REG, ?JITSTATE_CODE_BASE_OFFSET), @@ -854,11 +789,12 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), {State, CodeBlock}; branch_to_label_code( - #state{branches = Branches, available_regs = Avail, jump_table_start = JTS} = State0, + #state{branches = Branches, regs = RegsBL, jump_table_start = JTS} = State0, Offset, Label, false ) -> + Avail = jit_regs:available_regs(RegsBL), %% Reserve 24 bytes for forward branch placeholder. 
%% Near targets will use J + NOPs, far targets use indirect jump. %% Pick a temp register now and encode it in the relocation. @@ -904,9 +840,10 @@ if_block( Stream2, Replacements ), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}; + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}; if_block( #state{stream_module = StreamModule, stream = Stream0} = State0, Cond, @@ -922,9 +859,10 @@ if_block( JumpRel = OffsetAfter - JumpOffset - 4, NewJumpInstr = jit_xtensa_asm:j(JumpRel), Stream3 = StreamModule:replace(Stream2, JumpOffset, NewJumpInstr), - State3 = merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State1#state.regs, State2#state.regs, ?AVAILABLE_REGS_MASK + ), + State2#state{stream = Stream3, regs = MergedRegs}. %%----------------------------------------------------------------------------- %% @doc Emit an if else block, i.e. 
emit a test of a condition and @@ -963,8 +901,11 @@ if_else_block( %% Build the else block StateElse = State2#state{ stream = Stream4, - used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs + regs = jit_regs:set_masks( + State2#state.regs, + jit_regs:available_regs(State1#state.regs), + jit_regs:used_regs(State1#state.regs) + ) }, State3 = BlockFalseFn(StateElse), Stream5 = State3#state.stream, @@ -974,9 +915,10 @@ if_else_block( NewElseJumpInstr = jit_xtensa_asm:j(FinalJumpOffset), 3 = byte_size(NewElseJumpInstr), Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), - State4 = merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs), - MergedRegs = jit_regs:merge(State2#state.regs, State3#state.regs), - State4#state{regs = MergedRegs}. + MergedRegs = jit_regs:merge( + State2#state.regs, State3#state.regs, ?AVAILABLE_REGS_MASK + ), + State3#state{stream = Stream6, regs = MergedRegs}. -spec if_block_cond(state(), condition()) -> {state(), non_neg_integer()}. 
@@ -1021,10 +963,11 @@ if_block_cond( RegOrTuple -> RegOrTuple end, %% Xtensa: load Val, blt Reg, Temp, +2; J placeholder + Available0 = jit_regs:available_regs(State0#state.regs), Temp = - case State0#state.available_regs of + case Available0 of 0 -> ?A8_REG; - _ -> first_avail(State0#state.available_regs) + _ -> first_avail(Available0) end, OffsetBefore = StreamModule:offset(Stream0), State1 = mov_immediate(State0, Temp, Val), @@ -1038,9 +981,10 @@ if_block_cond( JumpDelta = MovSize + byte_size(I1), {State3, JumpDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '<', Val} ) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = case Available of 0 -> ?A8_REG; @@ -1064,9 +1008,10 @@ if_block_cond( JumpDelta = MovSize + byte_size(I1), {State3, JumpDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {Val, '<', RegOrTuple} ) when is_integer(Val), Val >= 0, Val =< 255 -> + Available = jit_regs:available_regs(Regs0), Temp = case Available of 0 -> ?A8_REG; @@ -1090,9 +1035,10 @@ if_block_cond( JumpDelta = MovSize + byte_size(I1), {State3, JumpDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {Val, '<', RegOrTuple} ) when is_integer(Val) -> + Available = jit_regs:available_regs(Regs0), Temp = case Available of 0 -> ?A8_REG; @@ -1199,9 +1145,10 @@ if_block_cond( State2 = State1#state{stream = Stream1}, {State2, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = 
Regs0} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Available = jit_regs:available_regs(Regs0), Temp = case Available of 0 -> ?A8_REG; @@ -1260,9 +1207,10 @@ if_block_cond( State2 = State1#state{stream = Stream1}, {State2, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -1298,9 +1246,10 @@ if_block_cond( State3 = if_block_free_reg({free, RegB}, State2), {State3, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -1324,9 +1273,10 @@ if_block_cond( JumpDelta = MovSize + byte_size(I1), {State3, JumpDelta}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -1395,11 +1345,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, regs = Regs0 } = State0, {RegOrTuple, '&', Val, '!=', 0} ) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -1430,11 +1380,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, regs = Regs0 } = State0, {Reg, '&', 16#F, '!=', 16#F} ) when ?IS_GPR(Reg) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 
0 -> ?A8_REG; @@ -1473,10 +1423,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail + regs = Regs0 } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg), Val =/= 0 -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -1487,7 +1438,11 @@ if_block_cond( I1 = jit_xtensa_asm:mov(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), + {State2, Temp} = and_( + State1#state{regs = jit_regs:set_available_regs(State1#state.regs, AT)}, + {free, Temp}, + Mask + ), Stream2 = State2#state.stream, case Val of _ when ?IS_GPR(Val) -> @@ -1496,7 +1451,11 @@ if_block_cond( BranchDelta = StreamModule:offset(Stream2) - OffsetBefore + byte_size(I_beq), Stream3 = StreamModule:append(Stream2, <>), State3 = State2#state{ - stream = Stream3, available_regs = State2#state.available_regs bor reg_bit(Temp) + stream = Stream3, + regs = jit_regs:set_available_regs( + State2#state.regs, + jit_regs:available_regs(State2#state.regs) bor reg_bit(Temp) + ) }, {State3, BranchDelta}; _ -> @@ -1507,7 +1466,11 @@ if_block_cond( _ -> first_avail(AT) end, AT2 = AT band (bnot reg_bit(MaskReg)), - State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val), + State3 = mov_immediate( + State2#state{regs = jit_regs:set_available_regs(State2#state.regs, AT2)}, + MaskReg, + Val + ), Stream3 = State3#state.stream, I_beq2 = jit_xtensa_asm:bne(Temp, MaskReg, 2), JPlaceholder3 = <<16#FF, 16#FF, 16#FF>>, @@ -1515,7 +1478,11 @@ if_block_cond( Stream4 = StreamModule:append(Stream3, <>), State4 = State3#state{ stream = Stream4, - available_regs = State3#state.available_regs bor reg_bit(Temp) bor reg_bit(MaskReg) + regs = jit_regs:set_available_regs( + State3#state.regs, + jit_regs:available_regs(State3#state.regs) bor reg_bit(Temp) bor + reg_bit(MaskReg) + ) }, {State4, BranchDelta} end; @@ -1523,10 
+1490,11 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailRegs + regs = Regs0 } = State0, {{free, Reg} = RegTuple, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg), Val =/= 0 -> + AvailRegs = jit_regs:available_regs(Regs0), OffsetBefore = StreamModule:offset(Stream0), {State1, Reg} = and_(State0, RegTuple, Mask), Stream1 = State1#state.stream, @@ -1541,38 +1509,35 @@ if_block_cond( {State3, BranchDelta}; _ -> %% Val is an immediate - need temp register - MaskReg = first_avail(State1#state.available_regs), - AT = State1#state.available_regs band (bnot reg_bit(MaskReg)), - State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val), + State1Avail = jit_regs:available_regs(State1#state.regs), + MaskReg = first_avail(State1Avail), + AT = State1Avail band (bnot reg_bit(MaskReg)), + State2 = mov_immediate( + State1#state{regs = jit_regs:set_available_regs(State1#state.regs, AT)}, + MaskReg, + Val + ), Stream2 = State2#state.stream, I_beq4 = jit_xtensa_asm:bne(Reg, MaskReg, 2), JPlaceholder6 = <<16#FF, 16#FF, 16#FF>>, BranchDelta = StreamModule:offset(Stream2) - OffsetBefore + byte_size(I_beq4), Stream3 = StreamModule:append(Stream2, <>), - State3 = State2#state{stream = Stream3, available_regs = AvailRegs}, + State3 = State2#state{ + stream = Stream3, + regs = jit_regs:set_available_regs(State2#state.regs, AvailRegs) + }, State4 = if_block_free_reg(RegTuple, State3), {State4, BranchDelta} end. -spec if_block_free_reg(xtensa_register() | {free, xtensa_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, used_regs = UR0} = State0, + #state{regs = Regs0} = State0, Bit = reg_bit(Reg), - AvR1 = AvR0 bor Bit, - UR1 = UR0 band (bnot Bit), - State0#state{ - available_regs = AvR1, - used_regs = UR1 - }; + State0#state{regs = jit_regs:free_reg(Regs0, Bit)}; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. 
--spec merge_used_regs(state(), non_neg_integer()) -> state(). -merge_used_regs(#state{used_regs = UR} = State, OtherUR) -> - MergedUR = UR bor OtherUR, - MergedAvail = ?AVAILABLE_REGS_MASK band (bnot MergedUR), - State#state{used_regs = MergedUR, available_regs = MergedAvail}. - %%----------------------------------------------------------------------------- %% @doc Emit a shift register right by a fixed number of bits, effectively %% dividing it by 2^Shift @@ -1593,13 +1558,13 @@ shift_right( Regs1 = jit_regs:invalidate_reg(Regs0, Reg), {State#state{stream = Stream1, regs = Regs1}, Reg}; shift_right( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Shift ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -1615,8 +1580,6 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1624,6 +1587,7 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) andalso Shift =< 15 -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I = jit_xtensa_asm:srli(ResultReg, Reg, Shift), @@ -1632,9 +1596,7 @@ shift_right( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }; @@ -1642,8 +1604,6 @@ shift_right( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1651,6 +1611,7 @@ shift_right( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I1 = jit_xtensa_asm:movi(ResultReg, Shift), @@ -1661,9 
+1622,7 @@ shift_right( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. @@ -1701,8 +1660,6 @@ shift_right_arith( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UR, regs = Regs0 } = State, Reg, @@ -1710,6 +1667,7 @@ shift_right_arith( ) when ?IS_GPR(Reg) andalso is_integer(Shift) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = first_avail(Avail), ResultBit = reg_bit(ResultReg), I = jit_xtensa_asm:srai(ResultReg, Reg, Shift), @@ -1718,9 +1676,7 @@ shift_right_arith( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. @@ -1760,12 +1716,13 @@ call_func_ptr( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0Mask, - used_regs = UsedRegs0Mask + regs = Regs0 } = State0, FuncPtrTuple, Args ) -> + AvailableRegs0Mask = jit_regs:available_regs(Regs0), + UsedRegs0Mask = jit_regs:used_regs(Regs0), AvailableRegs0 = mask_to_list(AvailableRegs0Mask), UsedRegs0 = mask_to_list(UsedRegs0Mask), FreeRegs = lists:flatmap( @@ -1820,9 +1777,12 @@ call_func_ptr( %% safe to reuse as scratch among the currently-used registers. 
SetArgsRegsOnlyAvailableArgs = (RegsToSave -- RegArgsRegs) ++ AvailableRegs0, State1 = State0#state{ - available_regs = jit_regs:regs_to_mask(SetArgsRegsOnlyAvailableArgs, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask( - ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs, fun reg_bit/1 + regs = jit_regs:set_masks( + Regs0, + jit_regs:regs_to_mask(SetArgsRegsOnlyAvailableArgs, fun reg_bit/1), + jit_regs:regs_to_mask( + ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs, fun reg_bit/1 + ) ), stream = Stream1 }, @@ -1892,8 +1852,13 @@ call_func_ptr( end, State3 = State1#state{ - available_regs = jit_regs:regs_to_mask(SetArgsAvailableRegs, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask(?AVAILABLE_REGS -- SetArgsAvailableRegs, fun reg_bit/1), + regs = jit_regs:set_masks( + State1#state.regs, + jit_regs:regs_to_mask(SetArgsAvailableRegs, fun reg_bit/1), + jit_regs:regs_to_mask( + ?AVAILABLE_REGS -- SetArgsAvailableRegs, fun reg_bit/1 + ) + ), stream = Stream3 }, @@ -1943,9 +1908,11 @@ call_func_ptr( { State4#state{ stream = Stream8, - available_regs = jit_regs:regs_to_mask(AvailableRegs3, fun reg_bit/1), - used_regs = jit_regs:regs_to_mask(UsedRegs2, fun reg_bit/1), - regs = Regs1 + regs = jit_regs:set_masks( + Regs1, + jit_regs:regs_to_mask(AvailableRegs3, fun reg_bit/1), + jit_regs:regs_to_mask(UsedRegs2, fun reg_bit/1) + ) }, ResultReg }. @@ -1986,11 +1953,12 @@ pop_registers([], _AlignedStackBytes, _StreamModule, Stream0) -> Stream0. 
set_registers_args( - #state{used_regs = UsedRegsMask} = State0, + #state{regs = Regs0} = State0, Args, ParamRegs, StackOffset ) -> + UsedRegsMask = jit_regs:used_regs(Regs0), UsedRegs = mask_to_list(UsedRegsMask), ArgsRegs = args_regs(Args), AvailableScratchGP = ((?AVAILABLE_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs, @@ -2009,10 +1977,15 @@ set_registers_args( ), State1#state{ stream = Stream1, - available_regs = jit_regs:regs_to_mask( - ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, fun reg_bit/1 - ), - used_regs = jit_regs:regs_to_mask(ParamRegs ++ (NewUsedRegs -- ParamRegs), fun reg_bit/1) + regs = jit_regs:set_masks( + State1#state.regs, + jit_regs:regs_to_mask( + ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, fun reg_bit/1 + ), + jit_regs:regs_to_mask( + ParamRegs ++ (NewUsedRegs -- ParamRegs), fun reg_bit/1 + ) + ) }. parameter_regs(Args) -> @@ -2176,12 +2149,12 @@ set_registers_args1( Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_registers_args1( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, X}, Reg, _StackOffset ) -> + AvailRegs = jit_regs:available_regs(Regs0), Code = ldr_y_reg(Reg, X, AvailRegs), Stream1 = StreamModule:append(Stream0, Code), Regs1 = @@ -2244,10 +2217,11 @@ move_to_vm_register_emit(State0, Src, {ptr, Reg}) when is_atom(Src) -> Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; move_to_vm_register_emit( - #state{available_regs = Avail, regs = Regs0} = State0, Src, {y_reg, Y} + #state{regs = Regs0} = State0, Src, {y_reg, Y} ) when is_atom(Src) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = case Avail of 0 -> ?A8_REG; @@ -2265,10 +2239,11 @@ move_to_vm_register_emit( State0#state{stream = Stream1, regs = Regs2}; % Source is an integer to y_reg (optimized: ldr first, then movs) move_to_vm_register_emit( - 
#state{available_regs = Avail, regs = Regs0} = State0, N, {y_reg, Y} + #state{regs = Regs0} = State0, N, {y_reg, Y} ) when is_integer(N), N >= 0, N =< 255 -> + Avail = jit_regs:available_regs(Regs0), Temp1 = case Avail of 0 -> ?A8_REG; @@ -2292,97 +2267,84 @@ move_to_vm_register_emit( end, State0#state{stream = Stream1, regs = Regs1}; % Source is an integer (0-255 for movs, negative values need different handling) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N), N >= 0, N =< 255 -> - Temp = - case AR0 of - 0 -> ?A8_REG; - _ -> first_avail(AR0) + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = mov_immediate(Temp, N), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) end, - AT = AR0 band (bnot reg_bit(Temp)), - I1 = mov_immediate(Temp, N), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), - State1#state{available_regs = AR0, regs = Regs1}; + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); %% Handle large values using simple literal pool (branch-over pattern) -move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when +move_to_vm_register_emit(State0, N, Dest) when is_integer(N) -> - Temp = - case AR0 of - 0 -> ?A8_REG; - _ -> first_avail(AR0) + with_temp( + State0, + fun(StateT, Temp, _AT) -> + State1 = mov_immediate(StateT, Temp, N), + move_to_vm_register(State1, Temp, Dest) end, - AT = AR0 band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), - State2 = move_to_vm_register(State1, Temp, Dest), - Regs1 = jit_regs:set_contents(State2#state.regs, Temp, {imm, N}), - State2#state{available_regs = AR0, regs = 
Regs1}; + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {imm, N}) end + ); % Source is a VM register -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> - Temp = - case AR0 of - 0 -> ?A8_REG; - _ -> first_avail(AR0) - end, - AT = AR0 band (bnot reg_bit(Temp)), - {BaseReg, Off} = ?X_REG(?MAX_REG), - I1 = jit_xtensa_asm:l32i(Temp, BaseReg, Off), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, extra}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> - Temp = - case AR0 of - 0 -> ?A8_REG; - _ -> first_avail(AR0) +move_to_vm_register_emit(State0, {x_reg, extra}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_xtensa_asm:l32i(Temp, BaseReg, Off), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) end, - AT = AR0 band (bnot reg_bit(Temp)), - {XReg, X_REGOffset} = ?X_REG(X), - I1 = jit_xtensa_asm:l32i(Temp, XReg, X_REGOffset), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> - Temp = - case AR0 of - 0 -> ?A8_REG; - _ -> first_avail(AR0) + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, extra}) end + ); +move_to_vm_register_emit(State0, {x_reg, X}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + {XReg, X_REGOffset} = ?X_REG(X), + I1 = jit_xtensa_asm:l32i(Temp, 
XReg, X_REGOffset), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) end, - AT = AR0 band (bnot reg_bit(Temp)), - I1 = jit_xtensa_asm:l32i(Temp, Reg, 0), - Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), - State1#state{available_regs = AR0, regs = Regs1}; -move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> - Temp = - case AR0 of - 0 -> ?A8_REG; - _ -> first_avail(AR0) + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {x_reg, X}) end + ); +move_to_vm_register_emit(State0, {ptr, Reg}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, _AT) -> + I1 = jit_xtensa_asm:l32i(Temp, Reg, 0), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, I1), + move_to_vm_register(StateT#state{stream = Stream1}, Temp, Dest) end, - AT = AR0 band (bnot reg_bit(Temp)), - Code = ldr_y_reg(Temp, Y, AT), - Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), - Regs1 = - case AT of - 0 -> State0#state.regs; - _ -> jit_regs:invalidate_reg(State0#state.regs, first_avail(AT)) + fun(Regs, Temp) -> jit_regs:invalidate_reg(Regs, Temp) end + ); +move_to_vm_register_emit(State0, {y_reg, Y}, Dest) -> + with_temp( + State0, + fun(StateT, Temp, AT) -> + Code = ldr_y_reg(Temp, Y, AT), + Stream1 = (StateT#state.stream_module):append(StateT#state.stream, Code), + % ldr_y_reg clobbers first_avail(AT) as a hidden temp for the Y_REGS pointer + Regs1 = + case AT of + 0 -> StateT#state.regs; + _ -> jit_regs:invalidate_reg(StateT#state.regs, first_avail(AT)) + end, + move_to_vm_register(StateT#state{stream = Stream1, regs = Regs1}, Temp, Dest) end, - State1 = move_to_vm_register( - State0#state{stream = Stream1, available_regs = AT, regs = Regs1}, Temp, Dest - ), - 
Regs2 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), - State1#state{available_regs = AR0, regs = Regs2}; + fun(Regs, Temp) -> jit_regs:set_contents(Regs, Temp, {y_reg, Y}) end + ); % term_to_float move_to_vm_register_emit( #state{ stream_module = StreamModule, - available_regs = Avail, + regs = Regs0, stream = Stream0, variant = Variant } = @@ -2390,6 +2352,7 @@ move_to_vm_register_emit( {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Temp2 = first_avail(Avail band (bnot reg_bit(Temp1))), {BaseReg, Off} = ?FP_REGS, @@ -2412,6 +2375,24 @@ move_to_vm_register_emit( Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(State1#state.regs, Temp1), Temp2), State1#state{stream = Stream1, regs = Regs1}. +-spec with_temp( + state(), + fun((state(), xtensa_register(), non_neg_integer()) -> state()), + fun((jit_regs:regs(), xtensa_register()) -> jit_regs:regs()) +) -> state(). +with_temp(#state{regs = Regs0} = State0, EmitFun, ContentsFun) -> + AR0 = jit_regs:available_regs(Regs0), + %% When no register is free, fall back to a8 as the scratch (xtensa reserves it). + Temp = + case AR0 of + 0 -> ?A8_REG; + _ -> first_avail(AR0) + end, + AT = AR0 band (bnot reg_bit(Temp)), + State1 = EmitFun(State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, AT), + Regs1 = jit_regs:set_available_regs(ContentsFun(State1#state.regs, Temp), AR0), + State1#state{regs = Regs1}. + %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. %% @end @@ -2428,12 +2409,12 @@ move_to_vm_register_emit( vm_register() | xtensa_register() ) -> state(). 
move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_xtensa_asm:l32i(Temp, Reg, Index * 4), {BaseReg, Off} = ?X_REG(X), @@ -2443,12 +2424,12 @@ move_array_element( Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {ptr, Dest} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_xtensa_asm:l32i(Temp, Reg, Index * 4), I2 = jit_xtensa_asm:s32i(Temp, Dest, 0), @@ -2456,12 +2437,12 @@ move_array_element( Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State#state{stream = Stream1, regs = Regs1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp1 = first_avail(Avail), Avail2 = Avail band (bnot reg_bit(Temp1)), Temp2 = first_avail(Avail2), @@ -2474,12 +2455,12 @@ move_array_element( Regs2 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs1, Temp1), Temp2), State#state{stream = Stream1, regs = Regs2}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {free, Reg}, Index, {y_reg, Y} ) when 
is_integer(Index) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), AT = Avail band (bnot reg_bit(Temp)), I1 = jit_xtensa_asm:l32i(Reg, Reg, Index * 4), @@ -2499,8 +2480,6 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, @@ -2513,22 +2492,17 @@ move_array_element( {BaseReg, Off} = ?X_REG(X), I4 = jit_xtensa_asm:s32i(IndexReg, BaseReg, Off), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs1 + regs = jit_regs:free_reg(Regs1, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0 + regs = Regs0 } = State, Reg, {free, IndexReg}, @@ -2539,26 +2513,22 @@ move_array_element( I3 = jit_xtensa_asm:l32i(IndexReg, IndexReg, 0), I4 = jit_xtensa_asm:s32i(IndexReg, PtrReg, 0), Bit = reg_bit(IndexReg), - AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append(Stream0, <>), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, - stream = Stream1 + stream = Stream1, + regs = jit_regs:free_reg(Regs0, Bit) }; move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = AvailableRegs0, - used_regs = UsedRegs0, regs = Regs0 } = State, Reg, {free, IndexReg}, {y_reg, Y} ) when is_atom(IndexReg) -> + AvailableRegs0 = jit_regs:available_regs(Regs0), Temp = first_avail(AvailableRegs0), AT = AvailableRegs0 band (bnot reg_bit(Temp)), I1 = jit_xtensa_asm:slli(IndexReg, IndexReg, 2), @@ -2567,18 +2537,14 @@ move_array_element( Code = str_y_reg(IndexReg, Y, Temp, AT), I4 = Code, Bit = reg_bit(IndexReg), - 
AvailableRegs1 = AvailableRegs0 bor Bit, - UsedRegs1 = UsedRegs0 band (bnot Bit), Stream1 = StreamModule:append( Stream0, <> ), Regs1a = jit_regs:invalidate_vm_loc(Regs0, {y_reg, Y}), Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs1a, IndexReg), Temp), State#state{ - available_regs = AvailableRegs1, - used_regs = UsedRegs1, stream = Stream1, - regs = Regs1 + regs = jit_regs:free_reg(Regs1, Bit) }. %% @doc move reg[x] to a vm or native register @@ -2601,12 +2567,12 @@ get_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0 + regs = Regs0 } = State, Reg, Index ) -> + Avail = jit_regs:available_regs(Regs0), ElemReg = case Avail of 0 -> ?A8_REG; @@ -2618,8 +2584,7 @@ get_array_element( { State#state{ stream = Stream1, - available_regs = Avail band (bnot ElemBit), - used_regs = UsedRegs0 bor ElemBit + regs = jit_regs:alloc_reg(Regs0, ElemBit) }, ElemReg }. @@ -2638,12 +2603,12 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -2675,13 +2640,13 @@ move_to_array_element( ) when is_integer(IndexReg) andalso is_integer(Offset) -> move_to_array_element(State, Value, BaseReg, IndexReg + Offset); move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, ValueReg, BaseReg, IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + Avail = jit_regs:available_regs(Regs0), Temp 
= case Avail of 0 -> ?A8_REG; @@ -2702,10 +2667,11 @@ move_to_array_element( Offset ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), + State1Avail = jit_regs:available_regs(State1#state.regs), Temp = - case State1#state.available_regs of + case State1Avail of 0 -> ?A8_REG; - _ -> first_avail(State1#state.available_regs) + _ -> first_avail(State1Avail) end, I1 = jit_xtensa_asm:addi(Temp, IndexReg, Offset), I2 = jit_xtensa_asm:slli(Temp, Temp, 2), @@ -2725,16 +2691,17 @@ move_to_native_register(#state{regs = Regs} = State, Value) -> case Contents =/= unknown andalso jit_regs:find_reg_with_contents(Regs, Contents) of {ok, CachedReg} -> Bit = reg_bit(CachedReg), - case State#state.used_regs band Bit of + Used0 = jit_regs:used_regs(Regs), + case Used0 band Bit of 0 -> - case State#state.available_regs band Bit of + Avail0 = jit_regs:available_regs(Regs), + case Avail0 band Bit of 0 -> move_to_native_register_emit(State, Value, Contents); _ -> { State#state{ - used_regs = State#state.used_regs bor Bit, - available_regs = State#state.available_regs band (bnot Bit) + regs = jit_regs:alloc_reg(Regs, Bit) }, CachedReg } @@ -2750,13 +2717,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, cp, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), {BaseReg, Off} = ?CP, @@ -2766,9 +2732,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2783,8 +2747,6 @@ move_to_native_register_emit( {State#state{stream = Stream1, regs = Regs1}, Reg}; move_to_native_register_emit( #state{ - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State0, Imm, @@ -2792,6 +2754,7 @@ move_to_native_register_emit( ) when is_integer(Imm) -> + Avail = 
jit_regs:available_regs(Regs0), Reg = case Avail of 0 -> ?A8_REG; @@ -2800,22 +2763,19 @@ move_to_native_register_emit( RegBit = reg_bit(Reg), Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), State1 = State0#state{ - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, extra}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = case Avail of 0 -> ?A8_REG; @@ -2829,9 +2789,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2839,8 +2797,6 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {x_reg, X}, @@ -2848,6 +2804,7 @@ move_to_native_register_emit( ) when X < ?MAX_REG -> + Avail = jit_regs:available_regs(Regs0), Reg = case Avail of 0 -> ?A8_REG; @@ -2861,9 +2818,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - used_regs = Used bor RegBit, - available_regs = Avail band (bnot RegBit), - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2871,13 +2826,12 @@ move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {y_reg, Y}, Contents ) -> + Avail = jit_regs:available_regs(Regs0), Reg = case Avail of 0 -> ?A8_REG; @@ -2896,9 +2850,7 @@ move_to_native_register_emit( { State#state{ stream = Stream1, - available_regs = AvailT, - used_regs = Used bor RegBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }; @@ -2906,18 +2858,17 @@ 
move_to_native_register_emit( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used + regs = Regs0 } = State, {fp_reg, F}, _Contents ) -> + Avail = jit_regs:available_regs(Regs0), RegA = first_avail(Avail), RegABit = reg_bit(RegA), Avail2 = Avail band (bnot RegABit), RegB = first_avail(Avail2), RegBBit = reg_bit(RegB), - AvailT = Avail2 band (bnot RegBBit), {BaseReg, Off} = ?FP_REGS, I1 = jit_xtensa_asm:l32i(RegB, BaseReg, Off), I2 = jit_xtensa_asm:l32i(RegA, RegB, F * 8), @@ -2926,7 +2877,8 @@ move_to_native_register_emit( Stream1 = StreamModule:append(Stream0, Code), { State#state{ - stream = Stream1, available_regs = AvailT, used_regs = Used bor RegABit bor RegBBit + stream = Stream1, + regs = jit_regs:alloc_reg(Regs0, RegABit bor RegBBit) }, {fp, RegA, RegB} }. @@ -2973,11 +2925,11 @@ move_to_native_register( Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AT, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y}, RegDst ) -> + AT = jit_regs:available_regs(Regs0), Code = ldr_y_reg(RegDst, Y, AT), Stream1 = StreamModule:append(Stream0, Code), Regs1 = @@ -3008,12 +2960,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, Reg ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = jit_xtensa_asm:mov(SaveReg, Reg), @@ -3023,9 +2974,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, SaveBit) }, SaveReg }; @@ -3033,12 +2982,11 @@ copy_to_native_register( #state{ stream_module = StreamModule, stream 
= Stream0, - available_regs = Avail, - used_regs = Used, regs = Regs0 } = State, {ptr, Reg} ) when is_atom(Reg) -> + Avail = jit_regs:available_regs(Regs0), SaveReg = first_avail(Avail), SaveBit = reg_bit(SaveReg), I1 = jit_xtensa_asm:l32i(SaveReg, Reg, 0), @@ -3047,9 +2995,7 @@ copy_to_native_register( { State#state{ stream = Stream1, - available_regs = Avail band (bnot SaveBit), - used_regs = Used bor SaveBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, SaveBit) }, SaveReg }; @@ -3057,10 +3003,10 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y} ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), AvailT = Avail band (bnot reg_bit(Reg)), I1 = ldr_y_reg(Reg, Y, AvailT), @@ -3077,10 +3023,10 @@ move_to_cp( State#state{stream = Stream1, regs = Regs2}. 
increment_sp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Offset ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), {BaseReg1, Off1} = ?Y_REGS, I1 = jit_xtensa_asm:l32i(Reg, BaseReg1, Off1), @@ -3096,11 +3042,11 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, regs = Regs0 } = State, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), %% In windowed ABI, the continuation will be called by C via CALL8, @@ -3123,12 +3069,12 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, branches = Branches, jump_table_start = JumpTableStart, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), @@ -3158,11 +3104,10 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, - used_regs = UsedRegs0, regs = Regs0 } = State ) -> + Avail = jit_regs:available_regs(Regs0), Reg = first_avail(Avail), RegBit = reg_bit(Reg), % Load module from jit_state (which is in a3) @@ -3174,9 +3119,7 @@ get_module_index( { State#state{ stream = Stream1, - available_regs = Avail band (bnot RegBit), - used_regs = UsedRegs0 bor RegBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, RegBit) }, Reg }. 
@@ -3204,78 +3147,109 @@ and_( {State0#state{stream = Stream1, regs = Regs1}, Reg}; % Xtensa has no andi instruction - small values fall through to general case and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val ) when - Val >= -2048 andalso Val =< 2047 andalso Avail =/= 0 + Val >= -2048 andalso Val =< 2047 -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:and_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, - {free, Reg}, - Val -) when Val < 0 andalso Val >= -256 andalso Avail =/= 0 -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:and_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}, Reg}; -and_( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, - {free, Reg}, - Val -) when Avail =/= 0 -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:and_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), - {State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}, 
Reg}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:and_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + {State1#state{stream = Stream2, regs = Regs1}, Reg}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:and_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }, + Reg + } + end; and_( - #state{available_regs = 0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> - %% No available registers; use ?A8_REG (a8) directly as scratch. - %% a8 is not in AVAILABLE_REGS so it is the dedicated implicit scratch. - %% Avoid clobbering a0 (encoded return address required by RETW). 
- State1 = mov_immediate(State0, ?A8_REG, Val), - #state{stream_module = StreamModule, stream = Stream1} = State1, - I = jit_xtensa_asm:and_(Reg, Reg, ?A8_REG), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(State0#state.regs, Reg), - {State1#state{stream = Stream2, regs = Regs1}, Reg}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:and_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + {State1#state{stream = Stream2, regs = Regs1}, Reg}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:and_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }, + Reg + } + end; and_( - #state{available_regs = 0, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, {free, Reg}, Val ) -> - %% No available registers; use ?A8_REG (a8) directly as scratch. 
- State1 = mov_immediate(State0, ?A8_REG, Val), - #state{stream_module = StreamModule, stream = Stream1} = State1, - I = jit_xtensa_asm:and_(Reg, Reg, ?A8_REG), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), - {State1#state{stream = Stream2, regs = Regs1}, Reg}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:and_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + {State1#state{stream = Stream2, regs = Regs1}, Reg}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:and_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp), + { + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }, + Reg + } + end; and_( - #state{stream_module = StreamModule, available_regs = Avail, used_regs = UR, regs = Regs0} = - State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, ?TERM_PRIMARY_CLEAR_MASK ) -> + Avail = jit_regs:available_regs(Regs0), ResultReg = case Avail of 0 -> ?A8_REG; @@ -3290,9 +3264,7 @@ and_( { State0#state{ stream = Stream1, - available_regs = Avail band (bnot ResultBit), - used_regs = UR bor ResultBit, - regs = Regs1 + regs = jit_regs:alloc_reg(Regs1, ResultBit) }, ResultReg }. 
@@ -3308,45 +3280,63 @@ or_(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State State0#state{stream = Stream1, regs = Regs1}; % Xtensa has no ori instruction - small values use li+or_ like general case or_( - #state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when - Val >= -2048 andalso Val =< 2047 andalso Avail =/= 0 + Val >= -2048 andalso Val =< 2047 -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:or_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - %% Take the cache from State1 (mov_immediate already invalidated Temp), - %% then also invalidate Reg whose value just got rewritten. - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}; -or_( - #state{available_regs = 0} = State0, Reg, Val -) -> - %% No available registers; use ?A8_REG (a8) directly as scratch. - %% a8 is not in AVAILABLE_REGS so it is the dedicated implicit scratch. - %% Avoid clobbering a0 (encoded return address required by RETW). 
- State1 = mov_immediate(State0, ?A8_REG, Val), - #state{stream_module = StreamModule, stream = Stream1} = State1, - I = jit_xtensa_asm:or_(Reg, Reg, ?A8_REG), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{stream = Stream2, regs = Regs1}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:or_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{stream = Stream2, regs = Regs1}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:or_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + } + end; or_( - #state{stream_module = StreamModule, available_regs = Avail} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:or_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. 
+ Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:or_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{stream = Stream2, regs = Regs1}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:or_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + } + end. xor_( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, Reg, SrcReg @@ -3359,44 +3349,63 @@ xor_( State0#state{stream = Stream1, regs = Regs1}; % Xtensa has no xori instruction - small values use li+xor_ like general case xor_( - #state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) when - Val >= -2048 andalso Val =< 2047 andalso Avail =/= 0 + Val >= -2048 andalso Val =< 2047 -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:xor_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - %% mov_immediate already invalidated Temp in State1#state.regs; also - %% invalidate Reg whose value just got rewritten. - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}; -xor_( - #state{available_regs = 0} = State0, Reg, Val -) -> - %% No available registers; use ?A8_REG (a8) directly as scratch. 
- %% Avoid clobbering a0 (encoded return address required by RETW). - State1 = mov_immediate(State0, ?A8_REG, Val), - #state{stream_module = StreamModule, stream = Stream1} = State1, - I = jit_xtensa_asm:xor_(Reg, Reg, ?A8_REG), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{stream = Stream2, regs = Regs1}; + Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:xor_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{stream = Stream2, regs = Regs1}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:xor_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + } + end; xor_( - #state{stream_module = StreamModule, available_regs = Avail} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val ) -> - Temp = first_avail(Avail), - AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_xtensa_asm:xor_(Reg, Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. 
+ Avail = jit_regs:available_regs(Regs0), + case Avail of + 0 -> + State1 = mov_immediate(State0, ?A8_REG, Val), + #state{stream = Stream1} = State1, + I = jit_xtensa_asm:xor_(Reg, Reg, ?A8_REG), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{stream = Stream2, regs = Regs1}; + _ -> + Temp = first_avail(Avail), + AT = Avail band (bnot reg_bit(Temp)), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), + Stream1 = State1#state.stream, + I = jit_xtensa_asm:xor_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + } + end. add(State0, {free, Reg}, Val) -> {add(State0, Reg, Val), Reg}; @@ -3414,20 +3423,25 @@ add(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State0#state{stream = Stream1, regs = Regs1}; -add(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) -> +add(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; _ -> first_avail(Avail) end, AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_xtensa_asm:add(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - %% mov_immediate already invalidated Temp in State1#state.regs. Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }. 
%% mov_immediate/2: returns binary, for inline code generation. %% Uses movi, movi+addmi, or movi+slli+addi sequences (no L32R). @@ -3700,26 +3714,32 @@ sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State Stream1 = StreamModule:append(Stream0, I), Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{stream = Stream1, regs = Regs1}; -sub(#state{stream_module = StreamModule, available_regs = Avail} = State0, Reg, Val) -> +sub(#state{stream_module = StreamModule, regs = Regs0} = State0, Reg, Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; _ -> first_avail(Avail) end, AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_xtensa_asm:sub(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - %% mov_immediate already invalidated Temp in State1#state.regs. Regs1 = jit_regs:invalidate_reg(State1#state.regs, Reg), - State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}. + State1#state{ + stream = Stream2, + regs = jit_regs:set_available_regs(Regs1, Avail) + }. 
mul(State, _Reg, 1) -> State; mul(State, Reg, 2) -> shift_left(State, Reg, 1); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> +mul(#state{regs = Regs0} = State, Reg, 3) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -3732,7 +3752,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 3) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> +mul(#state{regs = Regs0} = State, Reg, 5) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -3746,7 +3767,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 5) -> mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> +mul(#state{regs = Regs0} = State, Reg, 7) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -3759,7 +3781,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 7) -> State#state{stream = Stream1, regs = Regs1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> +mul(#state{regs = Regs0} = State, Reg, 9) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -3773,7 +3796,8 @@ mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 9) -> mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); -mul(#state{available_regs = Avail, regs = Regs0} = State, Reg, 15) -> +mul(#state{regs = Regs0} = State, Reg, 15) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; @@ -3791,25 +3815,28 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0, + #state{stream_module = StreamModule, regs = Regs0} = State0, 
Reg, Val ) when is_integer(Val) -> + Avail = jit_regs:available_regs(Regs0), Temp = case Avail of 0 -> ?A8_REG; _ -> first_avail(Avail) end, AT = Avail band (bnot reg_bit(Temp)), - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + State1 = mov_immediate( + State0#state{regs = jit_regs:set_available_regs(Regs0, AT)}, Temp, Val + ), Stream1 = State1#state.stream, I = jit_xtensa_asm:mull(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg), + State1Avail = jit_regs:available_regs(State1#state.regs), State1#state{ stream = Stream2, - available_regs = State1#state.available_regs bor reg_bit(Temp), - regs = Regs1 + regs = jit_regs:set_available_regs(Regs1, State1Avail bor reg_bit(Temp)) }; mul( #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg @@ -3824,11 +3851,11 @@ decrement_reductions_and_maybe_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, jump_table_start = JumpTableStart, regs = Regs0 } = State0 ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), I1 = jit_xtensa_asm:l32i(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), I2 = jit_xtensa_asm:addi(Temp, Temp, -1), @@ -3860,9 +3887,11 @@ decrement_reductions_and_maybe_schedule_next( Stream3b, BNEOffset, <> ), StreamN = Stream4, - State3 = merge_used_regs(State2#state{stream = StreamN}, State1#state.used_regs), %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. - State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. + State2#state{ + stream = StreamN, + regs = jit_regs:invalidate_all(State1#state.regs) + }. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). 
call_or_schedule_next(State0, Label) -> @@ -3874,11 +3903,11 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = Avail, regs = Regs0 } = State0a, Label ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), Regs1 = jit_regs:invalidate_reg(Regs0, Temp), State0 = State0a#state{regs = Regs1}, @@ -3935,12 +3964,13 @@ call_primitive_with_cp(State0, Primitive, Args) -> rewrite_cp_offset(State2, RewriteOffset, TempReg). -spec set_cp(state()) -> {state(), non_neg_integer(), xtensa_register()}. -set_cp(#state{available_regs = Avail, used_regs = UsedRegs} = State0) -> +set_cp(#state{regs = Regs0} = State0) -> + Avail = jit_regs:available_regs(Regs0), TempReg = first_avail(Avail), TempBit = reg_bit(TempReg), %% Reserve TempReg for the offset BEFORE get_module_index consumes available registers. State1 = State0#state{ - available_regs = Avail band (bnot TempBit), used_regs = UsedRegs bor TempBit + regs = jit_regs:alloc_reg(Regs0, TempBit) }, {State2, Reg} = get_module_index(State1), #state{stream_module = StreamModule, stream = Stream0} = State2, @@ -3988,9 +4018,10 @@ rewrite_cp_offset( State0#state{stream = Stream2}. set_bs( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail} = State0, + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State0, TermReg ) -> + Avail = jit_regs:available_regs(Regs0), Temp = first_avail(Avail), {BaseReg1, Off1} = ?BS, I1 = jit_xtensa_asm:s32i(TermReg, BaseReg1, Off1), @@ -4203,80 +4234,6 @@ reg_bit(a13) -> ?REG_BIT_A13; reg_bit(a14) -> ?REG_BIT_A14; reg_bit(a15) -> ?REG_BIT_A15. -%% High registers (a15-a9) are clobbered by CALLX8 so prefer them to minimize saves. 
-first_avail(Mask) when Mask band ?REG_BIT_A15 =/= 0 -> a15; -first_avail(Mask) when Mask band ?REG_BIT_A14 =/= 0 -> a14; -first_avail(Mask) when Mask band ?REG_BIT_A13 =/= 0 -> a13; -first_avail(Mask) when Mask band ?REG_BIT_A12 =/= 0 -> a12; -first_avail(Mask) when Mask band ?REG_BIT_A11 =/= 0 -> a11; -first_avail(Mask) when Mask band ?REG_BIT_A10 =/= 0 -> a10; -first_avail(Mask) when Mask band ?REG_BIT_A9 =/= 0 -> a9; -first_avail(Mask) when Mask band ?REG_BIT_A8 =/= 0 -> a8; -first_avail(Mask) when Mask band ?REG_BIT_A7 =/= 0 -> a7; -first_avail(Mask) when Mask band ?REG_BIT_A6 =/= 0 -> a6; -first_avail(Mask) when Mask band ?REG_BIT_A5 =/= 0 -> a5. - -mask_to_list(0) -> []; -mask_to_list(Mask) -> mask_to_list_a15(Mask). - -mask_to_list_a15(Mask) when Mask band ?REG_BIT_A15 =/= 0 -> [a15 | mask_to_list_a14(Mask)]; -mask_to_list_a15(Mask) -> mask_to_list_a14(Mask). -mask_to_list_a14(Mask) when Mask band ?REG_BIT_A14 =/= 0 -> [a14 | mask_to_list_a13(Mask)]; -mask_to_list_a14(Mask) -> mask_to_list_a13(Mask). -mask_to_list_a13(Mask) when Mask band ?REG_BIT_A13 =/= 0 -> [a13 | mask_to_list_a12(Mask)]; -mask_to_list_a13(Mask) -> mask_to_list_a12(Mask). -mask_to_list_a12(Mask) when Mask band ?REG_BIT_A12 =/= 0 -> [a12 | mask_to_list_a11(Mask)]; -mask_to_list_a12(Mask) -> mask_to_list_a11(Mask). -mask_to_list_a11(Mask) when Mask band ?REG_BIT_A11 =/= 0 -> [a11 | mask_to_list_a10(Mask)]; -mask_to_list_a11(Mask) -> mask_to_list_a10(Mask). -mask_to_list_a10(Mask) when Mask band ?REG_BIT_A10 =/= 0 -> [a10 | mask_to_list_a9(Mask)]; -mask_to_list_a10(Mask) -> mask_to_list_a9(Mask). -mask_to_list_a9(Mask) when Mask band ?REG_BIT_A9 =/= 0 -> [a9 | mask_to_list_a8(Mask)]; -mask_to_list_a9(Mask) -> mask_to_list_a8(Mask). -mask_to_list_a8(Mask) when Mask band ?REG_BIT_A8 =/= 0 -> [a8 | mask_to_list_a7(Mask)]; -mask_to_list_a8(Mask) -> mask_to_list_a7(Mask). 
-mask_to_list_a7(Mask) when Mask band ?REG_BIT_A7 =/= 0 -> [a7 | mask_to_list_a6(Mask)]; -mask_to_list_a7(Mask) -> mask_to_list_a6(Mask). -mask_to_list_a6(Mask) when Mask band ?REG_BIT_A6 =/= 0 -> [a6 | mask_to_list_a5(Mask)]; -mask_to_list_a6(Mask) -> mask_to_list_a5(Mask). -mask_to_list_a5(Mask) when Mask band ?REG_BIT_A5 =/= 0 -> [a5 | mask_to_list_a4(Mask)]; -mask_to_list_a5(Mask) -> mask_to_list_a4(Mask). -mask_to_list_a4(Mask) when Mask band ?REG_BIT_A4 =/= 0 -> [a4 | mask_to_list_a3(Mask)]; -mask_to_list_a4(Mask) -> mask_to_list_a3(Mask). -mask_to_list_a3(Mask) when Mask band ?REG_BIT_A3 =/= 0 -> [a3 | mask_to_list_a2(Mask)]; -mask_to_list_a3(Mask) -> mask_to_list_a2(Mask). -mask_to_list_a2(Mask) when Mask band ?REG_BIT_A2 =/= 0 -> [a2 | mask_to_list_a1(Mask)]; -mask_to_list_a2(Mask) -> mask_to_list_a1(Mask). -mask_to_list_a1(Mask) when Mask band ?REG_BIT_A1 =/= 0 -> [a1 | mask_to_list_a0(Mask)]; -mask_to_list_a1(Mask) -> mask_to_list_a0(Mask). -mask_to_list_a0(Mask) when Mask band ?REG_BIT_A0 =/= 0 -> [a0]; -mask_to_list_a0(_Mask) -> []. - -args_regs(Args) -> - lists:map( - fun - ({free, {ptr, Reg}}) -> Reg; - ({free, Reg}) when is_atom(Reg) -> Reg; - ({free, Imm}) when is_integer(Imm) -> imm; - (offset) -> imm; - (ctx) -> ?CTX_REG; - (jit_state) -> jit_state; - (jit_state_tail_call) -> jit_state; - (stack) -> stack; - (Reg) when is_atom(Reg) -> Reg; - (Imm) when is_integer(Imm) -> imm; - ({ptr, Reg}) -> Reg; - ({x_reg, _}) -> ?CTX_REG; - ({y_reg, _}) -> ?CTX_REG; - ({fp_reg, _}) -> ?CTX_REG; - ({free, {x_reg, _}}) -> ?CTX_REG; - ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG; - ({avm_int64_t, _}) -> imm - end, - Args - ). - %%----------------------------------------------------------------------------- %% @doc Add a label at the current offset. 
%% @end diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 95c64f81b8..259df7682d 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -2152,10 +2152,17 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement( - 7, State0, element(7, State0) band (bnot ((1 bsl 8) bor (1 bsl 9))) + Regs0 = element(9, State0), + State2 = setelement( + 9, + State0, + jit_regs:set_masks( + Regs0, + jit_regs:available_regs(Regs0) band + (bnot ((1 bsl 8) bor (1 bsl 9))), + (1 bsl 8) bor (1 bsl 9) + ) ), - State2 = setelement(8, State1, (1 bsl 8) bor (1 bsl 9)), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -2168,10 +2175,17 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement( - 7, State0, element(7, State0) band (bnot ((1 bsl 8) bor (1 bsl 9))) + Regs0 = element(9, State0), + State2 = setelement( + 9, + State0, + jit_regs:set_masks( + Regs0, + jit_regs:available_regs(Regs0) band + (bnot ((1 bsl 8) bor (1 bsl 9))), + (1 bsl 8) bor (1 bsl 9) + ) ), - State2 = setelement(8, State1, (1 bsl 8) bor (1 bsl 9)), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 408f908c16..88d1e9e795 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -3034,10 +3034,16 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement( - 7, State0, element(7, State0) band (bnot ((1 bsl 3) bor (1 bsl 4))) + %% State record now stores masks inside the regs field + %% (position 10). 
Update the masks via jit_regs:set_masks. + Regs0 = element(10, State0), + UsedMask = (1 bsl 3) bor (1 bsl 4), + Regs1 = jit_regs:set_masks( + Regs0, + jit_regs:available_regs(Regs0) band (bnot UsedMask), + UsedMask ), - State2 = setelement(8, State1, (1 bsl 3) bor (1 bsl 4)), + State2 = setelement(10, State0, Regs1), [r4, r3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1), Stream = ?BACKEND:stream(State3), @@ -3051,10 +3057,14 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement( - 7, State0, element(7, State0) band (bnot ((1 bsl 3) bor (1 bsl 4))) + Regs0 = element(10, State0), + UsedMask = (1 bsl 3) bor (1 bsl 4), + Regs1 = jit_regs:set_masks( + Regs0, + jit_regs:available_regs(Regs0) band (bnot UsedMask), + UsedMask ), - State2 = setelement(8, State1, (1 bsl 3) bor (1 bsl 4)), + State2 = setelement(10, State0, Regs1), [r4, r3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index b777f2d807..00bebc1316 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -2583,8 +2583,10 @@ move_to_array_element_test_() -> ?_test(begin %% a3 = bit 3 = 8, t3 = bit 11 = 2048 %% AVAILABLE_REGS_MASK = 16#7F00, remove t3 => 16#7700 - State1 = setelement(7, State0, 16#7700), - State2 = setelement(8, State1, 8 bor 2048), + Regs0 = element(9, State0), + State2 = setelement( + 9, State0, jit_regs:set_masks(Regs0, 16#7700, 8 bor 2048) + ), [t3, a3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), @@ -2601,8 +2603,10 @@ move_to_array_element_test_() -> ?_test(begin %% a3 = bit 3 = 8, t3 = bit 11 = 2048 %% AVAILABLE_REGS_MASK = 16#7F00, remove t3 => 16#7700 - State1 = 
setelement(7, State0, 16#7700), - State2 = setelement(8, State1, 8 bor 2048), + Regs0 = element(9, State0), + State2 = setelement( + 9, State0, jit_regs:set_masks(Regs0, 16#7700, 8 bor 2048) + ), [t3, a3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_riscv64_tests.erl b/tests/libs/jit/jit_riscv64_tests.erl index 8e88e93217..b0d9e8d7f3 100644 --- a/tests/libs/jit/jit_riscv64_tests.erl +++ b/tests/libs/jit/jit_riscv64_tests.erl @@ -2660,9 +2660,9 @@ move_to_array_element_test_() -> A3Bit = 1 bsl 3, T3Bit = 1 bsl 11, UsedMask = A3Bit bor T3Bit, - AvailMask = element(7, State0) band (bnot UsedMask), - State1 = setelement(7, State0, AvailMask), - State2 = setelement(8, State1, UsedMask), + Regs0 = element(9, State0), + AvailMask = jit_regs:available_regs(Regs0) band (bnot UsedMask), + State2 = setelement(9, State0, jit_regs:set_masks(Regs0, AvailMask, UsedMask)), ?assertEqual(lists:sort([a3, t3]), lists:sort(?BACKEND:used_regs(State2))), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), @@ -2680,9 +2680,9 @@ move_to_array_element_test_() -> A3Bit = 1 bsl 3, T3Bit = 1 bsl 11, UsedMask = A3Bit bor T3Bit, - AvailMask = element(7, State0) band (bnot UsedMask), - State1 = setelement(7, State0, AvailMask), - State2 = setelement(8, State1, UsedMask), + Regs0 = element(9, State0), + AvailMask = jit_regs:available_regs(Regs0) band (bnot UsedMask), + State2 = setelement(9, State0, jit_regs:set_masks(Regs0, AvailMask, UsedMask)), ?assertEqual(lists:sort([a3, t3]), lists:sort(?BACKEND:used_regs(State2))), State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_wasm32_tests.erl b/tests/libs/jit/jit_wasm32_tests.erl index 2bcc71e4ac..87e73ef1e4 100644 --- a/tests/libs/jit/jit_wasm32_tests.erl +++ b/tests/libs/jit/jit_wasm32_tests.erl @@ -2242,6 
+2242,21 @@ condition_int_cast_ne_test() -> >>, ?assertStream(wasm32, Dump, Stream). +%% Regression test: call_func_ptr must keep the native-register bookkeeping +%% from after argument marshalling, not roll it back to the pre-call state. +%% With the rollback, ResultReg lost its used bit and a later allocation could +%% clobber it while the freed FuncPtr local leaked as used. +call_func_ptr_regs_test() -> + State0 = ?BACKEND:new(0, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + State2 = ?BACKEND:add_label(State1, 0), + {State3, FuncPtr} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual([FuncPtr], ?BACKEND:used_regs(State3)), + {State4, ResultReg} = ?BACKEND:call_func_ptr(State3, {free, FuncPtr}, [ctx]), + ?assertEqual([ResultReg], ?BACKEND:used_regs(State4)), + State5 = ?BACKEND:free_native_registers(State4, [ResultReg]), + ?assertEqual(ok, ?BACKEND:assert_all_native_free(State5)). + call_func_ptr_primitive_test() -> State0 = ?BACKEND:new(0, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 1),