Skip to content

Commit a1f7da0

Browse files
committed
increase the uop buffer size
1 parent b8635b3 commit a1f7da0

4 files changed

Lines changed: 32 additions & 58 deletions

File tree

Include/internal/pycore_optimizer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ extern "C" {
3131
* 4. A push followed by a matching return is net-zero on frame-specific
3232
* fitness, excluding per-slot costs.
3333
*/
34-
#define MAX_TARGET_LENGTH (UOP_MAX_TRACE_LENGTH / 2)
3534
#define OPTIMIZER_EFFECTIVENESS 2
36-
#define FITNESS_INITIAL (MAX_TARGET_LENGTH * OPTIMIZER_EFFECTIVENESS)
35+
#define MAX_TARGET_LENGTH (FITNESS_INITIAL / OPTIMIZER_EFFECTIVENESS)
3736

3837
/* Exit quality thresholds: trace stops when fitness < exit_quality.
3938
* Higher = trace is more willing to stop here. */

Include/internal/pycore_uop.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,18 @@ typedef struct _PyUOpInstruction{
3636
#endif
3737
} _PyUOpInstruction;
3838

39-
// This is the length of the trace we translate initially.
39+
// FITNESS_INITIAL is the target length of the trace we translate initially. The uop
40+
// buffer has a small amount of extra space for entry/loop-closing overhead.
4041
#if defined(Py_DEBUG) && defined(_Py_JIT)
4142
// With asserts, the stencils are a lot larger
42-
#define UOP_MAX_TRACE_LENGTH 1000
43+
#define FITNESS_INITIAL 1000
4344
#else
44-
#define UOP_MAX_TRACE_LENGTH 2500
45+
#define FITNESS_INITIAL 2500
4546
#endif
4647

48+
#define UOP_TRACE_BUFFER_OVERHEAD 10
49+
#define UOP_MAX_TRACE_LENGTH (FITNESS_INITIAL + UOP_TRACE_BUFFER_OVERHEAD)
50+
4751
/* Bloom filter with m = 256
4852
* https://en.wikipedia.org/wiki/Bloom_filter */
4953
#ifdef HAVE_GCC_UINT128_T

Python/optimizer.c

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -651,41 +651,6 @@ compute_frame_penalty(uint16_t fitness_initial)
651651
return (int32_t)fitness_initial / (MAX_ABSTRACT_FRAME_DEPTH - 1) + 1;
652652
}
653653

654-
/* Slots carved out from `trace->end` for side-exit, deopt, and error stubs
655-
* that `prepare_for_execution()` may append. The base 2 covers `_DEOPT` and
656-
* `_CHECK_VALIDITY`'s implicit deopt path.
657-
*/
658-
static inline int
659-
compute_tail_reservation(int opcode, bool needs_guard_ip)
660-
{
661-
return 2
662-
+ !!OPCODE_HAS_EXIT(opcode)
663-
+ !!OPCODE_HAS_ERROR(opcode)
664-
+ !!OPCODE_HAS_DEOPT(opcode)
665-
+ needs_guard_ip;
666-
}
667-
668-
/* Upper bound on the slots `add_to_trace()` may consume for this bytecode,
669-
* including one spare slot for the runtime guard's emergency `_EXIT_TRACE`.
670-
*/
671-
static inline int
672-
compute_space_needed(int opcode, int nuops, bool needs_guard_ip,
673-
bool may_close_loop)
674-
{
675-
// `_CHECK_VALIDITY` + optional `_SET_IP` + macro expansion.
676-
int space_needed = 1 + !OPCODE_HAS_NO_SAVE_IP(opcode) + nuops;
677-
if (needs_guard_ip) {
678-
// `_RECORD_CODE`, guard_ip, guard_code_version, and another
679-
// `_SET_IP` if this bytecode also closes the loop.
680-
space_needed += 3 + may_close_loop;
681-
}
682-
// `_JUMP_TO_TOP` when this bytecode closes the loop.
683-
space_needed += may_close_loop;
684-
// Spare slot for emergency `_EXIT_TRACE`; also keeps remaining_space > 0.
685-
space_needed += 1;
686-
return space_needed;
687-
}
688-
689654
static int
690655
is_terminator(const _PyUOpInstruction *uop)
691656
{
@@ -906,30 +871,34 @@ _PyJit_translate_single_bytecode_to_trace(
906871
// space this bytecode consumed, including reserved tail slots.
907872
int32_t remaining_before = uop_buffer_remaining_space(trace);
908873

909-
bool may_close_loop = tracer->initial_state.close_loop_instr == next_instr ||
910-
tracer->initial_state.start_instr == next_instr;
911-
int tail_reservation = compute_tail_reservation(opcode, needs_guard_ip);
912-
int space_needed = compute_space_needed(
913-
opcode, _PyOpcode_macro_expansion[opcode].nuops,
914-
needs_guard_ip, may_close_loop);
915-
if (remaining_before <= tail_reservation + space_needed) {
916-
DPRINTF(2,
917-
"Buffer full: %s(%d) remaining=%d, needed=%d\n",
918-
_PyOpcode_OpName[opcode], oparg, remaining_before,
919-
tail_reservation + space_needed);
920-
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
921-
goto done;
922-
}
923-
924-
trace->end -= tail_reservation;
874+
// One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
875+
trace->end -= 2;
925876

926877
const _PyOpcodeRecordSlotMap *record_slot_map = &_PyOpcode_RecordSlotMaps[opcode];
927878

928879
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
929880
assert(!_PyErr_Occurred(tstate));
930881

931882

883+
if (OPCODE_HAS_EXIT(opcode)) {
884+
// Make space for side exit
885+
trace->end--;
886+
}
887+
if (OPCODE_HAS_ERROR(opcode)) {
888+
// Make space for error stub
889+
trace->end--;
890+
}
891+
if (OPCODE_HAS_DEOPT(opcode)) {
892+
// Make space for deopt stub
893+
trace->end--;
894+
}
895+
896+
// _GUARD_IP leads to an exit.
897+
trace->end -= needs_guard_ip;
898+
932899
#if Py_DEBUG
900+
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
901+
int space_needed = expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode));
933902
assert(uop_buffer_remaining_space(trace) > space_needed);
934903
#endif
935904

@@ -1148,7 +1117,9 @@ _PyJit_translate_single_bytecode_to_trace(
11481117
}
11491118
}
11501119
// Loop back to the start
1151-
if (may_close_loop && uop_buffer_length(trace) > CODE_SIZE_NO_PROGRESS) {
1120+
int is_first_instr = tracer->initial_state.close_loop_instr == next_instr ||
1121+
tracer->initial_state.start_instr == next_instr;
1122+
if (is_first_instr && uop_buffer_length(trace) > CODE_SIZE_NO_PROGRESS) {
11521123
if (needs_guard_ip) {
11531124
ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
11541125
}

Python/pystate.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ init_interpreter(PyInterpreterState *interp,
634634
// Trace fitness configuration
635635
init_policy(&interp->opt_config.fitness_initial,
636636
"PYTHON_JIT_FITNESS_INITIAL",
637-
FITNESS_INITIAL, EXIT_QUALITY_CLOSE_LOOP, UOP_MAX_TRACE_LENGTH - 1);
637+
FITNESS_INITIAL, EXIT_QUALITY_CLOSE_LOOP, FITNESS_INITIAL);
638638

639639
interp->opt_config.specialization_enabled = !is_env_enabled("PYTHON_SPECIALIZATION_OFF");
640640
interp->opt_config.uops_optimize_enabled = !is_env_disabled("PYTHON_UOPS_OPTIMIZE");

0 commit comments

Comments
 (0)