From 94021dd1a8e4759c5192514d426c953f33c17571 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 04:21:07 +0800 Subject: [PATCH 01/14] Generate super cases --- Python/bytecodes.c | 26 ++ Python/executor_cases.c.h | 306 +++++++++++++++++++++++ Tools/cases_generator/analyzer.py | 35 ++- Tools/cases_generator/lexer.py | 3 + Tools/cases_generator/parser.py | 1 + Tools/cases_generator/parsing.py | 23 +- Tools/cases_generator/tier2_generator.py | 15 ++ 7 files changed, 407 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 565379afc4b5a7..a3eb96860fef45 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -445,6 +445,13 @@ dummy_func( macro(BINARY_OP_SUBTRACT_INT) = _GUARD_BOTH_INT + unused/1 + _BINARY_OP_SUBTRACT_INT; + super(_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT) = + _GUARD_BOTH_INT + _BINARY_OP_MULTIPLY_INT; + super(_GUARD_BOTH_INT__BINARY_OP_ADD_INT) = + _GUARD_BOTH_INT + _BINARY_OP_ADD_INT; + super(_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT) = + _GUARD_BOTH_INT + _BINARY_OP_SUBTRACT_INT; + op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { EXIT_IF(!PyFloat_CheckExact(left)); EXIT_IF(!PyFloat_CheckExact(right)); @@ -481,6 +488,13 @@ dummy_func( macro(BINARY_OP_SUBTRACT_FLOAT) = _GUARD_BOTH_FLOAT + unused/1 + _BINARY_OP_SUBTRACT_FLOAT; + super(_GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT) = + _GUARD_BOTH_FLOAT + _BINARY_OP_MULTIPLY_FLOAT; + super(_GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT) = + _GUARD_BOTH_FLOAT + _BINARY_OP_ADD_FLOAT; + super(_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT) = + _GUARD_BOTH_FLOAT + _BINARY_OP_SUBTRACT_FLOAT; + op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) { EXIT_IF(!PyUnicode_CheckExact(left)); EXIT_IF(!PyUnicode_CheckExact(right)); @@ -497,6 +511,9 @@ dummy_func( macro(BINARY_OP_ADD_UNICODE) = _GUARD_BOTH_UNICODE + unused/1 + _BINARY_OP_ADD_UNICODE; + super(_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE) = + _GUARD_BOTH_UNICODE + 
_BINARY_OP_ADD_UNICODE; + // This is a subtle one. It's a super-instruction for // BINARY_OP_ADD_UNICODE followed by STORE_FAST // where the store goes into the left argument. @@ -3155,6 +3172,15 @@ dummy_func( _SAVE_RETURN_OFFSET + _PUSH_FRAME; + super(_CALL_BOUND_METHOD_EXACT_ARGS) = + _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + + _INIT_CALL_BOUND_METHOD_EXACT_ARGS + + _CHECK_FUNCTION_EXACT_ARGS + + _CHECK_STACK_SPACE + + _INIT_CALL_PY_EXACT_ARGS + + _SAVE_RETURN_OFFSET + + _PUSH_FRAME; + macro(CALL_PY_EXACT_ARGS) = unused/1 + // Skip over the counter _CHECK_PEP_523 + diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 20fab8f4c61eb5..b89dbbeb3c6d92 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3862,4 +3862,310 @@ break; } + case _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT: { + // _GUARD_BOTH_INT + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyLong_CheckExact(left)) goto side_exit; + if (!PyLong_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_MULTIPLY_INT + { + PyObject *right; + PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _GUARD_BOTH_INT__BINARY_OP_ADD_INT: { + // _GUARD_BOTH_INT + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyLong_CheckExact(left)) goto side_exit; + if (!PyLong_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_ADD_INT + { + PyObject *right; + PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + res = _PyLong_Add((PyLongObject *)left, 
(PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT: { + // _GUARD_BOTH_INT + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyLong_CheckExact(left)) goto side_exit; + if (!PyLong_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_SUBTRACT_INT + { + PyObject *right; + PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); + _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); + if (res == NULL) goto pop_2_error_tier_two; + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT: { + // _GUARD_BOTH_FLOAT + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyFloat_CheckExact(left)) goto side_exit; + if (!PyFloat_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_MULTIPLY_FLOAT + { + PyObject *right; + PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval * + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT: { + // _GUARD_BOTH_FLOAT + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyFloat_CheckExact(left)) goto side_exit; + if (!PyFloat_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_ADD_FLOAT + { + PyObject *right; + 
PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval + + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT: { + // _GUARD_BOTH_FLOAT + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyFloat_CheckExact(left)) goto side_exit; + if (!PyFloat_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_SUBTRACT_FLOAT + { + PyObject *right; + PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left)->ob_fval - + ((PyFloatObject *)right)->ob_fval; + DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE: { + // _GUARD_BOTH_UNICODE + { + PyObject *right; + PyObject *left; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if (!PyUnicode_CheckExact(left)) goto side_exit; + if (!PyUnicode_CheckExact(right)) goto side_exit; + } + // _BINARY_OP_ADD_UNICODE + { + PyObject *right; + PyObject *left; + PyObject *res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + STAT_INC(BINARY_OP, hit); + res = PyUnicode_Concat(left, right); + _Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); + _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); + if (res == NULL) goto pop_2_error_tier_two; + stack_pointer[-2] = res; + stack_pointer += -1; + } + break; + } + + case _CALL_BOUND_METHOD_EXACT_ARGS: { + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *null; + PyObject *callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + if (null != NULL) goto deoptimize; + if 
(Py_TYPE(callable) != &PyMethod_Type) goto deoptimize; + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *callable; + PyObject *func; + PyObject *self; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + } + // _CHECK_FUNCTION_EXACT_ARGS + { + PyObject *self_or_null; + PyObject *callable; + oparg = CURRENT_OPARG(); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE + { + PyObject *callable; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = CURRENT_OPARG(); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = 
(PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + // _SAVE_RETURN_OFFSET + { + oparg = CURRENT_OPARG(); + #if TIER_ONE + frame->return_offset = (uint16_t)(next_instr - this_instr); + #endif + #if TIER_TWO + frame->return_offset = oparg; + #endif + } + // _PUSH_FRAME + { + _PyInterpreterFrame *new_frame; + new_frame = (_PyInterpreterFrame *)stack_pointer[-1]; + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + stack_pointer += -1; + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + #if LLTRACE && TIER_ONE + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; + } + #endif + } + break; + } + #undef TIER_TWO diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index b0a15e6d87c2c6..2c0ee3063f5cb8 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -247,6 +247,7 @@ def dump(self, indent: str) -> None: class Analysis: instructions: dict[str, Instruction] uops: dict[str, Uop] + super_uops: dict[str, Instruction] families: dict[str, Family] pseudos: dict[str, PseudoInstruction] opmap: dict[str, int] @@ -633,6 +634,33 @@ def add_macro( add_instruction(macro.name, parts, instructions) +def add_super( + super: parser.Super, super_uops: dict[str, Instruction], uops: dict[str, Uop] +) -> None: + parts: list[Uop] = [] + for part in super.uops: + match part: + case parser.OpName(): + if part.name not in uops: + analysis_error(f"No Uop named {part.name}", 
super.tokens[0]) + parts.append(uops[part.name]) + case _: + assert False + assert parts + # All uop parts in a super-uop must respect the instruction format. + # 1. At most one operand can be used across all constituent uops (this can be repeated). + # 2. At most one oparg can be used across all constituent uops (we assume this, since it cannot be easily checked). + operand_uses = set() + for op in parts: + assert isinstance(op, Uop) + for cache_entry in op.caches: + if (cache_name := cache_entry.name) == "unused": + continue + operand_uses.add(cache_name) + if len(operand_uses) > 1: + analysis_error(f"Uop super {super.name}'s cache entry cannot fit in one operand.") + add_instruction(super.name, parts, super_uops) + def add_family( pfamily: parser.Family, instructions: dict[str, Instruction], @@ -754,6 +782,7 @@ def add_instruction(name: str) -> None: def analyze_forest(forest: list[parser.AstNode]) -> Analysis: instructions: dict[str, Instruction] = {} uops: dict[str, Uop] = {} + super_uops: dict[str, Instruction] = {} families: dict[str, Family] = {} pseudos: dict[str, PseudoInstruction] = {} for node in forest: @@ -766,6 +795,8 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: add_op(node, uops) case parser.Macro(): pass + case parser.Super(): + pass case parser.Family(): pass case parser.Pseudo(): @@ -775,6 +806,8 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: for node in forest: if isinstance(node, parser.Macro): add_macro(node, instructions, uops) + if isinstance(node, parser.Super): + add_super(node, super_uops, uops) for node in forest: match node: case parser.Family(): @@ -804,7 +837,7 @@ def analyze_forest(forest: list[parser.AstNode]) -> Analysis: families["BINARY_OP"].members.append(inst) opmap, first_arg, min_instrumented = assign_opcodes(instructions, families, pseudos) return Analysis( - instructions, uops, families, pseudos, opmap, first_arg, min_instrumented + instructions, uops, super_uops, families, pseudos, 
opmap, first_arg, min_instrumented ) diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 13aee94f2b957c..8e73bb0c377832 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -213,6 +213,9 @@ def choice(*opts: str) -> str: # A macro in the DSL MACRO = "MACRO" kwds.append(MACRO) +SUPER = "SUPER" +kwds.append(SUPER) + keywords = {name.lower(): name for name in kwds} ANNOTATION = "ANNOTATION" diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py index 2b77d14d21143f..4e8687c095d85f 100644 --- a/Tools/cases_generator/parser.py +++ b/Tools/cases_generator/parser.py @@ -1,6 +1,7 @@ from parsing import ( InstDef, Macro, + Super, Pseudo, Family, Parser, diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 0d54820e4e71fb..c6286d3630c50e 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -135,6 +135,12 @@ class Family(Node): members: list[str] +@dataclass +class Super(Node): + name: str + uops: list[UOp] + + @dataclass class Pseudo(Node): name: str @@ -142,7 +148,7 @@ class Pseudo(Node): targets: list[str] # opcodes this can be replaced by -AstNode = InstDef | Macro | Pseudo | Family +AstNode = InstDef | Macro | Super | Pseudo | Family class Parser(PLexer): @@ -150,6 +156,8 @@ class Parser(PLexer): def definition(self) -> AstNode | None: if macro := self.macro_def(): return macro + if super := self.super_def(): + return super if family := self.family_def(): return family if pseudo := self.pseudo_def(): @@ -332,6 +340,19 @@ def macro_def(self) -> Macro | None: return res return None + @contextual + def super_def(self) -> Super | None: + if tkn := self.expect(lx.SUPER): + if self.expect(lx.LPAREN): + if tkn := self.expect(lx.IDENTIFIER): + if self.expect(lx.RPAREN): + if self.expect(lx.EQUALS): + if uops := self.uops(): + self.require(lx.SEMI) + res = Super(tkn.text, uops) + return res + return None + def uops(self) -> 
list[UOp] | None: if uop := self.uop(): uop = cast(UOp, uop) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index d8eed1078b0914..3e81ad3f00b99f 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -217,6 +217,21 @@ def generate_tier2( out.start_line() out.emit("}") out.emit("\n\n") + for name, super_uop in analysis.super_uops.items(): + out.emit(f"case {name}: {{\n") + stack = Stack() + for part in super_uop.parts: + out.emit(f"// {part.name}\n") + out.emit("{\n") + declare_variables(part, out) + write_uop(part, out, stack) + stack.flush(out) + out.emit("}\n") + out.start_line() + out.emit("break;\n") + out.start_line() + out.emit("}") + out.emit("\n\n") outfile.write("#undef TIER_TWO\n") From 4e6a2bd8ad0313f208ca5c89d6680cd52394bf75 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 05:00:27 +0800 Subject: [PATCH 02/14] generate the matcher --- Include/internal/pycore_uop_ids.h | 10 +- Python/uop_super_matcher_cases.c.h | 137 ++++++++++++++++++ .../tier2_super_matcher_generator.py | 114 +++++++++++++++ Tools/cases_generator/uop_id_generator.py | 4 + 4 files changed, 264 insertions(+), 1 deletion(-) create mode 100644 Python/uop_super_matcher_cases.c.h create mode 100644 Tools/cases_generator/tier2_super_matcher_generator.py diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 3c133d97b2f03e..4b7da84a81885e 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -267,7 +267,15 @@ extern "C" { #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 422 +#define _CALL_BOUND_METHOD_EXACT_ARGS 423 +#define _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT 424 +#define _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT 425 +#define 
_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT 426 +#define _GUARD_BOTH_INT__BINARY_OP_ADD_INT 427 +#define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 428 +#define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 429 +#define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 430 +#define MAX_UOP_ID 430 #ifdef __cplusplus } diff --git a/Python/uop_super_matcher_cases.c.h b/Python/uop_super_matcher_cases.c.h new file mode 100644 index 00000000000000..3e7969c056aa00 --- /dev/null +++ b/Python/uop_super_matcher_cases.c.h @@ -0,0 +1,137 @@ +// This file is generated by Tools/cases_generator/tier2_super_matcher_generator.py +// from: +// Python/bytecodes.c +// Do not edit! + +#ifdef TIER_ONE + #error "This file is for Tier 2 only" +#endif +#define TIER_TWO 2 + + case _GUARD_BOTH_INT: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_MULTIPLY_INT: { + REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT, this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _GUARD_BOTH_INT: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_ADD_INT: { + REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_ADD_INT, this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _GUARD_BOTH_INT: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_SUBTRACT_INT: { + REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT, this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _GUARD_BOTH_FLOAT: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_MULTIPLY_FLOAT: { + REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT, this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _GUARD_BOTH_FLOAT: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_ADD_FLOAT: { + REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT, 
this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _GUARD_BOTH_FLOAT: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_SUBTRACT_FLOAT: { + REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT, this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _GUARD_BOTH_UNICODE: { + next_instr++; + switch (next_instr->opcode) { + case _BINARY_OP_ADD_UNICODE: { + REPLACE_OP(this_instr, _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE, this_instr[0]->oparg, this_instr[0]->operand); + next_instr++; + break; + } + } + break; + } + + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + next_instr++; + switch (next_instr->opcode) { + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + next_instr++; + switch (next_instr->opcode) { + case _CHECK_FUNCTION_EXACT_ARGS: { + next_instr++; + switch (next_instr->opcode) { + case _CHECK_STACK_SPACE: { + next_instr++; + switch (next_instr->opcode) { + case _INIT_CALL_PY_EXACT_ARGS: { + next_instr++; + switch (next_instr->opcode) { + case _SAVE_RETURN_OFFSET: { + next_instr++; + switch (next_instr->opcode) { + case _PUSH_FRAME: { + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS, this_instr[5]->oparg, this_instr[2]->operand); + next_instr++; + break; + } + } + break; + } + } + break; + } + } + break; + } + } + break; + } + } + break; + } + } + break; + } + +#undef TIER_TWO diff --git a/Tools/cases_generator/tier2_super_matcher_generator.py b/Tools/cases_generator/tier2_super_matcher_generator.py new file mode 100644 index 00000000000000..fdde71daa53afc --- /dev/null +++ b/Tools/cases_generator/tier2_super_matcher_generator.py @@ -0,0 +1,114 @@ +"""Generate the cases for the matcher of the tier 2 super instructions. 
+"""
+
+import argparse
+import os.path
+import sys
+
+from analyzer import (
+    Analysis,
+    Instruction,
+    Uop,
+    Part,
+    analyze_files,
+    Skip,
+    StackItem,
+    analysis_error,
+)
+from generators_common import (
+    DEFAULT_INPUT,
+    ROOT,
+    write_header,
+    emit_tokens,
+    emit_to,
+    REPLACEMENT_FUNCTIONS,
+)
+from cwriter import CWriter
+from typing import TextIO, Iterator
+from lexer import Token
+from stack import StackOffset, Stack, SizeMismatch
+
+DEFAULT_OUTPUT = ROOT / "Python/uop_super_matcher_cases.c.h"
+
+
+def generate_tier2(
+    filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool
+) -> None:  # Write one nested switch/case matcher per entry of analysis.super_uops to outfile.
+    write_header(__file__, filenames, outfile)
+    outfile.write(
+        """
+#ifdef TIER_ONE
+    #error "This file is for Tier 2 only"
+#endif
+#define TIER_TWO 2
+"""
+    )
+    out = CWriter(outfile, 2, lines)
+    out.emit("\n")
+    for name, super_uop in analysis.super_uops.items():
+        middle_uops = super_uop.parts[1:-1]
+        first_uop = super_uop.parts[0]
+        last_uop = super_uop.parts[-1]
+        # Choose which constituent's oparg/operand REPLACE_OP copies; the last matching part wins.
+        oparg = 0
+        operand = 0
+        for idx, part in enumerate(super_uop.parts):
+            if part.properties.oparg:
+                oparg = idx
+            if len(part.caches) > 0 and part.caches[0].name != "unused":  # compare the entry's name, not the entry object
+                operand = idx
+        # The outer case matches the first uop; every later uop is matched by a nested switch.
+        out.emit(f"case {first_uop.name}: ")
+        out.emit("{\n")
+        out.emit("next_instr++;\n")
+        # Open one nesting level per middle uop (closed again after the innermost case below).
+        for part in middle_uops:
+            out.emit("switch (next_instr->opcode) {\n")
+            out.emit(f"case {part.name}: ")
+            out.emit("{\n")
+            out.emit("next_instr++;\n")
+        out.emit("switch (next_instr->opcode) {\n")
+        out.emit(f"case {last_uop.name}: ")
+        out.emit("{\n")
+        out.emit(f"REPLACE_OP(this_instr, {name}, this_instr[{oparg}]->oparg, this_instr[{operand}]->operand);\n")
+        out.emit("next_instr++;\n")
+        out.emit("break;\n")
+        out.emit("}\n")
+        out.emit("}\n")
+        for part in middle_uops:  # close the levels opened for the middle uops
+            out.start_line()
+            out.emit("break;\n")
+            out.start_line()
+            out.emit("}\n")
+            out.emit("}\n")
+        out.start_line()
+        out.emit("break;\n")
+        out.emit("}")
+        out.emit("\n\n")
+    outfile.write("#undef TIER_TWO\n")
+
+
+arg_parser = 
argparse.ArgumentParser( + description="Generate the code for the tier 2 interpreter.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) + +arg_parser.add_argument( + "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT +) + +arg_parser.add_argument( + "-l", "--emit-line-directives", help="Emit #line directives", action="store_true" +) + +arg_parser.add_argument( + "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)" +) + +if __name__ == "__main__": + args = arg_parser.parse_args() + if len(args.input) == 0: + args.input.append(DEFAULT_INPUT) + data = analyze_files(args.input) + with open(args.output, "w") as outfile: + generate_tier2(args.input, data, outfile, args.emit_line_directives) diff --git a/Tools/cases_generator/uop_id_generator.py b/Tools/cases_generator/uop_id_generator.py index eb5e3f4a324735..74c75356f0fe3f 100644 --- a/Tools/cases_generator/uop_id_generator.py +++ b/Tools/cases_generator/uop_id_generator.py @@ -51,6 +51,10 @@ def generate_uop_ids( out.emit(f"#define {name} {next_id}\n") next_id += 1 + for name, uop in sorted([(uop.name, uop) for uop in analysis.super_uops.values()]): + out.emit(f"#define {name} {next_id}\n") + next_id += 1 + out.emit(f"#define MAX_UOP_ID {next_id-1}\n") From 99e88fd749d6bf5d80cbe856d053f00f3ca97e0c Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 06:12:55 +0800 Subject: [PATCH 03/14] generate the matcher --- Include/internal/pycore_uop_ids.h | 158 +++++++------- Include/internal/pycore_uop_metadata.h | 20 +- Lib/test/test_capi/test_opt.py | 4 +- Python/bytecodes.c | 11 +- Python/executor_cases.c.h | 202 ++++-------------- Python/optimizer_analysis.c | 15 ++ Python/uop_super_matcher_cases.c.h | 144 +++++-------- .../tier2_super_matcher_generator.py | 72 ++++--- .../cases_generator/uop_metadata_generator.py | 2 + 9 files changed, 253 insertions(+), 375 deletions(-) diff --git 
a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 4b7da84a81885e..08dcdcf04bd2ee 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -119,11 +119,6 @@ extern "C" { #define _GUARD_TYPE_VERSION 348 #define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 349 #define _INIT_CALL_PY_EXACT_ARGS 350 -#define _INIT_CALL_PY_EXACT_ARGS_0 351 -#define _INIT_CALL_PY_EXACT_ARGS_1 352 -#define _INIT_CALL_PY_EXACT_ARGS_2 353 -#define _INIT_CALL_PY_EXACT_ARGS_3 354 -#define _INIT_CALL_PY_EXACT_ARGS_4 355 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -140,65 +135,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 356 -#define _IS_NONE 357 +#define _INTERNAL_INCREMENT_OPT_COUNTER 351 +#define _IS_NONE 352 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 358 -#define _ITER_CHECK_RANGE 359 -#define _ITER_CHECK_TUPLE 360 -#define _ITER_JUMP_LIST 361 -#define _ITER_JUMP_RANGE 362 -#define _ITER_JUMP_TUPLE 363 -#define _ITER_NEXT_LIST 364 -#define _ITER_NEXT_RANGE 365 -#define _ITER_NEXT_TUPLE 366 -#define _JUMP_TO_TOP 367 +#define _ITER_CHECK_LIST 353 +#define _ITER_CHECK_RANGE 354 +#define _ITER_CHECK_TUPLE 355 +#define _ITER_JUMP_LIST 356 +#define _ITER_JUMP_RANGE 357 +#define _ITER_JUMP_TUPLE 358 +#define _ITER_NEXT_LIST 359 +#define _ITER_NEXT_RANGE 360 +#define _ITER_NEXT_TUPLE 361 +#define _JUMP_TO_TOP 362 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR -#define _LOAD_ATTR 368 -#define _LOAD_ATTR_CLASS 369 -#define _LOAD_ATTR_CLASS_0 370 -#define _LOAD_ATTR_CLASS_1 371 +#define _LOAD_ATTR 363 +#define _LOAD_ATTR_CLASS 364 +#define _LOAD_ATTR_CLASS_0 365 
+#define _LOAD_ATTR_CLASS_1 366 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 372 -#define _LOAD_ATTR_INSTANCE_VALUE_0 373 -#define _LOAD_ATTR_INSTANCE_VALUE_1 374 -#define _LOAD_ATTR_METHOD_LAZY_DICT 375 -#define _LOAD_ATTR_METHOD_NO_DICT 376 -#define _LOAD_ATTR_METHOD_WITH_VALUES 377 -#define _LOAD_ATTR_MODULE 378 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 379 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 380 +#define _LOAD_ATTR_INSTANCE_VALUE 367 +#define _LOAD_ATTR_INSTANCE_VALUE_0 368 +#define _LOAD_ATTR_INSTANCE_VALUE_1 369 +#define _LOAD_ATTR_METHOD_LAZY_DICT 370 +#define _LOAD_ATTR_METHOD_NO_DICT 371 +#define _LOAD_ATTR_METHOD_WITH_VALUES 372 +#define _LOAD_ATTR_MODULE 373 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 375 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 381 -#define _LOAD_ATTR_SLOT_0 382 -#define _LOAD_ATTR_SLOT_1 383 -#define _LOAD_ATTR_WITH_HINT 384 +#define _LOAD_ATTR_SLOT 376 +#define _LOAD_ATTR_SLOT_0 377 +#define _LOAD_ATTR_SLOT_1 378 +#define _LOAD_ATTR_WITH_HINT 379 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 385 -#define _LOAD_CONST_INLINE_BORROW 386 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 387 -#define _LOAD_CONST_INLINE_WITH_NULL 388 +#define _LOAD_CONST_INLINE 380 +#define _LOAD_CONST_INLINE_BORROW 381 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 382 +#define _LOAD_CONST_INLINE_WITH_NULL 383 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 389 -#define _LOAD_FAST_0 390 -#define _LOAD_FAST_1 391 -#define _LOAD_FAST_2 392 -#define _LOAD_FAST_3 393 -#define _LOAD_FAST_4 394 -#define _LOAD_FAST_5 395 -#define _LOAD_FAST_6 396 -#define _LOAD_FAST_7 397 +#define _LOAD_FAST 384 +#define _LOAD_FAST_0 385 +#define _LOAD_FAST_1 386 +#define _LOAD_FAST_2 387 +#define _LOAD_FAST_3 388 +#define _LOAD_FAST_4 389 +#define _LOAD_FAST_5 390 +#define 
_LOAD_FAST_6 391 +#define _LOAD_FAST_7 392 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 398 -#define _LOAD_GLOBAL_BUILTINS 399 -#define _LOAD_GLOBAL_MODULE 400 +#define _LOAD_GLOBAL 393 +#define _LOAD_GLOBAL_BUILTINS 394 +#define _LOAD_GLOBAL_MODULE 395 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -212,46 +207,46 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 401 -#define _POP_JUMP_IF_FALSE 402 -#define _POP_JUMP_IF_TRUE 403 +#define _POP_FRAME 396 +#define _POP_JUMP_IF_FALSE 397 +#define _POP_JUMP_IF_TRUE 398 #define _POP_TOP POP_TOP #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 404 +#define _PUSH_FRAME 399 #define _PUSH_NULL PUSH_NULL #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 405 -#define _SEND 406 +#define _SAVE_RETURN_OFFSET 400 +#define _SEND 401 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 407 -#define _STORE_ATTR 408 -#define _STORE_ATTR_INSTANCE_VALUE 409 -#define _STORE_ATTR_SLOT 410 +#define _START_EXECUTOR 402 +#define _STORE_ATTR 403 +#define _STORE_ATTR_INSTANCE_VALUE 404 +#define _STORE_ATTR_SLOT 405 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 411 -#define _STORE_FAST_0 412 -#define _STORE_FAST_1 413 -#define _STORE_FAST_2 414 -#define _STORE_FAST_3 415 -#define _STORE_FAST_4 416 -#define _STORE_FAST_5 417 -#define _STORE_FAST_6 418 -#define _STORE_FAST_7 419 +#define _STORE_FAST 406 +#define _STORE_FAST_0 407 +#define 
_STORE_FAST_1 408 +#define _STORE_FAST_2 409 +#define _STORE_FAST_3 410 +#define _STORE_FAST_4 411 +#define _STORE_FAST_5 412 +#define _STORE_FAST_6 413 +#define _STORE_FAST_7 414 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 420 +#define _STORE_SUBSCR 415 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 421 +#define _TO_BOOL 416 #define _TO_BOOL_ALWAYS_TRUE TO_BOOL_ALWAYS_TRUE #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT @@ -262,20 +257,21 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 422 +#define _UNPACK_SEQUENCE 417 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define _CALL_BOUND_METHOD_EXACT_ARGS 423 -#define _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT 424 -#define _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT 425 -#define _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT 426 -#define _GUARD_BOTH_INT__BINARY_OP_ADD_INT 427 -#define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 428 -#define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 429 -#define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 430 -#define MAX_UOP_ID 430 +#define _CALL_BOUND_METHOD_EXACT_ARGS 418 +#define _CHECK_CALL_PY_EXACT_ARGS 419 +#define _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT 420 +#define _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT 421 +#define _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT 422 +#define _GUARD_BOTH_INT__BINARY_OP_ADD_INT 423 +#define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 424 +#define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 425 +#define 
_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 426 +#define MAX_UOP_ID 426 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 35340fe9ee1b63..314679d43b992c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -191,11 +191,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = HAS_DEOPT_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_0] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_1] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_2] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = HAS_ESCAPES_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, @@ -246,7 +241,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, [_STORE_FAST] = 8, - [_INIT_CALL_PY_EXACT_ARGS] = 5, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -351,11 +345,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", - [_INIT_CALL_PY_EXACT_ARGS_0] = "_INIT_CALL_PY_EXACT_ARGS_0", - [_INIT_CALL_PY_EXACT_ARGS_1] = "_INIT_CALL_PY_EXACT_ARGS_1", - [_INIT_CALL_PY_EXACT_ARGS_2] = "_INIT_CALL_PY_EXACT_ARGS_2", - [_INIT_CALL_PY_EXACT_ARGS_3] = "_INIT_CALL_PY_EXACT_ARGS_3", - [_INIT_CALL_PY_EXACT_ARGS_4] = "_INIT_CALL_PY_EXACT_ARGS_4", [_INTERNAL_INCREMENT_OPT_COUNTER] = "_INTERNAL_INCREMENT_OPT_COUNTER", [_IS_NONE] = "_IS_NONE", 
[_IS_OP] = "_IS_OP", @@ -474,6 +463,15 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE_TUPLE] = "_UNPACK_SEQUENCE_TUPLE", [_UNPACK_SEQUENCE_TWO_TUPLE] = "_UNPACK_SEQUENCE_TWO_TUPLE", [_WITH_EXCEPT_START] = "_WITH_EXCEPT_START", + [_CALL_BOUND_METHOD_EXACT_ARGS] = "_CALL_BOUND_METHOD_EXACT_ARGS", + [_CHECK_CALL_PY_EXACT_ARGS] = "_CHECK_CALL_PY_EXACT_ARGS", + [_GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT] = "_GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT", + [_GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT] = "_GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT", + [_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT] = "_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT", + [_GUARD_BOTH_INT__BINARY_OP_ADD_INT] = "_GUARD_BOTH_INT__BINARY_OP_ADD_INT", + [_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT] = "_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT", + [_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT] = "_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT", + [_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE] = "_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE", }; #endif // NEED_OPCODE_METADATA diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 25fc36dec93ddc..04345b5a9158c2 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -394,7 +394,7 @@ def testfunc(n): uops = get_opnames(ex) # Since there is no JUMP_FORWARD instruction, # look for indirect evidence: the += operator - self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_GUARD_BOTH_INT__BINARY_OP_ADD_INT", uops) def test_for_iter_range(self): def testfunc(n): @@ -490,7 +490,7 @@ def dummy(x): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_PUSH_FRAME", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_GUARD_BOTH_INT__BINARY_OP_ADD_INT", uops) def test_branch_taken(self): def testfunc(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a3eb96860fef45..cbcef91e9854e7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3124,7 +3124,7 @@ dummy_func( 
DEOPT_IF(tstate->py_recursion_remaining <= 1); } - replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { int argcount = oparg; if (self_or_null != NULL) { args--; @@ -3177,9 +3177,7 @@ dummy_func( _INIT_CALL_BOUND_METHOD_EXACT_ARGS + _CHECK_FUNCTION_EXACT_ARGS + _CHECK_STACK_SPACE + - _INIT_CALL_PY_EXACT_ARGS + - _SAVE_RETURN_OFFSET + - _PUSH_FRAME; + _INIT_CALL_PY_EXACT_ARGS; macro(CALL_PY_EXACT_ARGS) = unused/1 + // Skip over the counter @@ -3190,6 +3188,11 @@ dummy_func( _SAVE_RETURN_OFFSET + _PUSH_FRAME; + super(_CHECK_CALL_PY_EXACT_ARGS) = + _CHECK_FUNCTION_EXACT_ARGS + + _CHECK_STACK_SPACE + + _INIT_CALL_PY_EXACT_ARGS; + inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { DEOPT_IF(tstate->interp->eval_frame); int argcount = oparg; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b89dbbeb3c6d92..265bb3847a932c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2842,136 +2842,6 @@ break; } - case _INIT_CALL_PY_EXACT_ARGS_0: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 0; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); - for (int i = 0; i < argcount; i++) { - new_frame->localsplus[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; - break; - } - - case _INIT_CALL_PY_EXACT_ARGS_1: { - PyObject **args; - PyObject *self_or_null; - PyObject 
*callable; - _PyInterpreterFrame *new_frame; - oparg = 1; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); - for (int i = 0; i < argcount; i++) { - new_frame->localsplus[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; - break; - } - - case _INIT_CALL_PY_EXACT_ARGS_2: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 2; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); - for (int i = 0; i < argcount; i++) { - new_frame->localsplus[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; - break; - } - - case _INIT_CALL_PY_EXACT_ARGS_3: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 3; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); - for (int i = 0; i < argcount; i++) { - new_frame->localsplus[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - 
stack_pointer += -1 - oparg; - break; - } - - case _INIT_CALL_PY_EXACT_ARGS_4: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 4; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int argcount = oparg; - if (self_or_null != NULL) { - args--; - argcount++; - } - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); - for (int i = 0; i < argcount; i++) { - new_frame->localsplus[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; - break; - } - case _INIT_CALL_PY_EXACT_ARGS: { PyObject **args; PyObject *self_or_null; @@ -4133,37 +4003,57 @@ stack_pointer[-2 - oparg] = (PyObject *)new_frame; stack_pointer += -1 - oparg; } - // _SAVE_RETURN_OFFSET + break; + } + + case _CHECK_CALL_PY_EXACT_ARGS: { + // _CHECK_FUNCTION_EXACT_ARGS { + PyObject *self_or_null; + PyObject *callable; oparg = CURRENT_OPARG(); - #if TIER_ONE - frame->return_offset = (uint16_t)(next_instr - this_instr); - #endif - #if TIER_TWO - frame->return_offset = oparg; - #endif - } - // _PUSH_FRAME + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE + { + PyObject *callable; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, 
code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; _PyInterpreterFrame *new_frame; - new_frame = (_PyInterpreterFrame *)stack_pointer[-1]; - // Write it out explicitly because it's subtly different. - // Eventually this should be the only occurrence of this code. - assert(tstate->interp->eval_frame == NULL); - stack_pointer += -1; - _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame->previous = frame; - CALL_STAT_INC(inlined_py_calls); - frame = tstate->current_frame = new_frame; - tstate->py_recursion_remaining--; - LOAD_SP(); - LOAD_IP(0); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; + oparg = CURRENT_OPARG(); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; } - #endif + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; } break; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8e408ffbb1c2b5..eb3f41b2cfcc2d 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -486,6 +486,20 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s } } +static void +match_supers(_PyUOpInstruction *buffer, int buffer_size) +{ + _PyUOpInstruction *end = buffer + buffer_size; + _PyUOpInstruction *this_instr = buffer; + while (this_instr < end) { + switch(this_instr->opcode) { +#include "uop_super_matcher_cases.c.h" + default: + this_instr++; + } + } +} + // 0 - 
failure, no error raised, just fall back to Tier 1 // -1 - failure, and raise error // 1 - optimizer success @@ -523,6 +537,7 @@ _Py_uop_analyze_and_optimize( assert(err == 1); remove_unneeded_uops(buffer, buffer_size); + match_supers(buffer, buffer_size); OPT_STAT_INC(optimizer_successes); return 1; diff --git a/Python/uop_super_matcher_cases.c.h b/Python/uop_super_matcher_cases.c.h index 3e7969c056aa00..9245b62a4de368 100644 --- a/Python/uop_super_matcher_cases.c.h +++ b/Python/uop_super_matcher_cases.c.h @@ -9,118 +9,66 @@ #define TIER_TWO 2 case _GUARD_BOTH_INT: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_MULTIPLY_INT: { - REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if (this_instr[1].opcode == _BINARY_OP_MULTIPLY_INT) { + DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT\n");REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } - break; - } - - case _GUARD_BOTH_INT: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_ADD_INT: { - REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_ADD_INT, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if (this_instr[1].opcode == _BINARY_OP_ADD_INT) { + DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_ADD_INT\n");REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_ADD_INT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } - break; - } - - case _GUARD_BOTH_INT: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_SUBTRACT_INT: { - REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if 
(this_instr[1].opcode == _BINARY_OP_SUBTRACT_INT) { + DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT\n");REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } + this_instr += 1; break; } case _GUARD_BOTH_FLOAT: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_MULTIPLY_FLOAT: { - REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if (this_instr[1].opcode == _BINARY_OP_MULTIPLY_FLOAT) { + DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT\n");REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } - break; - } - - case _GUARD_BOTH_FLOAT: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_ADD_FLOAT: { - REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if (this_instr[1].opcode == _BINARY_OP_ADD_FLOAT) { + DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT\n");REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } - break; - } - - case _GUARD_BOTH_FLOAT: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_SUBTRACT_FLOAT: { - REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if (this_instr[1].opcode == _BINARY_OP_SUBTRACT_FLOAT) { + DPRINTF(2, "Inserting super 
_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT\n");REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } + this_instr += 1; break; } case _GUARD_BOTH_UNICODE: { - next_instr++; - switch (next_instr->opcode) { - case _BINARY_OP_ADD_UNICODE: { - REPLACE_OP(this_instr, _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE, this_instr[0]->oparg, this_instr[0]->operand); - next_instr++; - break; - } + if (this_instr[1].opcode == _BINARY_OP_ADD_UNICODE) { + DPRINTF(2, "Inserting super _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE\n");REPLACE_OP(this_instr, _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + break; } + this_instr += 1; break; } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - next_instr++; - switch (next_instr->opcode) { + switch (this_instr[1].opcode) { case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - next_instr++; - switch (next_instr->opcode) { + switch (this_instr[2].opcode) { case _CHECK_FUNCTION_EXACT_ARGS: { - next_instr++; - switch (next_instr->opcode) { + switch (this_instr[3].opcode) { case _CHECK_STACK_SPACE: { - next_instr++; - switch (next_instr->opcode) { - case _INIT_CALL_PY_EXACT_ARGS: { - next_instr++; - switch (next_instr->opcode) { - case _SAVE_RETURN_OFFSET: { - next_instr++; - switch (next_instr->opcode) { - case _PUSH_FRAME: { - REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS, this_instr[5]->oparg, this_instr[2]->operand); - next_instr++; - break; - } - } - break; - } - } - break; - } + if (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS\n");REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS, this_instr[4].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), 
_NOP, 0, 0); }this_instr += 5; + break; } break; } @@ -131,6 +79,22 @@ break; } } + this_instr += 1; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS: { + switch (this_instr[1].opcode) { + case _CHECK_STACK_SPACE: { + if (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS\n");REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS, this_instr[2].oparg, this_instr[0].operand); + for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 3; + break; + } + break; + } + } + this_instr += 1; break; } diff --git a/Tools/cases_generator/tier2_super_matcher_generator.py b/Tools/cases_generator/tier2_super_matcher_generator.py index fdde71daa53afc..77164fb9c77b73 100644 --- a/Tools/cases_generator/tier2_super_matcher_generator.py +++ b/Tools/cases_generator/tier2_super_matcher_generator.py @@ -45,46 +45,56 @@ def generate_tier2( ) out = CWriter(outfile, 2, lines) out.emit("\n") + first_uops: dict[str, list[Instruction]] = {} + # Extract out common first uops for name, super_uop in analysis.super_uops.items(): - middle_uops = super_uop.parts[1:-1] first_uop = super_uop.parts[0] - last_uop = super_uop.parts[-1] - - oparg = 0 - operand = 0 - for idx, part in enumerate(super_uop.parts): - if part.properties.oparg: - oparg = idx - if len(part.caches) > 0 and part.caches[0] != "unused": - operand = idx - - out.emit(f"case {first_uop.name}: ") + if first_uop.name in first_uops: + first_uops[first_uop.name].append(super_uop) + continue + first_uops[first_uop.name] = [super_uop] + for first_uop_name, sub_op in first_uops.items(): + depth = 0 + out.emit(f"case {first_uop_name}: ") out.emit("{\n") - out.emit("next_instr++;\n") + depth += 1 + for super_uop in sub_op: + middle_uops = super_uop.parts[1:-1] + last_uop = super_uop.parts[-1] - for part in middle_uops: - out.emit("switch (next_instr->opcode) {\n") - out.emit(f"case {part.name}: ") - out.emit("{\n") - out.emit("next_instr++;\n") - out.emit("switch 
(next_instr->opcode) {\n") - out.emit(f"case {last_uop.name}: ") - out.emit("{\n") - out.emit(f"REPLACE_OP(this_instr, {name}, this_instr[{oparg}]->oparg, this_instr[{operand}]->operand);\n") - out.emit("next_instr++;\n") - out.emit("break;\n") - out.emit("}\n") - out.emit("}\n") - for part in middle_uops: - out.start_line() + oparg = 0 + operand = 0 + for idx, part in enumerate(super_uop.parts): + if part.properties.oparg: + oparg = idx + if len(part.caches) > 0 and part.caches[0] != "unused": + operand = idx + + for part in middle_uops: + out.emit(f"switch (this_instr[{depth}].opcode) {{\n") + out.emit(f"case {part.name}: ") + out.emit("{\n") + depth += 1 + out.emit(f"if (this_instr[{depth}].opcode == {last_uop.name}) {{\n") + out.emit(f'DPRINTF(2, "Inserting super {super_uop.name}\\n");') + out.emit(f"REPLACE_OP(this_instr, {super_uop.name}, this_instr[{oparg}].oparg, this_instr[{operand}].operand);\n") + out.emit(f"for (int i = 1; i < {depth + 1}; i++) {{ REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }}") + out.emit(f"this_instr += {depth + 1};\n") out.emit("break;\n") - out.start_line() - out.emit("}\n") out.emit("}\n") - out.start_line() + for part in middle_uops: + out.start_line() + out.emit("break;\n") + out.start_line() + out.emit("}\n") + out.emit("}\n") + out.start_line() + + out.emit(f"this_instr += 1;\n") out.emit("break;\n") out.emit("}") out.emit("\n\n") + outfile.write("#undef TIER_TWO\n") diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 72eed3041c55c9..3378c9d15dfd93 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -43,6 +43,8 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: for uop in sorted(analysis.uops.values(), key=lambda t: t.name): if uop.is_viable() and uop.properties.tier != 1: out.emit(f'[{uop.name}] = "{uop.name}",\n') + for super_uop in sorted(analysis.super_uops.values(), 
key=lambda t: t.name): + out.emit(f'[{super_uop.name}] = "{super_uop.name}",\n') out.emit("};\n") out.emit("#endif // NEED_OPCODE_METADATA\n\n") From 6bb6aceefc844ee0421acff20522d98530ba3d5e Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 06:21:08 +0800 Subject: [PATCH 04/14] fix side exits --- Python/executor_cases.c.h | 28 ++++++++++++------------ Tools/cases_generator/tier2_generator.py | 1 + 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 265bb3847a932c..b67382c84d5541 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3739,8 +3739,8 @@ PyObject *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyLong_CheckExact(left)) goto side_exit; - if (!PyLong_CheckExact(right)) goto side_exit; + if (!PyLong_CheckExact(left)) goto deoptimize; + if (!PyLong_CheckExact(right)) goto deoptimize; } // _BINARY_OP_MULTIPLY_INT { @@ -3767,8 +3767,8 @@ PyObject *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyLong_CheckExact(left)) goto side_exit; - if (!PyLong_CheckExact(right)) goto side_exit; + if (!PyLong_CheckExact(left)) goto deoptimize; + if (!PyLong_CheckExact(right)) goto deoptimize; } // _BINARY_OP_ADD_INT { @@ -3795,8 +3795,8 @@ PyObject *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyLong_CheckExact(left)) goto side_exit; - if (!PyLong_CheckExact(right)) goto side_exit; + if (!PyLong_CheckExact(left)) goto deoptimize; + if (!PyLong_CheckExact(right)) goto deoptimize; } // _BINARY_OP_SUBTRACT_INT { @@ -3823,8 +3823,8 @@ PyObject *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyFloat_CheckExact(left)) goto side_exit; - if (!PyFloat_CheckExact(right)) goto side_exit; + if (!PyFloat_CheckExact(left)) goto deoptimize; + if (!PyFloat_CheckExact(right)) goto deoptimize; } // _BINARY_OP_MULTIPLY_FLOAT { @@ -3851,8 +3851,8 @@ PyObject *left; 
right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyFloat_CheckExact(left)) goto side_exit; - if (!PyFloat_CheckExact(right)) goto side_exit; + if (!PyFloat_CheckExact(left)) goto deoptimize; + if (!PyFloat_CheckExact(right)) goto deoptimize; } // _BINARY_OP_ADD_FLOAT { @@ -3879,8 +3879,8 @@ PyObject *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyFloat_CheckExact(left)) goto side_exit; - if (!PyFloat_CheckExact(right)) goto side_exit; + if (!PyFloat_CheckExact(left)) goto deoptimize; + if (!PyFloat_CheckExact(right)) goto deoptimize; } // _BINARY_OP_SUBTRACT_FLOAT { @@ -3907,8 +3907,8 @@ PyObject *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (!PyUnicode_CheckExact(left)) goto side_exit; - if (!PyUnicode_CheckExact(right)) goto side_exit; + if (!PyUnicode_CheckExact(left)) goto deoptimize; + if (!PyUnicode_CheckExact(right)) goto deoptimize; } // _BINARY_OP_ADD_UNICODE { diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 3e81ad3f00b99f..c1fac9a8affbf8 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -217,6 +217,7 @@ def generate_tier2( out.start_line() out.emit("}") out.emit("\n\n") + TIER2_REPLACEMENT_FUNCTIONS["EXIT_IF"] = tier2_replace_deopt for name, super_uop in analysis.super_uops.items(): out.emit(f"case {name}: {{\n") stack = Stack() From c04ea6ce1d0ac4a99d748e17090f6e02ae4e5577 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 06:40:12 +0800 Subject: [PATCH 05/14] add a new super --- Include/internal/pycore_uop_ids.h | 3 ++- Include/internal/pycore_uop_metadata.h | 1 + Python/bytecodes.c | 2 ++ Python/executor_cases.c.h | 36 ++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 08dcdcf04bd2ee..0d6264915e2673 100644 --- 
a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -271,7 +271,8 @@ extern "C" { #define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 424 #define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 425 #define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 426 -#define MAX_UOP_ID 426 +#define _SAVE_RETURN_OFFSET__PUSH_FRAME 427 +#define MAX_UOP_ID 427 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 314679d43b992c..941496ed76c1b0 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -472,6 +472,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT] = "_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT", [_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT] = "_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT", [_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE] = "_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE", + [_SAVE_RETURN_OFFSET__PUSH_FRAME] = "_SAVE_RETURN_OFFSET__PUSH_FRAME", }; #endif // NEED_OPCODE_METADATA diff --git a/Python/bytecodes.c b/Python/bytecodes.c index cbcef91e9854e7..2411655b914d13 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3193,6 +3193,8 @@ dummy_func( _CHECK_STACK_SPACE + _INIT_CALL_PY_EXACT_ARGS; + super(_SAVE_RETURN_OFFSET__PUSH_FRAME) = _SAVE_RETURN_OFFSET + _PUSH_FRAME; + inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, callable, self_or_null, args[oparg] -- unused)) { DEOPT_IF(tstate->interp->eval_frame); int argcount = oparg; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b67382c84d5541..e17dc00376ef41 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4058,4 +4058,40 @@ break; } + case _SAVE_RETURN_OFFSET__PUSH_FRAME: { + // _SAVE_RETURN_OFFSET + { + oparg = CURRENT_OPARG(); + #if TIER_ONE + frame->return_offset = (uint16_t)(next_instr - this_instr); + #endif + #if TIER_TWO + frame->return_offset = oparg; + #endif + } + // _PUSH_FRAME 
+ { + _PyInterpreterFrame *new_frame; + new_frame = (_PyInterpreterFrame *)stack_pointer[-1]; + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + stack_pointer += -1; + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + #if LLTRACE && TIER_ONE + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); + if (lltrace < 0) { + goto exit_unwind; + } + #endif + } + break; + } + #undef TIER_TWO From b912db1cafbfaf8dee47cd2cb45e68c107acf163 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:46:12 +0800 Subject: [PATCH 06/14] add makefile, simplify generated matcher --- Lib/test/test_capi/test_opt.py | 5 +- Makefile.pre.in | 6 +- Python/uop_super_matcher_cases.c.h | 108 ++++++++++-------- .../tier2_super_matcher_generator.py | 29 ++--- 4 files changed, 77 insertions(+), 71 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 04345b5a9158c2..e8e6a9049e830f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -489,7 +489,8 @@ def dummy(x): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_SAVE_RETURN_OFFSET__PUSH_FRAME", uops) + # raise Exception('\n'.join(iter_opnames(ex))) self.assertIn("_GUARD_BOTH_INT__BINARY_OP_ADD_INT", uops) def test_branch_taken(self): @@ -834,7 +835,7 @@ def testfunc(n): self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. 
- self.assertIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + self.assertIn("_GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT", uops) def test_compare_op_type_propagation_float(self): def testfunc(n): diff --git a/Makefile.pre.in b/Makefile.pre.in index 7533a49b4392f0..21d09fc41b0f6c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1890,6 +1890,8 @@ regen-cases: -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_super_matcher_generator.py \ + -o $(srcdir)/Python/uop_super_matcher_cases.c.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/optimizer_generator.py \ -o $(srcdir)/Python/optimizer_cases.c.h.new \ $(srcdir)/Python/optimizer_bytecodes.c \ @@ -1905,6 +1907,7 @@ regen-cases: $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_opcode_metadata.h $(srcdir)/Include/internal/pycore_opcode_metadata.h.new $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_uop_metadata.h $(srcdir)/Include/internal/pycore_uop_metadata.h.new $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/uop_super_matcher_cases.c.h $(srcdir)/Python/uop_super_matcher_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Python/optimizer_cases.c.h $(srcdir)/Python/optimizer_cases.c.h.new $(UPDATE_FILE) $(srcdir)/Lib/_opcode_metadata.py $(srcdir)/Lib/_opcode_metadata.py.new @@ -1928,7 +1931,8 @@ Python/optimizer.o: \ Python/optimizer_analysis.o: \ $(srcdir)/Include/internal/pycore_opcode_metadata.h \ $(srcdir)/Include/internal/pycore_optimizer.h \ - $(srcdir)/Python/optimizer_cases.c.h + $(srcdir)/Python/optimizer_cases.c.h \ + $(srcdir)/Python/uop_super_matcher_cases.c.h Python/frozen.o: $(FROZEN_FILES_OUT) diff --git a/Python/uop_super_matcher_cases.c.h b/Python/uop_super_matcher_cases.c.h index 
9245b62a4de368..9304f50f82ebc2 100644 --- a/Python/uop_super_matcher_cases.c.h +++ b/Python/uop_super_matcher_cases.c.h @@ -9,19 +9,25 @@ #define TIER_TWO 2 case _GUARD_BOTH_INT: { - if (this_instr[1].opcode == _BINARY_OP_MULTIPLY_INT) { - DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT\n");REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if ((this_instr[1].opcode == _BINARY_OP_MULTIPLY_INT)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT\n"); + REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } - if (this_instr[1].opcode == _BINARY_OP_ADD_INT) { - DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_ADD_INT\n");REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_ADD_INT, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if ((this_instr[1].opcode == _BINARY_OP_ADD_INT)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_ADD_INT\n"); + REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_ADD_INT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } - if (this_instr[1].opcode == _BINARY_OP_SUBTRACT_INT) { - DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT\n");REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if ((this_instr[1].opcode == _BINARY_OP_SUBTRACT_INT)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT\n"); + 
REPLACE_OP(this_instr, _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } this_instr += 1; @@ -29,19 +35,25 @@ } case _GUARD_BOTH_FLOAT: { - if (this_instr[1].opcode == _BINARY_OP_MULTIPLY_FLOAT) { - DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT\n");REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if ((this_instr[1].opcode == _BINARY_OP_MULTIPLY_FLOAT)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT\n"); + REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } - if (this_instr[1].opcode == _BINARY_OP_ADD_FLOAT) { - DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT\n");REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if ((this_instr[1].opcode == _BINARY_OP_ADD_FLOAT)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT\n"); + REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } - if (this_instr[1].opcode == _BINARY_OP_SUBTRACT_FLOAT) { - DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT\n");REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if 
((this_instr[1].opcode == _BINARY_OP_SUBTRACT_FLOAT)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT\n"); + REPLACE_OP(this_instr, _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } this_instr += 1; @@ -49,9 +61,11 @@ } case _GUARD_BOTH_UNICODE: { - if (this_instr[1].opcode == _BINARY_OP_ADD_UNICODE) { - DPRINTF(2, "Inserting super _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE\n");REPLACE_OP(this_instr, _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE, this_instr[0].oparg, this_instr[0].operand); - for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 2; + if ((this_instr[1].opcode == _BINARY_OP_ADD_UNICODE)) { + DPRINTF(2, "Inserting super _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE\n"); + REPLACE_OP(this_instr, _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; break; } this_instr += 1; @@ -59,40 +73,36 @@ } case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { - switch (this_instr[1].opcode) { - case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - switch (this_instr[2].opcode) { - case _CHECK_FUNCTION_EXACT_ARGS: { - switch (this_instr[3].opcode) { - case _CHECK_STACK_SPACE: { - if (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS) { - DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS\n");REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS, this_instr[4].oparg, this_instr[2].operand); - for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 5; - break; - } - break; - } - } - break; - } - } - break; - } + if ((this_instr[1].opcode == _INIT_CALL_BOUND_METHOD_EXACT_ARGS) && (this_instr[2].opcode == _CHECK_FUNCTION_EXACT_ARGS) && (this_instr[3].opcode == _CHECK_STACK_SPACE) && (this_instr[4].opcode == 
_INIT_CALL_PY_EXACT_ARGS)) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS\n"); + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS, this_instr[4].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 5; + break; } this_instr += 1; break; } case _CHECK_FUNCTION_EXACT_ARGS: { - switch (this_instr[1].opcode) { - case _CHECK_STACK_SPACE: { - if (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS) { - DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS\n");REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS, this_instr[2].oparg, this_instr[0].operand); - for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }this_instr += 3; - break; - } - break; - } + if ((this_instr[1].opcode == _CHECK_STACK_SPACE) && (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS)) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS\n"); + REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS, this_instr[2].oparg, this_instr[0].operand); + for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 3; + break; + } + this_instr += 1; + break; + } + + case _SAVE_RETURN_OFFSET: { + if ((this_instr[1].opcode == _PUSH_FRAME)) { + DPRINTF(2, "Inserting super _SAVE_RETURN_OFFSET__PUSH_FRAME\n"); + REPLACE_OP(this_instr, _SAVE_RETURN_OFFSET__PUSH_FRAME, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; } this_instr += 1; break; diff --git a/Tools/cases_generator/tier2_super_matcher_generator.py b/Tools/cases_generator/tier2_super_matcher_generator.py index 77164fb9c77b73..80ddb19480cec9 100644 --- a/Tools/cases_generator/tier2_super_matcher_generator.py +++ b/Tools/cases_generator/tier2_super_matcher_generator.py @@ -54,13 +54,11 @@ def generate_tier2( continue first_uops[first_uop.name] = [super_uop] for first_uop_name, sub_op in first_uops.items(): - depth = 0 out.emit(f"case 
{first_uop_name}: ") out.emit("{\n") - depth += 1 for super_uop in sub_op: - middle_uops = super_uop.parts[1:-1] - last_uop = super_uop.parts[-1] + depth = 0 + rest_uops = super_uop.parts[1:] oparg = 0 operand = 0 @@ -70,25 +68,18 @@ def generate_tier2( if len(part.caches) > 0 and part.caches[0] != "unused": operand = idx - for part in middle_uops: - out.emit(f"switch (this_instr[{depth}].opcode) {{\n") - out.emit(f"case {part.name}: ") - out.emit("{\n") + predicates: list[tuple[int, Uop]] = [] + for part in rest_uops: depth += 1 - out.emit(f"if (this_instr[{depth}].opcode == {last_uop.name}) {{\n") - out.emit(f'DPRINTF(2, "Inserting super {super_uop.name}\\n");') + predicates.append((depth, part)) + predicate = " && ".join([f"(this_instr[{d}].opcode == {p.name})" for d, p in predicates]) + out.emit(f"if ({predicate}) {{\n") + out.emit(f'DPRINTF(2, "Inserting super {super_uop.name}\\n");\n') out.emit(f"REPLACE_OP(this_instr, {super_uop.name}, this_instr[{oparg}].oparg, this_instr[{operand}].operand);\n") - out.emit(f"for (int i = 1; i < {depth + 1}; i++) {{ REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }}") + out.emit(f"for (int i = 1; i < {depth + 1}; i++) {{ REPLACE_OP((&this_instr[i]), _NOP, 0, 0); }}\n") out.emit(f"this_instr += {depth + 1};\n") - out.emit("break;\n") + out.emit(f"break;\n") out.emit("}\n") - for part in middle_uops: - out.start_line() - out.emit("break;\n") - out.start_line() - out.emit("}\n") - out.emit("}\n") - out.start_line() out.emit(f"this_instr += 1;\n") out.emit("break;\n") From 2596f925c1248fb9e3b5fab4a2d3bad3a8e8b738 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 17:57:19 +0800 Subject: [PATCH 07/14] make tests pass --- Lib/test/test_capi/test_opt.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 7695bc06bb90cd..c6256efcb92640 100644 --- 
a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -495,8 +495,7 @@ def dummy(x): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_SAVE_RETURN_OFFSET__PUSH_FRAME", uops) - # raise Exception('\n'.join(iter_opnames(ex))) - self.assertIn("_GUARD_BOTH_INT__BINARY_OP_ADD_INT", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) def test_branch_taken(self): def testfunc(n): @@ -602,8 +601,8 @@ def testfunc(loops): res, ex = self._run_with_optimizer(testfunc, 32) self.assertIsNotNone(ex) self.assertEqual(res, 63) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + binop_count = [opname for opname in iter_opnames(ex) if "_BINARY_OP_ADD_INT" in opname] + guard_both_int_count = [opname for opname in iter_opnames(ex) if "_GUARD_BOTH_INT" in opname] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -626,8 +625,8 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + binop_count = [opname for opname in iter_opnames(ex) if "_BINARY_OP_ADD_INT" in opname] + guard_both_int_count = [opname for opname in iter_opnames(ex) if "_GUARD_BOTH_INT" in opname] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -650,8 +649,8 @@ def testfunc(loops): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + binop_count = [opname for opname in iter_opnames(ex) if 
"_BINARY_OP_ADD_INT" in opname] + guard_both_int_count = [opname for opname in iter_opnames(ex) if "_GUARD_BOTH_INT" in opname] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -668,7 +667,7 @@ def testfunc(loops): res, ex = self._run_with_optimizer(testfunc, 64) self.assertIsNotNone(ex) - binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + binop_count = [opname for opname in iter_opnames(ex) if "_BINARY_OP_ADD_INT" in opname] self.assertGreaterEqual(len(binop_count), 3) def test_call_py_exact_args(self): @@ -681,7 +680,7 @@ def dummy(x): res, ex = self._run_with_optimizer(testfunc, 32) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_PUSH_FRAME", uops) + self.assertIn("_SAVE_RETURN_OFFSET__PUSH_FRAME", uops) self.assertIn("_BINARY_OP_ADD_INT", uops) self.assertNotIn("_CHECK_PEP_523", uops) @@ -714,8 +713,8 @@ def testfunc(n): self.assertEqual(res, 4) self.assertIsNotNone(ex) uops = get_opnames(ex) - self.assertIn("_GUARD_BOTH_INT", uops) - guard_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + self.assertIn("_GUARD_BOTH_INT__BINARY_OP_ADD_INT", uops) + guard_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT__BINARY_OP_ADD_INT"] self.assertEqual(len(guard_count), 1) def test_comprehension(self): @@ -807,7 +806,7 @@ def testfunc(n): self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_ADD_FLOAT", uops) + self.assertIn("_GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT", uops) def test_float_subtract_constant_propagation(self): def testfunc(n): @@ -824,7 +823,7 @@ def testfunc(n): self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. 
# We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_SUBTRACT_FLOAT", uops) + self.assertIn("_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT", uops) def test_float_multiply_constant_propagation(self): def testfunc(n): From 7ab3a58d144532e5e3461a6f566cb89c4589f711 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 20:42:26 +0800 Subject: [PATCH 08/14] replicate with supers --- Include/internal/pycore_uop_ids.h | 247 ++++--- Include/internal/pycore_uop_metadata.h | 43 ++ Python/bytecodes.c | 6 +- Python/executor_cases.c.h | 952 ++++++++++++++++++++++++- Python/optimizer.c | 15 - Python/optimizer_analysis.c | 21 + Python/uop_super_matcher_cases.c.h | 95 +++ Tools/cases_generator/analyzer.py | 21 +- 8 files changed, 1269 insertions(+), 131 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8578fe985e4ff0..c1369d0d87f331 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -64,17 +64,27 @@ extern "C" { #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH #define _CHECK_FUNCTION_EXACT_ARGS 318 -#define _CHECK_GLOBALS 319 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 320 -#define _CHECK_PEP_523 321 -#define _CHECK_STACK_SPACE 322 -#define _CHECK_VALIDITY 323 -#define _CHECK_VALIDITY_AND_SET_IP 324 -#define _COLD_EXIT 325 -#define _COMPARE_OP 326 -#define _COMPARE_OP_FLOAT 327 -#define _COMPARE_OP_INT 328 -#define _COMPARE_OP_STR 329 +#define _CHECK_FUNCTION_EXACT_ARGS_0 319 +#define _CHECK_FUNCTION_EXACT_ARGS_1 320 +#define _CHECK_FUNCTION_EXACT_ARGS_2 321 +#define _CHECK_FUNCTION_EXACT_ARGS_3 322 +#define _CHECK_FUNCTION_EXACT_ARGS_4 323 +#define _CHECK_GLOBALS 324 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 325 +#define _CHECK_PEP_523 326 +#define _CHECK_STACK_SPACE 327 +#define _CHECK_STACK_SPACE_0 328 +#define _CHECK_STACK_SPACE_1 329 +#define _CHECK_STACK_SPACE_2 330 
+#define _CHECK_STACK_SPACE_3 331 +#define _CHECK_STACK_SPACE_4 332 +#define _CHECK_VALIDITY 333 +#define _CHECK_VALIDITY_AND_SET_IP 334 +#define _COLD_EXIT 335 +#define _COMPARE_OP 336 +#define _COMPARE_OP_FLOAT 337 +#define _COMPARE_OP_INT 338 +#define _COMPARE_OP_STR 339 #define _CONTAINS_OP CONTAINS_OP #define _CONVERT_VALUE CONVERT_VALUE #define _COPY COPY @@ -89,36 +99,41 @@ extern "C" { #define _DICT_UPDATE DICT_UPDATE #define _END_SEND END_SEND #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _FATAL_ERROR 330 +#define _FATAL_ERROR 340 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 331 +#define _FOR_ITER 341 #define _FOR_ITER_GEN FOR_ITER_GEN -#define _FOR_ITER_TIER_TWO 332 +#define _FOR_ITER_TIER_TWO 342 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 333 -#define _GUARD_BOTH_INT 334 -#define _GUARD_BOTH_UNICODE 335 -#define _GUARD_BUILTINS_VERSION 336 -#define _GUARD_DORV_VALUES 337 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 338 -#define _GUARD_GLOBALS_VERSION 339 -#define _GUARD_IS_FALSE_POP 340 -#define _GUARD_IS_NONE_POP 341 -#define _GUARD_IS_NOT_NONE_POP 342 -#define _GUARD_IS_TRUE_POP 343 -#define _GUARD_KEYS_VERSION 344 -#define _GUARD_NOT_EXHAUSTED_LIST 345 -#define _GUARD_NOT_EXHAUSTED_RANGE 346 -#define _GUARD_NOT_EXHAUSTED_TUPLE 347 -#define _GUARD_TYPE_VERSION 348 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 349 -#define _INIT_CALL_PY_EXACT_ARGS 350 +#define _GUARD_BOTH_FLOAT 343 +#define _GUARD_BOTH_INT 344 +#define _GUARD_BOTH_UNICODE 345 +#define _GUARD_BUILTINS_VERSION 346 +#define _GUARD_DORV_VALUES 347 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 348 +#define _GUARD_GLOBALS_VERSION 349 +#define _GUARD_IS_FALSE_POP 350 +#define _GUARD_IS_NONE_POP 351 +#define _GUARD_IS_NOT_NONE_POP 352 +#define 
_GUARD_IS_TRUE_POP 353 +#define _GUARD_KEYS_VERSION 354 +#define _GUARD_NOT_EXHAUSTED_LIST 355 +#define _GUARD_NOT_EXHAUSTED_RANGE 356 +#define _GUARD_NOT_EXHAUSTED_TUPLE 357 +#define _GUARD_TYPE_VERSION 358 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 359 +#define _INIT_CALL_PY_EXACT_ARGS 360 +#define _INIT_CALL_PY_EXACT_ARGS_0 361 +#define _INIT_CALL_PY_EXACT_ARGS_1 362 +#define _INIT_CALL_PY_EXACT_ARGS_2 363 +#define _INIT_CALL_PY_EXACT_ARGS_3 364 +#define _INIT_CALL_PY_EXACT_ARGS_4 365 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -135,65 +150,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 351 -#define _IS_NONE 352 +#define _INTERNAL_INCREMENT_OPT_COUNTER 366 +#define _IS_NONE 367 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 353 -#define _ITER_CHECK_RANGE 354 -#define _ITER_CHECK_TUPLE 355 -#define _ITER_JUMP_LIST 356 -#define _ITER_JUMP_RANGE 357 -#define _ITER_JUMP_TUPLE 358 -#define _ITER_NEXT_LIST 359 -#define _ITER_NEXT_RANGE 360 -#define _ITER_NEXT_TUPLE 361 -#define _JUMP_TO_TOP 362 +#define _ITER_CHECK_LIST 368 +#define _ITER_CHECK_RANGE 369 +#define _ITER_CHECK_TUPLE 370 +#define _ITER_JUMP_LIST 371 +#define _ITER_JUMP_RANGE 372 +#define _ITER_JUMP_TUPLE 373 +#define _ITER_NEXT_LIST 374 +#define _ITER_NEXT_RANGE 375 +#define _ITER_NEXT_TUPLE 376 +#define _JUMP_TO_TOP 377 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR -#define _LOAD_ATTR 363 -#define _LOAD_ATTR_CLASS 364 -#define _LOAD_ATTR_CLASS_0 365 -#define _LOAD_ATTR_CLASS_1 366 +#define _LOAD_ATTR 378 +#define _LOAD_ATTR_CLASS 379 +#define _LOAD_ATTR_CLASS_0 380 +#define _LOAD_ATTR_CLASS_1 381 #define 
_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 367 -#define _LOAD_ATTR_INSTANCE_VALUE_0 368 -#define _LOAD_ATTR_INSTANCE_VALUE_1 369 -#define _LOAD_ATTR_METHOD_LAZY_DICT 370 -#define _LOAD_ATTR_METHOD_NO_DICT 371 -#define _LOAD_ATTR_METHOD_WITH_VALUES 372 -#define _LOAD_ATTR_MODULE 373 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 374 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 375 +#define _LOAD_ATTR_INSTANCE_VALUE 382 +#define _LOAD_ATTR_INSTANCE_VALUE_0 383 +#define _LOAD_ATTR_INSTANCE_VALUE_1 384 +#define _LOAD_ATTR_METHOD_LAZY_DICT 385 +#define _LOAD_ATTR_METHOD_NO_DICT 386 +#define _LOAD_ATTR_METHOD_WITH_VALUES 387 +#define _LOAD_ATTR_MODULE 388 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 389 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 390 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 376 -#define _LOAD_ATTR_SLOT_0 377 -#define _LOAD_ATTR_SLOT_1 378 -#define _LOAD_ATTR_WITH_HINT 379 +#define _LOAD_ATTR_SLOT 391 +#define _LOAD_ATTR_SLOT_0 392 +#define _LOAD_ATTR_SLOT_1 393 +#define _LOAD_ATTR_WITH_HINT 394 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 380 -#define _LOAD_CONST_INLINE_BORROW 381 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 382 -#define _LOAD_CONST_INLINE_WITH_NULL 383 +#define _LOAD_CONST_INLINE 395 +#define _LOAD_CONST_INLINE_BORROW 396 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 397 +#define _LOAD_CONST_INLINE_WITH_NULL 398 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 384 -#define _LOAD_FAST_0 385 -#define _LOAD_FAST_1 386 -#define _LOAD_FAST_2 387 -#define _LOAD_FAST_3 388 -#define _LOAD_FAST_4 389 -#define _LOAD_FAST_5 390 -#define _LOAD_FAST_6 391 -#define _LOAD_FAST_7 392 +#define _LOAD_FAST 399 +#define _LOAD_FAST_0 400 +#define _LOAD_FAST_1 401 +#define _LOAD_FAST_2 402 +#define _LOAD_FAST_3 403 +#define _LOAD_FAST_4 404 +#define _LOAD_FAST_5 405 +#define _LOAD_FAST_6 406 +#define _LOAD_FAST_7 
407 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 393 -#define _LOAD_GLOBAL_BUILTINS 394 -#define _LOAD_GLOBAL_MODULE 395 +#define _LOAD_GLOBAL 408 +#define _LOAD_GLOBAL_BUILTINS 409 +#define _LOAD_GLOBAL_MODULE 410 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -207,47 +222,47 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 396 -#define _POP_JUMP_IF_FALSE 397 -#define _POP_JUMP_IF_TRUE 398 +#define _POP_FRAME 411 +#define _POP_JUMP_IF_FALSE 412 +#define _POP_JUMP_IF_TRUE 413 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 399 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 414 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 400 +#define _PUSH_FRAME 415 #define _PUSH_NULL PUSH_NULL #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 401 -#define _SEND 402 +#define _SAVE_RETURN_OFFSET 416 +#define _SEND 417 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 403 -#define _STORE_ATTR 404 -#define _STORE_ATTR_INSTANCE_VALUE 405 -#define _STORE_ATTR_SLOT 406 +#define _START_EXECUTOR 418 +#define _STORE_ATTR 419 +#define _STORE_ATTR_INSTANCE_VALUE 420 +#define _STORE_ATTR_SLOT 421 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 407 -#define _STORE_FAST_0 408 -#define _STORE_FAST_1 409 -#define _STORE_FAST_2 410 -#define _STORE_FAST_3 411 -#define _STORE_FAST_4 412 -#define _STORE_FAST_5 413 -#define _STORE_FAST_6 414 -#define _STORE_FAST_7 415 +#define 
_STORE_FAST 422 +#define _STORE_FAST_0 423 +#define _STORE_FAST_1 424 +#define _STORE_FAST_2 425 +#define _STORE_FAST_3 426 +#define _STORE_FAST_4 427 +#define _STORE_FAST_5 428 +#define _STORE_FAST_6 429 +#define _STORE_FAST_7 430 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 416 +#define _STORE_SUBSCR 431 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 417 +#define _TO_BOOL 432 #define _TO_BOOL_ALWAYS_TRUE TO_BOOL_ALWAYS_TRUE #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT @@ -258,22 +273,32 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 418 +#define _UNPACK_SEQUENCE 433 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define _CALL_BOUND_METHOD_EXACT_ARGS 419 -#define _CHECK_CALL_PY_EXACT_ARGS 420 -#define _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT 421 -#define _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT 422 -#define _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT 423 -#define _GUARD_BOTH_INT__BINARY_OP_ADD_INT 424 -#define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 425 -#define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 426 -#define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 427 -#define _SAVE_RETURN_OFFSET__PUSH_FRAME 428 -#define MAX_UOP_ID 428 +#define _CALL_BOUND_METHOD_EXACT_ARGS 434 +#define _CALL_BOUND_METHOD_EXACT_ARGS_0 435 +#define _CALL_BOUND_METHOD_EXACT_ARGS_1 436 +#define _CALL_BOUND_METHOD_EXACT_ARGS_2 437 +#define _CALL_BOUND_METHOD_EXACT_ARGS_3 438 +#define _CALL_BOUND_METHOD_EXACT_ARGS_4 439 +#define _CHECK_CALL_PY_EXACT_ARGS 
440 +#define _CHECK_CALL_PY_EXACT_ARGS_0 441 +#define _CHECK_CALL_PY_EXACT_ARGS_1 442 +#define _CHECK_CALL_PY_EXACT_ARGS_2 443 +#define _CHECK_CALL_PY_EXACT_ARGS_3 444 +#define _CHECK_CALL_PY_EXACT_ARGS_4 445 +#define _GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT 446 +#define _GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT 447 +#define _GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT 448 +#define _GUARD_BOTH_INT__BINARY_OP_ADD_INT 449 +#define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 450 +#define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 451 +#define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 452 +#define _SAVE_RETURN_OFFSET__PUSH_FRAME 453 +#define MAX_UOP_ID 453 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1a9c6711e00003..9998f032955987 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -189,8 +189,23 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG, [_CHECK_PEP_523] = HAS_DEOPT_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS_0] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS_1] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS_2] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS_3] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS_4] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_STACK_SPACE_0] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_STACK_SPACE_1] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_STACK_SPACE_2] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_STACK_SPACE_3] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, + [_CHECK_STACK_SPACE_4] = HAS_DEOPT_FLAG | HAS_PASSTHROUGH_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | 
HAS_PASSTHROUGH_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_0] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_1] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_2] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = HAS_ESCAPES_FLAG, [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, @@ -242,6 +257,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, [_STORE_FAST] = 8, + [_CHECK_FUNCTION_EXACT_ARGS] = 5, + [_CHECK_STACK_SPACE] = 5, + [_INIT_CALL_PY_EXACT_ARGS] = 5, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -292,10 +310,20 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_EG_MATCH] = "_CHECK_EG_MATCH", [_CHECK_EXC_MATCH] = "_CHECK_EXC_MATCH", [_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS", + [_CHECK_FUNCTION_EXACT_ARGS_0] = "_CHECK_FUNCTION_EXACT_ARGS_0", + [_CHECK_FUNCTION_EXACT_ARGS_1] = "_CHECK_FUNCTION_EXACT_ARGS_1", + [_CHECK_FUNCTION_EXACT_ARGS_2] = "_CHECK_FUNCTION_EXACT_ARGS_2", + [_CHECK_FUNCTION_EXACT_ARGS_3] = "_CHECK_FUNCTION_EXACT_ARGS_3", + [_CHECK_FUNCTION_EXACT_ARGS_4] = "_CHECK_FUNCTION_EXACT_ARGS_4", [_CHECK_GLOBALS] = "_CHECK_GLOBALS", [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES", [_CHECK_PEP_523] = "_CHECK_PEP_523", [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", + [_CHECK_STACK_SPACE_0] = "_CHECK_STACK_SPACE_0", + [_CHECK_STACK_SPACE_1] = "_CHECK_STACK_SPACE_1", + [_CHECK_STACK_SPACE_2] = "_CHECK_STACK_SPACE_2", + [_CHECK_STACK_SPACE_3] = "_CHECK_STACK_SPACE_3", + [_CHECK_STACK_SPACE_4] = "_CHECK_STACK_SPACE_4", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP", [_COLD_EXIT] = "_COLD_EXIT", @@ -346,6 +374,11 @@ const char *const 
_PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", + [_INIT_CALL_PY_EXACT_ARGS_0] = "_INIT_CALL_PY_EXACT_ARGS_0", + [_INIT_CALL_PY_EXACT_ARGS_1] = "_INIT_CALL_PY_EXACT_ARGS_1", + [_INIT_CALL_PY_EXACT_ARGS_2] = "_INIT_CALL_PY_EXACT_ARGS_2", + [_INIT_CALL_PY_EXACT_ARGS_3] = "_INIT_CALL_PY_EXACT_ARGS_3", + [_INIT_CALL_PY_EXACT_ARGS_4] = "_INIT_CALL_PY_EXACT_ARGS_4", [_INTERNAL_INCREMENT_OPT_COUNTER] = "_INTERNAL_INCREMENT_OPT_COUNTER", [_IS_NONE] = "_IS_NONE", [_IS_OP] = "_IS_OP", @@ -466,7 +499,17 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE_TWO_TUPLE] = "_UNPACK_SEQUENCE_TWO_TUPLE", [_WITH_EXCEPT_START] = "_WITH_EXCEPT_START", [_CALL_BOUND_METHOD_EXACT_ARGS] = "_CALL_BOUND_METHOD_EXACT_ARGS", + [_CALL_BOUND_METHOD_EXACT_ARGS_0] = "_CALL_BOUND_METHOD_EXACT_ARGS_0", + [_CALL_BOUND_METHOD_EXACT_ARGS_1] = "_CALL_BOUND_METHOD_EXACT_ARGS_1", + [_CALL_BOUND_METHOD_EXACT_ARGS_2] = "_CALL_BOUND_METHOD_EXACT_ARGS_2", + [_CALL_BOUND_METHOD_EXACT_ARGS_3] = "_CALL_BOUND_METHOD_EXACT_ARGS_3", + [_CALL_BOUND_METHOD_EXACT_ARGS_4] = "_CALL_BOUND_METHOD_EXACT_ARGS_4", [_CHECK_CALL_PY_EXACT_ARGS] = "_CHECK_CALL_PY_EXACT_ARGS", + [_CHECK_CALL_PY_EXACT_ARGS_0] = "_CHECK_CALL_PY_EXACT_ARGS_0", + [_CHECK_CALL_PY_EXACT_ARGS_1] = "_CHECK_CALL_PY_EXACT_ARGS_1", + [_CHECK_CALL_PY_EXACT_ARGS_2] = "_CHECK_CALL_PY_EXACT_ARGS_2", + [_CHECK_CALL_PY_EXACT_ARGS_3] = "_CHECK_CALL_PY_EXACT_ARGS_3", + [_CHECK_CALL_PY_EXACT_ARGS_4] = "_CHECK_CALL_PY_EXACT_ARGS_4", [_GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT] = "_GUARD_BOTH_FLOAT__BINARY_OP_ADD_FLOAT", [_GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT] = "_GUARD_BOTH_FLOAT__BINARY_OP_MULTIPLY_FLOAT", [_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT] = "_GUARD_BOTH_FLOAT__BINARY_OP_SUBTRACT_FLOAT", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 
b24120cbcc4f8e..b0b9ba43cf815a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3108,7 +3108,7 @@ dummy_func( DEOPT_IF(tstate->interp->eval_frame); } - op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { + replicate(5) op(_CHECK_FUNCTION_EXACT_ARGS, (func_version/2, callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) { DEOPT_IF(!PyFunction_Check(callable)); PyFunctionObject *func = (PyFunctionObject *)callable; DEOPT_IF(func->func_version != func_version); @@ -3116,14 +3116,14 @@ dummy_func( DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL)); } - op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { + replicate(5) op(_CHECK_STACK_SPACE, (callable, unused, unused[oparg] -- callable, unused, unused[oparg])) { PyFunctionObject *func = (PyFunctionObject *)callable; PyCodeObject *code = (PyCodeObject *)func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); DEOPT_IF(tstate->py_recursion_remaining <= 1); } - pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { int argcount = oparg; if (self_or_null != NULL) { args--; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0935dff7c1de7a..47d1586cbdf23b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2816,6 +2816,86 @@ break; } + case _CHECK_FUNCTION_EXACT_ARGS_0: { + PyObject *self_or_null; + PyObject *callable; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject 
*)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_1: { + PyObject *self_or_null; + PyObject *callable; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_2: { + PyObject *self_or_null; + PyObject *callable; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_3: { + PyObject *self_or_null; + PyObject *callable; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + break; + } + + case 
_CHECK_FUNCTION_EXACT_ARGS_4: { + PyObject *self_or_null; + PyObject *callable; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + break; + } + case _CHECK_FUNCTION_EXACT_ARGS: { PyObject *self_or_null; PyObject *callable; @@ -2831,6 +2911,66 @@ break; } + case _CHECK_STACK_SPACE_0: { + PyObject *callable; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + break; + } + + case _CHECK_STACK_SPACE_1: { + PyObject *callable; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + break; + } + + case _CHECK_STACK_SPACE_2: { + PyObject *callable; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + break; + } + + case _CHECK_STACK_SPACE_3: { + PyObject *callable; + oparg = 3; + 
assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + break; + } + + case _CHECK_STACK_SPACE_4: { + PyObject *callable; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + break; + } + case _CHECK_STACK_SPACE: { PyObject *callable; oparg = CURRENT_OPARG(); @@ -2842,6 +2982,136 @@ break; } + case _INIT_CALL_PY_EXACT_ARGS_0: { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS_1: { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject 
*func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS_2: { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS_3: { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + break; + } + + case _INIT_CALL_PY_EXACT_ARGS_4: { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; 
+ int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + break; + } + case _INIT_CALL_PY_EXACT_ARGS: { PyObject **args; PyObject *self_or_null; @@ -4017,7 +4287,412 @@ break; } - case _CHECK_CALL_PY_EXACT_ARGS: { + case _CALL_BOUND_METHOD_EXACT_ARGS_0: { + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *null; + PyObject *callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + if (null != NULL) goto deoptimize; + if (Py_TYPE(callable) != &PyMethod_Type) goto deoptimize; + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *callable; + PyObject *func; + PyObject *self; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + } + // _CHECK_FUNCTION_EXACT_ARGS_0 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + 
(self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_0 + { + PyObject *callable; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_0 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CALL_BOUND_METHOD_EXACT_ARGS_1: { + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *null; + PyObject *callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + if (null != NULL) goto deoptimize; + if (Py_TYPE(callable) != &PyMethod_Type) goto deoptimize; + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *callable; + PyObject *func; + PyObject *self; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + 
stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + } + // _CHECK_FUNCTION_EXACT_ARGS_1 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_1 + { + PyObject *callable; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_1 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CALL_BOUND_METHOD_EXACT_ARGS_2: { + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *null; + PyObject *callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + if 
(null != NULL) goto deoptimize; + if (Py_TYPE(callable) != &PyMethod_Type) goto deoptimize; + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *callable; + PyObject *func; + PyObject *self; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + } + // _CHECK_FUNCTION_EXACT_ARGS_2 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_2 + { + PyObject *callable; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_2 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if 
(self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CALL_BOUND_METHOD_EXACT_ARGS_3: { + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *null; + PyObject *callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + if (null != NULL) goto deoptimize; + if (Py_TYPE(callable) != &PyMethod_Type) goto deoptimize; + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *callable; + PyObject *func; + PyObject *self; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + } + // _CHECK_FUNCTION_EXACT_ARGS_3 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_3 + { + PyObject *callable; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + callable = 
stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_3 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CALL_BOUND_METHOD_EXACT_ARGS_4: { + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *null; + PyObject *callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + if (null != NULL) goto deoptimize; + if (Py_TYPE(callable) != &PyMethod_Type) goto deoptimize; + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + PyObject *callable; + PyObject *func; + PyObject *self; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + } + // _CHECK_FUNCTION_EXACT_ARGS_4 + { + PyObject *self_or_null; + PyObject *callable; + 
oparg = 4; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_4 + { + PyObject *callable; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_4 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CHECK_CALL_PY_EXACT_ARGS: { // _CHECK_FUNCTION_EXACT_ARGS { PyObject *self_or_null; @@ -4069,6 +4744,281 @@ break; } + case _CHECK_CALL_PY_EXACT_ARGS_0: { + // _CHECK_FUNCTION_EXACT_ARGS_0 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t 
func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_0 + { + PyObject *callable; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_0 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CHECK_CALL_PY_EXACT_ARGS_1: { + // _CHECK_FUNCTION_EXACT_ARGS_1 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if 
(code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_1 + { + PyObject *callable; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_1 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CHECK_CALL_PY_EXACT_ARGS_2: { + // _CHECK_FUNCTION_EXACT_ARGS_2 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_2 + { + PyObject *callable; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; 
+ PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_2 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CHECK_CALL_PY_EXACT_ARGS_3: { + // _CHECK_FUNCTION_EXACT_ARGS_3 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_3 + { + PyObject *callable; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_3 + { + PyObject **args; + PyObject 
*self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + + case _CHECK_CALL_PY_EXACT_ARGS_4: { + // _CHECK_FUNCTION_EXACT_ARGS_4 + { + PyObject *self_or_null; + PyObject *callable; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + if (!PyFunction_Check(callable)) goto deoptimize; + PyFunctionObject *func = (PyFunctionObject *)callable; + if (func->func_version != func_version) goto deoptimize; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (code->co_argcount != oparg + (self_or_null != NULL)) goto deoptimize; + } + // _CHECK_STACK_SPACE_4 + { + PyObject *callable; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + callable = stack_pointer[-2 - oparg]; + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) goto deoptimize; + if (tstate->py_recursion_remaining <= 1) goto deoptimize; + } + // _INIT_CALL_PY_EXACT_ARGS_4 + { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int argcount = oparg; + if 
(self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + } + break; + } + case _SAVE_RETURN_OFFSET__PUSH_FRAME: { // _SAVE_RETURN_OFFSET { diff --git a/Python/optimizer.c b/Python/optimizer.c index acd6d52c4a885f..599b6b66e6e1dc 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1018,21 +1018,6 @@ uop_optimize( } } assert(err == 1); - /* Fix up */ - for (int pc = 0; pc < UOP_MAX_TRACE_LENGTH; pc++) { - int opcode = buffer[pc].opcode; - int oparg = buffer[pc].oparg; - if (_PyUop_Flags[opcode] & HAS_OPARG_AND_1_FLAG) { - buffer[pc].opcode = opcode + 1 + (oparg & 1); - } - else if (oparg < _PyUop_Replication[opcode]) { - buffer[pc].opcode = opcode + oparg + 1; - } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { - break; - } - assert(_PyOpcode_uop_name[buffer[pc].opcode]); - } _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 35a48b07d22a70..5767b804f8c551 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -496,6 +496,26 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s } } +static void +replicate_and_split(_PyUOpInstruction *buffer, int buffer_size) +{ + /* Fix up */ + for (int pc = 0; pc < buffer_size; pc++) { + int opcode = buffer[pc].opcode; + int oparg = buffer[pc].oparg; + if (_PyUop_Flags[opcode] & HAS_OPARG_AND_1_FLAG) { + buffer[pc].opcode = opcode + 1 + (oparg & 1); + } + else if (oparg < _PyUop_Replication[opcode]) { + buffer[pc].opcode = opcode + oparg + 1; + } + else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + break; + } + 
assert(_PyOpcode_uop_name[buffer[pc].opcode]); + } +} + static void match_supers(_PyUOpInstruction *buffer, int buffer_size) { @@ -544,6 +564,7 @@ _Py_uop_analyze_and_optimize( assert(err == 1); remove_unneeded_uops(buffer, buffer_size); + replicate_and_split(buffer, buffer_size); match_supers(buffer, buffer_size); OPT_STAT_INC(optimizer_successes); diff --git a/Python/uop_super_matcher_cases.c.h b/Python/uop_super_matcher_cases.c.h index 9304f50f82ebc2..e4a8f38c112063 100644 --- a/Python/uop_super_matcher_cases.c.h +++ b/Python/uop_super_matcher_cases.c.h @@ -80,6 +80,41 @@ this_instr += 5; break; } + if ((this_instr[1].opcode == _INIT_CALL_BOUND_METHOD_EXACT_ARGS) && (this_instr[2].opcode == _CHECK_FUNCTION_EXACT_ARGS_0) && (this_instr[3].opcode == _CHECK_STACK_SPACE_0) && (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS_0)) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS_0\n"); + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS_0, this_instr[1].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 5; + break; + } + if ((this_instr[1].opcode == _INIT_CALL_BOUND_METHOD_EXACT_ARGS) && (this_instr[2].opcode == _CHECK_FUNCTION_EXACT_ARGS_1) && (this_instr[3].opcode == _CHECK_STACK_SPACE_1) && (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS_1)) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS_1\n"); + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS_1, this_instr[1].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 5; + break; + } + if ((this_instr[1].opcode == _INIT_CALL_BOUND_METHOD_EXACT_ARGS) && (this_instr[2].opcode == _CHECK_FUNCTION_EXACT_ARGS_2) && (this_instr[3].opcode == _CHECK_STACK_SPACE_2) && (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS_2)) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS_2\n"); + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS_2, 
this_instr[1].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 5; + break; + } + if ((this_instr[1].opcode == _INIT_CALL_BOUND_METHOD_EXACT_ARGS) && (this_instr[2].opcode == _CHECK_FUNCTION_EXACT_ARGS_3) && (this_instr[3].opcode == _CHECK_STACK_SPACE_3) && (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS_3)) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS_3\n"); + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS_3, this_instr[1].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 5; + break; + } + if ((this_instr[1].opcode == _INIT_CALL_BOUND_METHOD_EXACT_ARGS) && (this_instr[2].opcode == _CHECK_FUNCTION_EXACT_ARGS_4) && (this_instr[3].opcode == _CHECK_STACK_SPACE_4) && (this_instr[4].opcode == _INIT_CALL_PY_EXACT_ARGS_4)) { + DPRINTF(2, "Inserting super _CALL_BOUND_METHOD_EXACT_ARGS_4\n"); + REPLACE_OP(this_instr, _CALL_BOUND_METHOD_EXACT_ARGS_4, this_instr[1].oparg, this_instr[2].operand); + for (int i = 1; i < 5; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 5; + break; + } this_instr += 1; break; } @@ -96,6 +131,66 @@ break; } + case _CHECK_FUNCTION_EXACT_ARGS_0: { + if ((this_instr[1].opcode == _CHECK_STACK_SPACE_0) && (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS_0)) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS_0\n"); + REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 3; + break; + } + this_instr += 1; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_1: { + if ((this_instr[1].opcode == _CHECK_STACK_SPACE_1) && (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS_1)) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS_1\n"); + REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS_1, this_instr[0].oparg, this_instr[0].operand); 
+ for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 3; + break; + } + this_instr += 1; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_2: { + if ((this_instr[1].opcode == _CHECK_STACK_SPACE_2) && (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS_2)) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS_2\n"); + REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 3; + break; + } + this_instr += 1; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_3: { + if ((this_instr[1].opcode == _CHECK_STACK_SPACE_3) && (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS_3)) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS_3\n"); + REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 3; + break; + } + this_instr += 1; + break; + } + + case _CHECK_FUNCTION_EXACT_ARGS_4: { + if ((this_instr[1].opcode == _CHECK_STACK_SPACE_4) && (this_instr[2].opcode == _INIT_CALL_PY_EXACT_ARGS_4)) { + DPRINTF(2, "Inserting super _CHECK_CALL_PY_EXACT_ARGS_4\n"); + REPLACE_OP(this_instr, _CHECK_CALL_PY_EXACT_ARGS_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 3; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 3; + break; + } + this_instr += 1; + break; + } + case _SAVE_RETURN_OFFSET: { if ((this_instr[1].opcode == _PUSH_FRAME)) { DPRINTF(2, "Inserting super _SAVE_RETURN_OFFSET__PUSH_FRAME\n"); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 2c0ee3063f5cb8..d14c2f63e0d454 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -658,9 +658,28 @@ def add_super( continue operand_uses.add(cache_name) if len(operand_uses) > 1: - analysis_error(f"Uop super {super.name}'s 
cache entry cannot fit in one operand.") + analysis_error(f"Uop super {super.name}'s cache entry cannot fit in one operand.", super.tokens[0]) + add_instruction(super.name, parts, super_uops) + replicates_count = [p.replicated for p in parts if p.replicated != 0] + if len(set(replicates_count)) > 1: + analysis_error(f"Uop super {super.name}'s replicates are not all the same count: {replicates_count}", super.tokens[0]) + + if replicates_count: + replicate_count = replicates_count[0] + for i in range(replicate_count): + res = [] + for part in parts: + if part.replicated > 0: + assert part.replicated == replicate_count + res.append(uops[part.name + f"_{i}"]) + else: + res.append(part) + add_instruction(super.name + f"_{i}", res, super_uops) + + + def add_family( pfamily: parser.Family, instructions: dict[str, Instruction], From 4c47971e296d829535f96cfc5cf1feabec7f0f4b Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 20:58:39 +0800 Subject: [PATCH 09/14] done for LOAD_FAST__LOAD_FAST_REP --- .gitattributes | 1 + Include/internal/pycore_uop_ids.h | 12 +- Include/internal/pycore_uop_metadata.h | 8 + Python/bytecodes.c | 2 + Python/executor_cases.c.h | 208 +++++++++++++++++++++++++ Python/uop_super_matcher_cases.c.h | 96 ++++++++++++ Tools/cases_generator/analyzer.py | 3 +- Tools/cases_generator/lexer.py | 1 + Tools/cases_generator/parsing.py | 6 +- 9 files changed, 333 insertions(+), 4 deletions(-) diff --git a/.gitattributes b/.gitattributes index 4f82cea7480cfe..b9aea3a25bed48 100644 --- a/.gitattributes +++ b/.gitattributes @@ -96,6 +96,7 @@ Python/Python-ast.c generated Python/executor_cases.c.h generated Python/generated_cases.c.h generated Python/optimizer_cases.c.h generated +Python/uop_super_matcher_cases.c.h generated Python/opcode_targets.h generated Python/stdlib_module_names.h generated Tools/peg_generator/pegen/grammar_parser.py generated diff --git a/Include/internal/pycore_uop_ids.h 
b/Include/internal/pycore_uop_ids.h index c1369d0d87f331..731f1660fcc1e9 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -297,8 +297,16 @@ extern "C" { #define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 450 #define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 451 #define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 452 -#define _SAVE_RETURN_OFFSET__PUSH_FRAME 453 -#define MAX_UOP_ID 453 +#define _LOAD_FAST__LOAD_FAST_0 453 +#define _LOAD_FAST__LOAD_FAST_1 454 +#define _LOAD_FAST__LOAD_FAST_2 455 +#define _LOAD_FAST__LOAD_FAST_3 456 +#define _LOAD_FAST__LOAD_FAST_4 457 +#define _LOAD_FAST__LOAD_FAST_5 458 +#define _LOAD_FAST__LOAD_FAST_6 459 +#define _LOAD_FAST__LOAD_FAST_7 460 +#define _SAVE_RETURN_OFFSET__PUSH_FRAME 461 +#define MAX_UOP_ID 461 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9998f032955987..df95d1e59fb248 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -517,6 +517,14 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT] = "_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT", [_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT] = "_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT", [_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE] = "_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE", + [_LOAD_FAST__LOAD_FAST_0] = "_LOAD_FAST__LOAD_FAST_0", + [_LOAD_FAST__LOAD_FAST_1] = "_LOAD_FAST__LOAD_FAST_1", + [_LOAD_FAST__LOAD_FAST_2] = "_LOAD_FAST__LOAD_FAST_2", + [_LOAD_FAST__LOAD_FAST_3] = "_LOAD_FAST__LOAD_FAST_3", + [_LOAD_FAST__LOAD_FAST_4] = "_LOAD_FAST__LOAD_FAST_4", + [_LOAD_FAST__LOAD_FAST_5] = "_LOAD_FAST__LOAD_FAST_5", + [_LOAD_FAST__LOAD_FAST_6] = "_LOAD_FAST__LOAD_FAST_6", + [_LOAD_FAST__LOAD_FAST_7] = "_LOAD_FAST__LOAD_FAST_7", [_SAVE_RETURN_OFFSET__PUSH_FRAME] = "_SAVE_RETURN_OFFSET__PUSH_FRAME", }; #endif // NEED_OPCODE_METADATA diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 
b0b9ba43cf815a..a627a06b2b60a5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -229,6 +229,8 @@ dummy_func( Py_INCREF(value2); } + replicate_only super(_LOAD_FAST__LOAD_FAST) = LOAD_FAST + LOAD_FAST; + pure inst(LOAD_CONST, (-- value)) { value = GETITEM(FRAME_CO_CONSTS, oparg); Py_INCREF(value); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 47d1586cbdf23b..6de69e85d93567 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4013,6 +4013,214 @@ break; } + case _LOAD_FAST__LOAD_FAST_0: { + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_1: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_2: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_3: { + // 
_LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_4: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_5: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_6: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case _LOAD_FAST__LOAD_FAST_7: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + assert(oparg == 
CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + case _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT: { // _GUARD_BOTH_INT { diff --git a/Python/uop_super_matcher_cases.c.h b/Python/uop_super_matcher_cases.c.h index e4a8f38c112063..4563496d8ad642 100644 --- a/Python/uop_super_matcher_cases.c.h +++ b/Python/uop_super_matcher_cases.c.h @@ -8,6 +8,102 @@ #endif #define TIER_TWO 2 + case _LOAD_FAST_0: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_1: { + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_2: { + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_3: { + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_3, 
this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_4: { + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_5: { + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_6: { + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _LOAD_FAST_7: { + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + case _GUARD_BOTH_INT: { if ((this_instr[1].opcode == _BINARY_OP_MULTIPLY_INT)) { DPRINTF(2, "Inserting super _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT\n"); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index d14c2f63e0d454..47b83b633e75cb 100644 --- a/Tools/cases_generator/analyzer.py +++ 
b/Tools/cases_generator/analyzer.py @@ -660,7 +660,8 @@ def add_super( if len(operand_uses) > 1: analysis_error(f"Uop super {super.name}'s cache entry cannot fit in one operand.", super.tokens[0]) - add_instruction(super.name, parts, super_uops) + if not "replicate_only" in super.annotations: + add_instruction(super.name, parts, super_uops) replicates_count = [p.replicated for p in parts if p.replicated != 0] if len(set(replicates_count)) > 1: diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 8e73bb0c377832..9aa25cb2892bd2 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -227,6 +227,7 @@ def choice(*opts: str) -> str: "pure", "split", "replicate", + "replicate_only", "tier1", "tier2", } diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index c6286d3630c50e..36a24fcf8d39f9 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -138,6 +138,7 @@ class Family(Node): @dataclass class Super(Node): name: str + annotations: list[str] uops: list[UOp] @@ -342,6 +343,9 @@ def macro_def(self) -> Macro | None: @contextual def super_def(self) -> Super | None: + annotations = [] + while anno := self.expect(lx.ANNOTATION): + annotations.append(anno.text) if tkn := self.expect(lx.SUPER): if self.expect(lx.LPAREN): if tkn := self.expect(lx.IDENTIFIER): @@ -349,7 +353,7 @@ def super_def(self) -> Super | None: if self.expect(lx.EQUALS): if uops := self.uops(): self.require(lx.SEMI) - res = Super(tkn.text, uops) + res = Super(tkn.text, annotations, uops) return res return None From 06b2c7d24011c3980b4e262ea6b30c7445b72904 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 21:27:13 +0800 Subject: [PATCH 10/14] Add tests --- Lib/test/test_generated_cases.py | 182 ++++++++++++++++++++++++++++++- 1 file changed, 181 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_generated_cases.py 
b/Lib/test/test_generated_cases.py index 32c2c2fca05c4e..7fcbb5703eeb46 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -29,11 +29,12 @@ def skip_if_different_mount_drives(): test_tools.skip_if_missing("cases_generator") with test_tools.imports_under_tool("cases_generator"): - from analyzer import StackItem + from analyzer import StackItem, analyze_files import parser from stack import Stack import tier1_generator import optimizer_generator + import tier2_super_matcher_generator def handle_stderr(): @@ -813,6 +814,20 @@ def test_deopt_and_exit(self): with self.assertRaises(Exception): self.run_cases_test(input, output) + def test_super(self): + input = """ + op(OP1, (arg1 -- out)) { + FOO(); + } + op(OP2, (arg1 -- out)) { + BAR(); + } + super(_OP1__OP2) = OP1 + OP2; + """ + output = "" + self.run_cases_test(input, output) + + class TestGeneratedAbstractCases(unittest.TestCase): def setUp(self) -> None: super().setUp() @@ -965,5 +980,170 @@ def test_missing_override_failure(self): self.run_cases_test(input, input2, output) +class TestGeneratedSuperMatcher(unittest.TestCase): + def setUp(self) -> None: + super().setUp() + self.maxDiff = None + + self.temp_dir = tempfile.gettempdir() + self.temp_input_filename = os.path.join(self.temp_dir, "input.txt") + self.temp_output_filename = os.path.join(self.temp_dir, "output.txt") + self.temp_metadata_filename = os.path.join(self.temp_dir, "metadata.txt") + self.temp_pymetadata_filename = os.path.join(self.temp_dir, "pymetadata.txt") + self.temp_executor_filename = os.path.join(self.temp_dir, "executor.txt") + + def tearDown(self) -> None: + for filename in [ + self.temp_input_filename, + self.temp_output_filename, + self.temp_metadata_filename, + self.temp_pymetadata_filename, + self.temp_executor_filename, + ]: + try: + os.remove(filename) + except: + pass + super().tearDown() + + def run_cases_test(self, input: str, expected: str): + with open(self.temp_input_filename, "w+") as 
temp_input: + temp_input.write(parser.BEGIN_MARKER) + temp_input.write(input) + temp_input.write(parser.END_MARKER) + temp_input.flush() + + with handle_stderr(), open(self.temp_output_filename, "w+") as temp_output: + data = analyze_files([self.temp_input_filename]) + tier2_super_matcher_generator.generate_tier2( + [self.temp_input_filename], data, temp_output, False + ) + + with open(self.temp_output_filename) as temp_output: + lines = temp_output.readlines() + while lines and lines[0].startswith(("// ", "#", " #", "\n")): + lines.pop(0) + while lines and lines[-1].startswith(("#", "\n")): + lines.pop(-1) + actual = "".join(lines) + + # if actual.strip() != expected.strip(): + # print("Actual:") + # print(actual) + # print("Expected:") + # print(expected) + # print("End") + self.assertEqual(actual.strip(), expected.strip()) + + def test_super_basic(self): + input = """ + op(OP1, (arg1 -- out)) { + FOO(); + } + op(OP2, (arg1 -- out)) { + BAR(); + } + super(_OP1__OP2) = OP1 + OP2; + """ + output = """ + case OP1: { + if ((this_instr[1].opcode == OP2)) { + DPRINTF(2, "Inserting super _OP1__OP2\\n"); + REPLACE_OP(this_instr, _OP1__OP2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + """ + self.run_cases_test(input, output) + + def test_super_replicate(self): + input = """ + op(OP1, (arg1 -- out)) { + FOO(); + } + replicate(3) op(OP2, (arg1 -- out)) { + BAR(); + } + super(_OP1__OP2) = OP1 + OP2; + """ + output = """ + case OP1: { + if ((this_instr[1].opcode == OP2)) { + DPRINTF(2, "Inserting super _OP1__OP2\\n"); + REPLACE_OP(this_instr, _OP1__OP2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_0)) { + DPRINTF(2, "Inserting super _OP1__OP2_0\\n"); + REPLACE_OP(this_instr, _OP1__OP2_0, 
this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_1)) { + DPRINTF(2, "Inserting super _OP1__OP2_1\\n"); + REPLACE_OP(this_instr, _OP1__OP2_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_2)) { + DPRINTF(2, "Inserting super _OP1__OP2_2\\n"); + REPLACE_OP(this_instr, _OP1__OP2_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + """ + self.run_cases_test(input, output) + + def test_super_replicate_only(self): + input = """ + op(OP1, (arg1 -- out)) { + FOO(); + } + replicate(3) op(OP2, (arg1 -- out)) { + BAR(); + } + replicate_only super(_OP1__OP2) = OP1 + OP2; + """ + output = """ + case OP1: { + if ((this_instr[1].opcode == OP2_0)) { + DPRINTF(2, "Inserting super _OP1__OP2_0\\n"); + REPLACE_OP(this_instr, _OP1__OP2_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_1)) { + DPRINTF(2, "Inserting super _OP1__OP2_1\\n"); + REPLACE_OP(this_instr, _OP1__OP2_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_2)) { + DPRINTF(2, "Inserting super _OP1__OP2_2\\n"); + REPLACE_OP(this_instr, _OP1__OP2_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + """ + self.run_cases_test(input, output) + if __name__ == "__main__": unittest.main() From 
0d1a21386ef63ff795da5847c9d592211eba3510 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 21:39:30 +0800 Subject: [PATCH 11/14] clean up diff --- Lib/test/test_generated_cases.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 7fcbb5703eeb46..1f6d6901430655 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1056,7 +1056,7 @@ def test_super_basic(self): } this_instr += 1; break; - } + } """ self.run_cases_test(input, output) @@ -1102,7 +1102,7 @@ def test_super_replicate(self): } this_instr += 1; break; - } + } """ self.run_cases_test(input, output) @@ -1141,7 +1141,7 @@ def test_super_replicate_only(self): } this_instr += 1; break; - } + } """ self.run_cases_test(input, output) From d4a125a9f655d7a740f34c792fdc7b32859fc590 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 13:40:41 +0000 Subject: [PATCH 12/14] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-03-01-13-40-39.gh-issue-116202.Cmq-PB.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-03-01-13-40-39.gh-issue-116202.Cmq-PB.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-01-13-40-39.gh-issue-116202.Cmq-PB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-01-13-40-39.gh-issue-116202.Cmq-PB.rst new file mode 100644 index 00000000000000..e843b4a8337213 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-03-01-13-40-39.gh-issue-116202.Cmq-PB.rst @@ -0,0 +1 @@ +Add tier 2 super uops. 
From aa302a596843e69695b71bf15be98a5bbf6d9171 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 22:59:21 +0800 Subject: [PATCH 13/14] cross product replicate only --- Include/internal/pycore_uop_ids.h | 140 +- Include/internal/pycore_uop_metadata.h | 136 +- Lib/test/test_capi/test_opt.py | 2 +- Lib/test/test_generated_cases.py | 81 +- Python/bytecodes.c | 2 + Python/executor_cases.c.h | 2808 +++++++++++++++++++++- Python/uop_super_matcher_cases.c.h | 912 ++++++- Tools/cases_generator/analyzer.py | 50 +- Tools/cases_generator/tier2_generator.py | 7 +- Tools/jit/_writer.py | 2 +- 10 files changed, 4044 insertions(+), 96 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 731f1660fcc1e9..3ccf982009f61b 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -297,16 +297,136 @@ extern "C" { #define _GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT 450 #define _GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT 451 #define _GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE 452 -#define _LOAD_FAST__LOAD_FAST_0 453 -#define _LOAD_FAST__LOAD_FAST_1 454 -#define _LOAD_FAST__LOAD_FAST_2 455 -#define _LOAD_FAST__LOAD_FAST_3 456 -#define _LOAD_FAST__LOAD_FAST_4 457 -#define _LOAD_FAST__LOAD_FAST_5 458 -#define _LOAD_FAST__LOAD_FAST_6 459 -#define _LOAD_FAST__LOAD_FAST_7 460 -#define _SAVE_RETURN_OFFSET__PUSH_FRAME 461 -#define MAX_UOP_ID 461 +#define _SAVE_RETURN_OFFSET__PUSH_FRAME 453 +#define __LOAD_FAST_0__LOAD_FAST_0 454 +#define __LOAD_FAST_0__LOAD_FAST_1 455 +#define __LOAD_FAST_0__LOAD_FAST_2 456 +#define __LOAD_FAST_0__LOAD_FAST_3 457 +#define __LOAD_FAST_0__LOAD_FAST_4 458 +#define __LOAD_FAST_0__LOAD_FAST_5 459 +#define __LOAD_FAST_0__LOAD_FAST_6 460 +#define __LOAD_FAST_0__LOAD_FAST_7 461 +#define __LOAD_FAST_1__LOAD_FAST_0 462 +#define __LOAD_FAST_1__LOAD_FAST_1 463 +#define __LOAD_FAST_1__LOAD_FAST_2 464 +#define __LOAD_FAST_1__LOAD_FAST_3 465 
+#define __LOAD_FAST_1__LOAD_FAST_4 466 +#define __LOAD_FAST_1__LOAD_FAST_5 467 +#define __LOAD_FAST_1__LOAD_FAST_6 468 +#define __LOAD_FAST_1__LOAD_FAST_7 469 +#define __LOAD_FAST_2__LOAD_FAST_0 470 +#define __LOAD_FAST_2__LOAD_FAST_1 471 +#define __LOAD_FAST_2__LOAD_FAST_2 472 +#define __LOAD_FAST_2__LOAD_FAST_3 473 +#define __LOAD_FAST_2__LOAD_FAST_4 474 +#define __LOAD_FAST_2__LOAD_FAST_5 475 +#define __LOAD_FAST_2__LOAD_FAST_6 476 +#define __LOAD_FAST_2__LOAD_FAST_7 477 +#define __LOAD_FAST_3__LOAD_FAST_0 478 +#define __LOAD_FAST_3__LOAD_FAST_1 479 +#define __LOAD_FAST_3__LOAD_FAST_2 480 +#define __LOAD_FAST_3__LOAD_FAST_3 481 +#define __LOAD_FAST_3__LOAD_FAST_4 482 +#define __LOAD_FAST_3__LOAD_FAST_5 483 +#define __LOAD_FAST_3__LOAD_FAST_6 484 +#define __LOAD_FAST_3__LOAD_FAST_7 485 +#define __LOAD_FAST_4__LOAD_FAST_0 486 +#define __LOAD_FAST_4__LOAD_FAST_1 487 +#define __LOAD_FAST_4__LOAD_FAST_2 488 +#define __LOAD_FAST_4__LOAD_FAST_3 489 +#define __LOAD_FAST_4__LOAD_FAST_4 490 +#define __LOAD_FAST_4__LOAD_FAST_5 491 +#define __LOAD_FAST_4__LOAD_FAST_6 492 +#define __LOAD_FAST_4__LOAD_FAST_7 493 +#define __LOAD_FAST_5__LOAD_FAST_0 494 +#define __LOAD_FAST_5__LOAD_FAST_1 495 +#define __LOAD_FAST_5__LOAD_FAST_2 496 +#define __LOAD_FAST_5__LOAD_FAST_3 497 +#define __LOAD_FAST_5__LOAD_FAST_4 498 +#define __LOAD_FAST_5__LOAD_FAST_5 499 +#define __LOAD_FAST_5__LOAD_FAST_6 500 +#define __LOAD_FAST_5__LOAD_FAST_7 501 +#define __LOAD_FAST_6__LOAD_FAST_0 502 +#define __LOAD_FAST_6__LOAD_FAST_1 503 +#define __LOAD_FAST_6__LOAD_FAST_2 504 +#define __LOAD_FAST_6__LOAD_FAST_3 505 +#define __LOAD_FAST_6__LOAD_FAST_4 506 +#define __LOAD_FAST_6__LOAD_FAST_5 507 +#define __LOAD_FAST_6__LOAD_FAST_6 508 +#define __LOAD_FAST_6__LOAD_FAST_7 509 +#define __LOAD_FAST_7__LOAD_FAST_0 510 +#define __LOAD_FAST_7__LOAD_FAST_1 511 +#define __LOAD_FAST_7__LOAD_FAST_2 512 +#define __LOAD_FAST_7__LOAD_FAST_3 513 +#define __LOAD_FAST_7__LOAD_FAST_4 514 +#define __LOAD_FAST_7__LOAD_FAST_5 515 
+#define __LOAD_FAST_7__LOAD_FAST_6 516 +#define __LOAD_FAST_7__LOAD_FAST_7 517 +#define __STORE_FAST_0__LOAD_FAST_0 518 +#define __STORE_FAST_0__LOAD_FAST_1 519 +#define __STORE_FAST_0__LOAD_FAST_2 520 +#define __STORE_FAST_0__LOAD_FAST_3 521 +#define __STORE_FAST_0__LOAD_FAST_4 522 +#define __STORE_FAST_0__LOAD_FAST_5 523 +#define __STORE_FAST_0__LOAD_FAST_6 524 +#define __STORE_FAST_0__LOAD_FAST_7 525 +#define __STORE_FAST_1__LOAD_FAST_0 526 +#define __STORE_FAST_1__LOAD_FAST_1 527 +#define __STORE_FAST_1__LOAD_FAST_2 528 +#define __STORE_FAST_1__LOAD_FAST_3 529 +#define __STORE_FAST_1__LOAD_FAST_4 530 +#define __STORE_FAST_1__LOAD_FAST_5 531 +#define __STORE_FAST_1__LOAD_FAST_6 532 +#define __STORE_FAST_1__LOAD_FAST_7 533 +#define __STORE_FAST_2__LOAD_FAST_0 534 +#define __STORE_FAST_2__LOAD_FAST_1 535 +#define __STORE_FAST_2__LOAD_FAST_2 536 +#define __STORE_FAST_2__LOAD_FAST_3 537 +#define __STORE_FAST_2__LOAD_FAST_4 538 +#define __STORE_FAST_2__LOAD_FAST_5 539 +#define __STORE_FAST_2__LOAD_FAST_6 540 +#define __STORE_FAST_2__LOAD_FAST_7 541 +#define __STORE_FAST_3__LOAD_FAST_0 542 +#define __STORE_FAST_3__LOAD_FAST_1 543 +#define __STORE_FAST_3__LOAD_FAST_2 544 +#define __STORE_FAST_3__LOAD_FAST_3 545 +#define __STORE_FAST_3__LOAD_FAST_4 546 +#define __STORE_FAST_3__LOAD_FAST_5 547 +#define __STORE_FAST_3__LOAD_FAST_6 548 +#define __STORE_FAST_3__LOAD_FAST_7 549 +#define __STORE_FAST_4__LOAD_FAST_0 550 +#define __STORE_FAST_4__LOAD_FAST_1 551 +#define __STORE_FAST_4__LOAD_FAST_2 552 +#define __STORE_FAST_4__LOAD_FAST_3 553 +#define __STORE_FAST_4__LOAD_FAST_4 554 +#define __STORE_FAST_4__LOAD_FAST_5 555 +#define __STORE_FAST_4__LOAD_FAST_6 556 +#define __STORE_FAST_4__LOAD_FAST_7 557 +#define __STORE_FAST_5__LOAD_FAST_0 558 +#define __STORE_FAST_5__LOAD_FAST_1 559 +#define __STORE_FAST_5__LOAD_FAST_2 560 +#define __STORE_FAST_5__LOAD_FAST_3 561 +#define __STORE_FAST_5__LOAD_FAST_4 562 +#define __STORE_FAST_5__LOAD_FAST_5 563 +#define 
__STORE_FAST_5__LOAD_FAST_6 564 +#define __STORE_FAST_5__LOAD_FAST_7 565 +#define __STORE_FAST_6__LOAD_FAST_0 566 +#define __STORE_FAST_6__LOAD_FAST_1 567 +#define __STORE_FAST_6__LOAD_FAST_2 568 +#define __STORE_FAST_6__LOAD_FAST_3 569 +#define __STORE_FAST_6__LOAD_FAST_4 570 +#define __STORE_FAST_6__LOAD_FAST_5 571 +#define __STORE_FAST_6__LOAD_FAST_6 572 +#define __STORE_FAST_6__LOAD_FAST_7 573 +#define __STORE_FAST_7__LOAD_FAST_0 574 +#define __STORE_FAST_7__LOAD_FAST_1 575 +#define __STORE_FAST_7__LOAD_FAST_2 576 +#define __STORE_FAST_7__LOAD_FAST_3 577 +#define __STORE_FAST_7__LOAD_FAST_4 578 +#define __STORE_FAST_7__LOAD_FAST_5 579 +#define __STORE_FAST_7__LOAD_FAST_6 580 +#define __STORE_FAST_7__LOAD_FAST_7 581 +#define MAX_UOP_ID 581 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index df95d1e59fb248..507a57b332ee1d 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -517,15 +517,135 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT] = "_GUARD_BOTH_INT__BINARY_OP_MULTIPLY_INT", [_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT] = "_GUARD_BOTH_INT__BINARY_OP_SUBTRACT_INT", [_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE] = "_GUARD_BOTH_UNICODE__BINARY_OP_ADD_UNICODE", - [_LOAD_FAST__LOAD_FAST_0] = "_LOAD_FAST__LOAD_FAST_0", - [_LOAD_FAST__LOAD_FAST_1] = "_LOAD_FAST__LOAD_FAST_1", - [_LOAD_FAST__LOAD_FAST_2] = "_LOAD_FAST__LOAD_FAST_2", - [_LOAD_FAST__LOAD_FAST_3] = "_LOAD_FAST__LOAD_FAST_3", - [_LOAD_FAST__LOAD_FAST_4] = "_LOAD_FAST__LOAD_FAST_4", - [_LOAD_FAST__LOAD_FAST_5] = "_LOAD_FAST__LOAD_FAST_5", - [_LOAD_FAST__LOAD_FAST_6] = "_LOAD_FAST__LOAD_FAST_6", - [_LOAD_FAST__LOAD_FAST_7] = "_LOAD_FAST__LOAD_FAST_7", [_SAVE_RETURN_OFFSET__PUSH_FRAME] = "_SAVE_RETURN_OFFSET__PUSH_FRAME", + [__LOAD_FAST_0__LOAD_FAST_0] = "__LOAD_FAST_0__LOAD_FAST_0", + [__LOAD_FAST_0__LOAD_FAST_1] = "__LOAD_FAST_0__LOAD_FAST_1", 
+ [__LOAD_FAST_0__LOAD_FAST_2] = "__LOAD_FAST_0__LOAD_FAST_2", + [__LOAD_FAST_0__LOAD_FAST_3] = "__LOAD_FAST_0__LOAD_FAST_3", + [__LOAD_FAST_0__LOAD_FAST_4] = "__LOAD_FAST_0__LOAD_FAST_4", + [__LOAD_FAST_0__LOAD_FAST_5] = "__LOAD_FAST_0__LOAD_FAST_5", + [__LOAD_FAST_0__LOAD_FAST_6] = "__LOAD_FAST_0__LOAD_FAST_6", + [__LOAD_FAST_0__LOAD_FAST_7] = "__LOAD_FAST_0__LOAD_FAST_7", + [__LOAD_FAST_1__LOAD_FAST_0] = "__LOAD_FAST_1__LOAD_FAST_0", + [__LOAD_FAST_1__LOAD_FAST_1] = "__LOAD_FAST_1__LOAD_FAST_1", + [__LOAD_FAST_1__LOAD_FAST_2] = "__LOAD_FAST_1__LOAD_FAST_2", + [__LOAD_FAST_1__LOAD_FAST_3] = "__LOAD_FAST_1__LOAD_FAST_3", + [__LOAD_FAST_1__LOAD_FAST_4] = "__LOAD_FAST_1__LOAD_FAST_4", + [__LOAD_FAST_1__LOAD_FAST_5] = "__LOAD_FAST_1__LOAD_FAST_5", + [__LOAD_FAST_1__LOAD_FAST_6] = "__LOAD_FAST_1__LOAD_FAST_6", + [__LOAD_FAST_1__LOAD_FAST_7] = "__LOAD_FAST_1__LOAD_FAST_7", + [__LOAD_FAST_2__LOAD_FAST_0] = "__LOAD_FAST_2__LOAD_FAST_0", + [__LOAD_FAST_2__LOAD_FAST_1] = "__LOAD_FAST_2__LOAD_FAST_1", + [__LOAD_FAST_2__LOAD_FAST_2] = "__LOAD_FAST_2__LOAD_FAST_2", + [__LOAD_FAST_2__LOAD_FAST_3] = "__LOAD_FAST_2__LOAD_FAST_3", + [__LOAD_FAST_2__LOAD_FAST_4] = "__LOAD_FAST_2__LOAD_FAST_4", + [__LOAD_FAST_2__LOAD_FAST_5] = "__LOAD_FAST_2__LOAD_FAST_5", + [__LOAD_FAST_2__LOAD_FAST_6] = "__LOAD_FAST_2__LOAD_FAST_6", + [__LOAD_FAST_2__LOAD_FAST_7] = "__LOAD_FAST_2__LOAD_FAST_7", + [__LOAD_FAST_3__LOAD_FAST_0] = "__LOAD_FAST_3__LOAD_FAST_0", + [__LOAD_FAST_3__LOAD_FAST_1] = "__LOAD_FAST_3__LOAD_FAST_1", + [__LOAD_FAST_3__LOAD_FAST_2] = "__LOAD_FAST_3__LOAD_FAST_2", + [__LOAD_FAST_3__LOAD_FAST_3] = "__LOAD_FAST_3__LOAD_FAST_3", + [__LOAD_FAST_3__LOAD_FAST_4] = "__LOAD_FAST_3__LOAD_FAST_4", + [__LOAD_FAST_3__LOAD_FAST_5] = "__LOAD_FAST_3__LOAD_FAST_5", + [__LOAD_FAST_3__LOAD_FAST_6] = "__LOAD_FAST_3__LOAD_FAST_6", + [__LOAD_FAST_3__LOAD_FAST_7] = "__LOAD_FAST_3__LOAD_FAST_7", + [__LOAD_FAST_4__LOAD_FAST_0] = "__LOAD_FAST_4__LOAD_FAST_0", + [__LOAD_FAST_4__LOAD_FAST_1] = 
"__LOAD_FAST_4__LOAD_FAST_1", + [__LOAD_FAST_4__LOAD_FAST_2] = "__LOAD_FAST_4__LOAD_FAST_2", + [__LOAD_FAST_4__LOAD_FAST_3] = "__LOAD_FAST_4__LOAD_FAST_3", + [__LOAD_FAST_4__LOAD_FAST_4] = "__LOAD_FAST_4__LOAD_FAST_4", + [__LOAD_FAST_4__LOAD_FAST_5] = "__LOAD_FAST_4__LOAD_FAST_5", + [__LOAD_FAST_4__LOAD_FAST_6] = "__LOAD_FAST_4__LOAD_FAST_6", + [__LOAD_FAST_4__LOAD_FAST_7] = "__LOAD_FAST_4__LOAD_FAST_7", + [__LOAD_FAST_5__LOAD_FAST_0] = "__LOAD_FAST_5__LOAD_FAST_0", + [__LOAD_FAST_5__LOAD_FAST_1] = "__LOAD_FAST_5__LOAD_FAST_1", + [__LOAD_FAST_5__LOAD_FAST_2] = "__LOAD_FAST_5__LOAD_FAST_2", + [__LOAD_FAST_5__LOAD_FAST_3] = "__LOAD_FAST_5__LOAD_FAST_3", + [__LOAD_FAST_5__LOAD_FAST_4] = "__LOAD_FAST_5__LOAD_FAST_4", + [__LOAD_FAST_5__LOAD_FAST_5] = "__LOAD_FAST_5__LOAD_FAST_5", + [__LOAD_FAST_5__LOAD_FAST_6] = "__LOAD_FAST_5__LOAD_FAST_6", + [__LOAD_FAST_5__LOAD_FAST_7] = "__LOAD_FAST_5__LOAD_FAST_7", + [__LOAD_FAST_6__LOAD_FAST_0] = "__LOAD_FAST_6__LOAD_FAST_0", + [__LOAD_FAST_6__LOAD_FAST_1] = "__LOAD_FAST_6__LOAD_FAST_1", + [__LOAD_FAST_6__LOAD_FAST_2] = "__LOAD_FAST_6__LOAD_FAST_2", + [__LOAD_FAST_6__LOAD_FAST_3] = "__LOAD_FAST_6__LOAD_FAST_3", + [__LOAD_FAST_6__LOAD_FAST_4] = "__LOAD_FAST_6__LOAD_FAST_4", + [__LOAD_FAST_6__LOAD_FAST_5] = "__LOAD_FAST_6__LOAD_FAST_5", + [__LOAD_FAST_6__LOAD_FAST_6] = "__LOAD_FAST_6__LOAD_FAST_6", + [__LOAD_FAST_6__LOAD_FAST_7] = "__LOAD_FAST_6__LOAD_FAST_7", + [__LOAD_FAST_7__LOAD_FAST_0] = "__LOAD_FAST_7__LOAD_FAST_0", + [__LOAD_FAST_7__LOAD_FAST_1] = "__LOAD_FAST_7__LOAD_FAST_1", + [__LOAD_FAST_7__LOAD_FAST_2] = "__LOAD_FAST_7__LOAD_FAST_2", + [__LOAD_FAST_7__LOAD_FAST_3] = "__LOAD_FAST_7__LOAD_FAST_3", + [__LOAD_FAST_7__LOAD_FAST_4] = "__LOAD_FAST_7__LOAD_FAST_4", + [__LOAD_FAST_7__LOAD_FAST_5] = "__LOAD_FAST_7__LOAD_FAST_5", + [__LOAD_FAST_7__LOAD_FAST_6] = "__LOAD_FAST_7__LOAD_FAST_6", + [__LOAD_FAST_7__LOAD_FAST_7] = "__LOAD_FAST_7__LOAD_FAST_7", + [__STORE_FAST_0__LOAD_FAST_0] = "__STORE_FAST_0__LOAD_FAST_0", + 
[__STORE_FAST_0__LOAD_FAST_1] = "__STORE_FAST_0__LOAD_FAST_1", + [__STORE_FAST_0__LOAD_FAST_2] = "__STORE_FAST_0__LOAD_FAST_2", + [__STORE_FAST_0__LOAD_FAST_3] = "__STORE_FAST_0__LOAD_FAST_3", + [__STORE_FAST_0__LOAD_FAST_4] = "__STORE_FAST_0__LOAD_FAST_4", + [__STORE_FAST_0__LOAD_FAST_5] = "__STORE_FAST_0__LOAD_FAST_5", + [__STORE_FAST_0__LOAD_FAST_6] = "__STORE_FAST_0__LOAD_FAST_6", + [__STORE_FAST_0__LOAD_FAST_7] = "__STORE_FAST_0__LOAD_FAST_7", + [__STORE_FAST_1__LOAD_FAST_0] = "__STORE_FAST_1__LOAD_FAST_0", + [__STORE_FAST_1__LOAD_FAST_1] = "__STORE_FAST_1__LOAD_FAST_1", + [__STORE_FAST_1__LOAD_FAST_2] = "__STORE_FAST_1__LOAD_FAST_2", + [__STORE_FAST_1__LOAD_FAST_3] = "__STORE_FAST_1__LOAD_FAST_3", + [__STORE_FAST_1__LOAD_FAST_4] = "__STORE_FAST_1__LOAD_FAST_4", + [__STORE_FAST_1__LOAD_FAST_5] = "__STORE_FAST_1__LOAD_FAST_5", + [__STORE_FAST_1__LOAD_FAST_6] = "__STORE_FAST_1__LOAD_FAST_6", + [__STORE_FAST_1__LOAD_FAST_7] = "__STORE_FAST_1__LOAD_FAST_7", + [__STORE_FAST_2__LOAD_FAST_0] = "__STORE_FAST_2__LOAD_FAST_0", + [__STORE_FAST_2__LOAD_FAST_1] = "__STORE_FAST_2__LOAD_FAST_1", + [__STORE_FAST_2__LOAD_FAST_2] = "__STORE_FAST_2__LOAD_FAST_2", + [__STORE_FAST_2__LOAD_FAST_3] = "__STORE_FAST_2__LOAD_FAST_3", + [__STORE_FAST_2__LOAD_FAST_4] = "__STORE_FAST_2__LOAD_FAST_4", + [__STORE_FAST_2__LOAD_FAST_5] = "__STORE_FAST_2__LOAD_FAST_5", + [__STORE_FAST_2__LOAD_FAST_6] = "__STORE_FAST_2__LOAD_FAST_6", + [__STORE_FAST_2__LOAD_FAST_7] = "__STORE_FAST_2__LOAD_FAST_7", + [__STORE_FAST_3__LOAD_FAST_0] = "__STORE_FAST_3__LOAD_FAST_0", + [__STORE_FAST_3__LOAD_FAST_1] = "__STORE_FAST_3__LOAD_FAST_1", + [__STORE_FAST_3__LOAD_FAST_2] = "__STORE_FAST_3__LOAD_FAST_2", + [__STORE_FAST_3__LOAD_FAST_3] = "__STORE_FAST_3__LOAD_FAST_3", + [__STORE_FAST_3__LOAD_FAST_4] = "__STORE_FAST_3__LOAD_FAST_4", + [__STORE_FAST_3__LOAD_FAST_5] = "__STORE_FAST_3__LOAD_FAST_5", + [__STORE_FAST_3__LOAD_FAST_6] = "__STORE_FAST_3__LOAD_FAST_6", + [__STORE_FAST_3__LOAD_FAST_7] = 
"__STORE_FAST_3__LOAD_FAST_7", + [__STORE_FAST_4__LOAD_FAST_0] = "__STORE_FAST_4__LOAD_FAST_0", + [__STORE_FAST_4__LOAD_FAST_1] = "__STORE_FAST_4__LOAD_FAST_1", + [__STORE_FAST_4__LOAD_FAST_2] = "__STORE_FAST_4__LOAD_FAST_2", + [__STORE_FAST_4__LOAD_FAST_3] = "__STORE_FAST_4__LOAD_FAST_3", + [__STORE_FAST_4__LOAD_FAST_4] = "__STORE_FAST_4__LOAD_FAST_4", + [__STORE_FAST_4__LOAD_FAST_5] = "__STORE_FAST_4__LOAD_FAST_5", + [__STORE_FAST_4__LOAD_FAST_6] = "__STORE_FAST_4__LOAD_FAST_6", + [__STORE_FAST_4__LOAD_FAST_7] = "__STORE_FAST_4__LOAD_FAST_7", + [__STORE_FAST_5__LOAD_FAST_0] = "__STORE_FAST_5__LOAD_FAST_0", + [__STORE_FAST_5__LOAD_FAST_1] = "__STORE_FAST_5__LOAD_FAST_1", + [__STORE_FAST_5__LOAD_FAST_2] = "__STORE_FAST_5__LOAD_FAST_2", + [__STORE_FAST_5__LOAD_FAST_3] = "__STORE_FAST_5__LOAD_FAST_3", + [__STORE_FAST_5__LOAD_FAST_4] = "__STORE_FAST_5__LOAD_FAST_4", + [__STORE_FAST_5__LOAD_FAST_5] = "__STORE_FAST_5__LOAD_FAST_5", + [__STORE_FAST_5__LOAD_FAST_6] = "__STORE_FAST_5__LOAD_FAST_6", + [__STORE_FAST_5__LOAD_FAST_7] = "__STORE_FAST_5__LOAD_FAST_7", + [__STORE_FAST_6__LOAD_FAST_0] = "__STORE_FAST_6__LOAD_FAST_0", + [__STORE_FAST_6__LOAD_FAST_1] = "__STORE_FAST_6__LOAD_FAST_1", + [__STORE_FAST_6__LOAD_FAST_2] = "__STORE_FAST_6__LOAD_FAST_2", + [__STORE_FAST_6__LOAD_FAST_3] = "__STORE_FAST_6__LOAD_FAST_3", + [__STORE_FAST_6__LOAD_FAST_4] = "__STORE_FAST_6__LOAD_FAST_4", + [__STORE_FAST_6__LOAD_FAST_5] = "__STORE_FAST_6__LOAD_FAST_5", + [__STORE_FAST_6__LOAD_FAST_6] = "__STORE_FAST_6__LOAD_FAST_6", + [__STORE_FAST_6__LOAD_FAST_7] = "__STORE_FAST_6__LOAD_FAST_7", + [__STORE_FAST_7__LOAD_FAST_0] = "__STORE_FAST_7__LOAD_FAST_0", + [__STORE_FAST_7__LOAD_FAST_1] = "__STORE_FAST_7__LOAD_FAST_1", + [__STORE_FAST_7__LOAD_FAST_2] = "__STORE_FAST_7__LOAD_FAST_2", + [__STORE_FAST_7__LOAD_FAST_3] = "__STORE_FAST_7__LOAD_FAST_3", + [__STORE_FAST_7__LOAD_FAST_4] = "__STORE_FAST_7__LOAD_FAST_4", + [__STORE_FAST_7__LOAD_FAST_5] = "__STORE_FAST_7__LOAD_FAST_5", + 
[__STORE_FAST_7__LOAD_FAST_6] = "__STORE_FAST_7__LOAD_FAST_6", + [__STORE_FAST_7__LOAD_FAST_7] = "__STORE_FAST_7__LOAD_FAST_7", }; #endif // NEED_OPCODE_METADATA diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c6256efcb92640..0b6f42cb7655e6 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -231,7 +231,7 @@ def testfunc(x): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertIn("_SET_IP", uops) - self.assertIn("_LOAD_FAST_0", uops) + self.assertIn("__LOAD_FAST_1__LOAD_FAST_0", uops) def test_extended_arg(self): "Check EXTENDED_ARG handling in superblock creation" diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 1f6d6901430655..12c38791e88415 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1027,12 +1027,12 @@ def run_cases_test(self, input: str, expected: str): lines.pop(-1) actual = "".join(lines) - # if actual.strip() != expected.strip(): - # print("Actual:") - # print(actual) - # print("Expected:") - # print(expected) - # print("End") + if actual.strip() != expected.strip(): + print("Actual:") + print(actual) + print("Expected:") + print(expected) + print("End") self.assertEqual(actual.strip(), expected.strip()) def test_super_basic(self): @@ -1108,7 +1108,7 @@ def test_super_replicate(self): def test_super_replicate_only(self): input = """ - op(OP1, (arg1 -- out)) { + replicate(3) op(OP1, (arg1 -- out)) { FOO(); } replicate(3) op(OP2, (arg1 -- out)) { @@ -1117,24 +1117,24 @@ def test_super_replicate_only(self): replicate_only super(_OP1__OP2) = OP1 + OP2; """ output = """ - case OP1: { + case OP1_0: { if ((this_instr[1].opcode == OP2_0)) { - DPRINTF(2, "Inserting super _OP1__OP2_0\\n"); - REPLACE_OP(this_instr, _OP1__OP2_0, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super _OP1_0_OP2_0\\n"); + REPLACE_OP(this_instr, _OP1_0_OP2_0, this_instr[0].oparg, this_instr[0].operand); for (int i = 
1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; } if ((this_instr[1].opcode == OP2_1)) { - DPRINTF(2, "Inserting super _OP1__OP2_1\\n"); - REPLACE_OP(this_instr, _OP1__OP2_1, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super _OP1_0_OP2_1\\n"); + REPLACE_OP(this_instr, _OP1_0_OP2_1, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; } if ((this_instr[1].opcode == OP2_2)) { - DPRINTF(2, "Inserting super _OP1__OP2_2\\n"); - REPLACE_OP(this_instr, _OP1__OP2_2, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super _OP1_0_OP2_2\\n"); + REPLACE_OP(this_instr, _OP1_0_OP2_2, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -1142,6 +1142,59 @@ def test_super_replicate_only(self): this_instr += 1; break; } + + case OP1_1: { + if ((this_instr[1].opcode == OP2_0)) { + DPRINTF(2, "Inserting super _OP1_1_OP2_0\\n"); + REPLACE_OP(this_instr, _OP1_1_OP2_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_1)) { + DPRINTF(2, "Inserting super _OP1_1_OP2_1\\n"); + REPLACE_OP(this_instr, _OP1_1_OP2_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_2)) { + DPRINTF(2, "Inserting super _OP1_1_OP2_2\\n"); + REPLACE_OP(this_instr, _OP1_1_OP2_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case OP1_2: { + if ((this_instr[1].opcode == OP2_0)) { + DPRINTF(2, "Inserting super _OP1_2_OP2_0\\n"); + 
REPLACE_OP(this_instr, _OP1_2_OP2_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_1)) { + DPRINTF(2, "Inserting super _OP1_2_OP2_1\\n"); + REPLACE_OP(this_instr, _OP1_2_OP2_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == OP2_2)) { + DPRINTF(2, "Inserting super _OP1_2_OP2_2\\n"); + REPLACE_OP(this_instr, _OP1_2_OP2_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + """ self.run_cases_test(input, output) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a627a06b2b60a5..02927dbea42d45 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -252,6 +252,8 @@ dummy_func( Py_INCREF(value2); } + replicate_only super(_STORE_FAST__LOAD_FAST) = STORE_FAST + LOAD_FAST; + inst(STORE_FAST_STORE_FAST, (value2, value1 --)) { uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6de69e85d93567..e1df3212c00c47 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4013,12 +4013,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_0: { + case __LOAD_FAST_0__LOAD_FAST_0: { // _LOAD_FAST_0 { PyObject *value; oparg = 0; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4029,7 +4028,6 @@ { PyObject *value; oparg = 0; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4039,12 +4037,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_1: { - // _LOAD_FAST_1 + case __LOAD_FAST_0__LOAD_FAST_1: { + // _LOAD_FAST_0 { PyObject *value; - oparg = 1; - assert(oparg == 
CURRENT_OPARG()); + oparg = 0; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4055,7 +4052,6 @@ { PyObject *value; oparg = 1; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4065,12 +4061,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_2: { - // _LOAD_FAST_2 + case __LOAD_FAST_0__LOAD_FAST_2: { + // _LOAD_FAST_0 { PyObject *value; - oparg = 2; - assert(oparg == CURRENT_OPARG()); + oparg = 0; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4081,7 +4076,6 @@ { PyObject *value; oparg = 2; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4091,12 +4085,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_3: { - // _LOAD_FAST_3 + case __LOAD_FAST_0__LOAD_FAST_3: { + // _LOAD_FAST_0 { PyObject *value; - oparg = 3; - assert(oparg == CURRENT_OPARG()); + oparg = 0; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4107,7 +4100,6 @@ { PyObject *value; oparg = 3; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4117,12 +4109,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_4: { - // _LOAD_FAST_4 + case __LOAD_FAST_0__LOAD_FAST_4: { + // _LOAD_FAST_0 { PyObject *value; - oparg = 4; - assert(oparg == CURRENT_OPARG()); + oparg = 0; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4133,7 +4124,6 @@ { PyObject *value; oparg = 4; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4143,12 +4133,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_5: { - // _LOAD_FAST_5 + case __LOAD_FAST_0__LOAD_FAST_5: { + // _LOAD_FAST_0 { PyObject *value; - oparg = 5; - assert(oparg == CURRENT_OPARG()); + oparg = 0; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4159,7 +4148,6 @@ { PyObject *value; oparg = 5; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); 
Py_INCREF(value); @@ -4169,12 +4157,11 @@ break; } - case _LOAD_FAST__LOAD_FAST_6: { - // _LOAD_FAST_6 + case __LOAD_FAST_0__LOAD_FAST_6: { + // _LOAD_FAST_0 { PyObject *value; - oparg = 6; - assert(oparg == CURRENT_OPARG()); + oparg = 0; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4185,7 +4172,6 @@ { PyObject *value; oparg = 6; - assert(oparg == CURRENT_OPARG()); value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -4195,12 +4181,203 @@ break; } - case _LOAD_FAST__LOAD_FAST_7: { + case __LOAD_FAST_0__LOAD_FAST_7: { + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } // _LOAD_FAST_7 { PyObject *value; oparg = 7; - assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_0: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_1: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_2: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer 
+= 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_3: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_4: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_5: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_6: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_1__LOAD_FAST_7: { + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; value = GETLOCAL(oparg); assert(value != NULL); 
Py_INCREF(value); @@ -4211,7 +4388,2566 @@ { PyObject *value; oparg = 7; - assert(oparg == CURRENT_OPARG()); + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_0: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_1: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_2: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_3: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_4: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = 
GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_5: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_6: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_2__LOAD_FAST_7: { + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_0: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_1: { + 
// _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_2: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_3: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_4: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_5: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 
1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_6: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_3__LOAD_FAST_7: { + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_0: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_1: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_2: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + 
Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_3: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_4: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_5: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_6: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_4__LOAD_FAST_7: { + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + 
oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_0: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_1: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_2: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_3: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_4: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + 
stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_5: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_6: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_5__LOAD_FAST_7: { + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_0: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_1: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value 
!= NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_2: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_3: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_4: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_5: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_6: { + // _LOAD_FAST_6 + { + PyObject 
*value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_6__LOAD_FAST_7: { + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_0: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_1: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_2: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case 
__LOAD_FAST_7__LOAD_FAST_3: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_4: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_5: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_6: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __LOAD_FAST_7__LOAD_FAST_7: { + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + 
stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_0: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_1: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_2: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_3: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_4: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_5: { + // _STORE_FAST_0 + { + PyObject *value; + 
oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_6: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_0__LOAD_FAST_7: { + // _STORE_FAST_0 + { + PyObject *value; + oparg = 0; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_0: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_1: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_2: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + 
value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_3: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_4: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_5: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_6: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_1__LOAD_FAST_7: { + // _STORE_FAST_1 + { + PyObject *value; + oparg = 1; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case 
__STORE_FAST_2__LOAD_FAST_0: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_1: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_2: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_3: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_4: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_5: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + 
stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_6: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_2__LOAD_FAST_7: { + // _STORE_FAST_2 + { + PyObject *value; + oparg = 2; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_0: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_1: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_2: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + 
Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_3: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_4: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_5: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_6: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_3__LOAD_FAST_7: { + // _STORE_FAST_3 + { + PyObject *value; + oparg = 3; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_0: { + // _STORE_FAST_4 + { + 
PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_1: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_2: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_3: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_4: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_5: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject 
*value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_6: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_4__LOAD_FAST_7: { + // _STORE_FAST_4 + { + PyObject *value; + oparg = 4; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_0: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_1: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_2: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + 
break; + } + + case __STORE_FAST_5__LOAD_FAST_3: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_4: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_5: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_6: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_5__LOAD_FAST_7: { + // _STORE_FAST_5 + { + PyObject *value; + oparg = 5; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_0: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + 
SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_1: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_2: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_3: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_4: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_5: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != 
NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_6: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_6__LOAD_FAST_7: { + // _STORE_FAST_6 + { + PyObject *value; + oparg = 6; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_0: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_0 + { + PyObject *value; + oparg = 0; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_1: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_1 + { + PyObject *value; + oparg = 1; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_2: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_2 + { + PyObject *value; + oparg = 2; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_3: { + // 
_STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_3 + { + PyObject *value; + oparg = 3; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_4: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_4 + { + PyObject *value; + oparg = 4; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_5: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_5 + { + PyObject *value; + oparg = 5; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_6: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_6 + { + PyObject *value; + oparg = 6; + value = GETLOCAL(oparg); + assert(value != NULL); + Py_INCREF(value); + stack_pointer[0] = value; + stack_pointer += 1; + } + break; + } + + case __STORE_FAST_7__LOAD_FAST_7: { + // _STORE_FAST_7 + { + PyObject *value; + oparg = 7; + value = stack_pointer[-1]; + SETLOCAL(oparg, value); + stack_pointer += -1; + } + // _LOAD_FAST_7 + { + PyObject *value; + oparg = 7; value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); diff --git a/Python/uop_super_matcher_cases.c.h b/Python/uop_super_matcher_cases.c.h index 4563496d8ad642..5a48f0adca9524 100644 --- a/Python/uop_super_matcher_cases.c.h +++ b/Python/uop_super_matcher_cases.c.h @@ -10,8 +10,57 @@ case _LOAD_FAST_0: { if 
((this_instr[1].opcode == _LOAD_FAST_0)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_0\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), 
_NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_0__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_0__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -21,9 +70,58 @@ } case _LOAD_FAST_1: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_1)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_1\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_3, this_instr[0].oparg, 
this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_1__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_1__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -33,9 +131,58 @@ } case _LOAD_FAST_2: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { 
REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_2)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_2\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_2__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_2__LOAD_FAST_7, this_instr[0].oparg, 
this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -45,9 +192,58 @@ } case _LOAD_FAST_3: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_3)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_3\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super 
__LOAD_FAST_3__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_3__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_3__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -57,9 +253,58 @@ } case _LOAD_FAST_4: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, 
__LOAD_FAST_4__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_4)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_4\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_4__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_4__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -69,9 +314,58 @@ } case _LOAD_FAST_5: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + 
DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_5)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_5\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + 
if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_5__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_5__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -81,9 +375,58 @@ } case _LOAD_FAST_6: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting 
super __LOAD_FAST_6__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_6)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_6\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_6__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_6__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; @@ -93,9 +436,546 @@ } case _LOAD_FAST_7: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + 
break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __LOAD_FAST_7__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __LOAD_FAST_7__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_0: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == 
_LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_0__LOAD_FAST_7\n"); + 
REPLACE_OP(this_instr, __STORE_FAST_0__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_1: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, 
__STORE_FAST_1__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_1__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_1__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_2: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_3, 
this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_2__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_2__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_3: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_1, this_instr[0].oparg, 
this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_3__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_3__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 
0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_4: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + 
break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_4__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_4__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_5: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if 
((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_5__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_5__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_6: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == 
_LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_7)) { + DPRINTF(2, "Inserting super __STORE_FAST_6__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_6__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + this_instr += 1; + break; + } + + case _STORE_FAST_7: { + if ((this_instr[1].opcode == _LOAD_FAST_0)) { + DPRINTF(2, 
"Inserting super __STORE_FAST_7__LOAD_FAST_0\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_0, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_1)) { + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_1\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_1, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_2)) { + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_2\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_2, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_3)) { + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_3\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_3, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_4)) { + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_4\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_4, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_5)) { + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_5\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_5, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } + if ((this_instr[1].opcode == _LOAD_FAST_6)) { + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_6\n"); + REPLACE_OP(this_instr, 
__STORE_FAST_7__LOAD_FAST_6, this_instr[0].oparg, this_instr[0].operand); + for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } + this_instr += 2; + break; + } if ((this_instr[1].opcode == _LOAD_FAST_7)) { - DPRINTF(2, "Inserting super _LOAD_FAST__LOAD_FAST_7\n"); - REPLACE_OP(this_instr, _LOAD_FAST__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); + DPRINTF(2, "Inserting super __STORE_FAST_7__LOAD_FAST_7\n"); + REPLACE_OP(this_instr, __STORE_FAST_7__LOAD_FAST_7, this_instr[0].oparg, this_instr[0].operand); for (int i = 1; i < 2; i++) { REPLACE_OP((&this_instr[i]), _NOP, 0, 0); } this_instr += 2; break; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 47b83b633e75cb..b1dc99f368cdb6 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -2,6 +2,7 @@ import lexer import parser import re +import itertools from typing import Optional @@ -24,6 +25,7 @@ class Properties: side_exit: bool pure: bool passthrough: bool + replicate_only: bool tier: int | None = None oparg_and_1: bool = False const_oparg: int = -1 @@ -53,6 +55,7 @@ def from_list(properties: list["Properties"]) -> "Properties": side_exit=any(p.side_exit for p in properties), pure=all(p.pure for p in properties), passthrough=all(p.passthrough for p in properties), + replicate_only=all(p.replicate_only for p in properties), ) @@ -74,6 +77,7 @@ def from_list(properties: list["Properties"]) -> "Properties": side_exit=False, pure=False, passthrough=False, + replicate_only=False, ) @@ -182,6 +186,13 @@ def is_super(self) -> bool: Part = Uop | Skip +@dataclass +class SuperUop: + name: str + parts: list[Uop] + is_replicates_only: bool = False + + @dataclass class Instruction: name: str @@ -247,7 +258,7 @@ def dump(self, indent: str) -> None: class Analysis: instructions: dict[str, Instruction] uops: dict[str, Uop] - super_uops: dict[str, Instruction] + super_uops: dict[str, SuperUop] families: dict[str, Family] 
pseudos: dict[str, PseudoInstruction] opmap: dict[str, int] @@ -514,6 +525,7 @@ def compute_properties(op: parser.InstDef) -> Properties: pure="pure" in op.annotations, passthrough=passthrough, tier=tier_variable(op), + replicate_only="replicate_only" in op.annotations, ) @@ -588,6 +600,13 @@ def add_instruction( instructions[name] = Instruction(name, parts, None) +def add_superuop( + name: str, parts: list[Uop], instructions: dict[str, SuperUop], + is_replicate_only: bool = False +) -> None: + instructions[name] = SuperUop(name, parts, is_replicate_only) + + def desugar_inst( inst: parser.InstDef, instructions: dict[str, Instruction], uops: dict[str, Uop] ) -> None: @@ -635,7 +654,7 @@ def add_macro( def add_super( - super: parser.Super, super_uops: dict[str, Instruction], uops: dict[str, Uop] + super: parser.Super, super_uops: dict[str, SuperUop], uops: dict[str, Uop] ) -> None: parts: list[Uop] = [] for part in super.uops: @@ -660,14 +679,15 @@ def add_super( if len(operand_uses) > 1: analysis_error(f"Uop super {super.name}'s cache entry cannot fit in one operand.", super.tokens[0]) - if not "replicate_only" in super.annotations: - add_instruction(super.name, parts, super_uops) + replicate_only = "replicate_only" in super.annotations + if not replicate_only: + add_superuop(super.name, parts, super_uops) replicates_count = [p.replicated for p in parts if p.replicated != 0] if len(set(replicates_count)) > 1: analysis_error(f"Uop super {super.name}'s replicates are not all the same count: {replicates_count}", super.tokens[0]) - if replicates_count: + if replicates_count and not replicate_only: replicate_count = replicates_count[0] for i in range(replicate_count): res = [] @@ -677,7 +697,23 @@ def add_super( res.append(uops[part.name + f"_{i}"]) else: res.append(part) - add_instruction(super.name + f"_{i}", res, super_uops) + add_superuop(super.name + f"_{i}", res, super_uops) + elif replicate_only: + # For instructions consisting only of replicates, we can just 
make a cross + # product of all their replicates. Expanding the uops we cover. + # Since their oparg doesn't matter. + + replicate_count = replicates_count[0] + replicate_parts = [] + + for part in parts: + assert part.replicated > 0 + res = [] + for i in range(replicate_count): + res.append(uops[part.name + f"_{i}"]) + replicate_parts.append(res) + for permutation in itertools.product(*replicate_parts): + add_superuop('_' + '_'.join([op.name for op in permutation]), permutation, super_uops, is_replicate_only=True) @@ -802,7 +838,7 @@ def add_instruction(name: str) -> None: def analyze_forest(forest: list[parser.AstNode]) -> Analysis: instructions: dict[str, Instruction] = {} uops: dict[str, Uop] = {} - super_uops: dict[str, Instruction] = {} + super_uops: dict[str, SuperUop] = {} families: dict[str, Family] = {} pseudos: dict[str, PseudoInstruction] = {} for node in forest: diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index c1fac9a8affbf8..cc63c2731d574e 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -146,7 +146,7 @@ def tier2_replace_oparg( TIER2_REPLACEMENT_FUNCTIONS["EXIT_IF"] = tier2_replace_exit_if -def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: +def write_uop(uop: Uop, out: CWriter, stack: Stack, is_replicates_only: bool = False) -> None: try: out.start_line() if uop.properties.oparg: @@ -154,7 +154,8 @@ def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: assert uop.properties.const_oparg < 0 elif uop.properties.const_oparg >= 0: out.emit(f"oparg = {uop.properties.const_oparg};\n") - out.emit(f"assert(oparg == CURRENT_OPARG());\n") + if not is_replicates_only: + out.emit(f"assert(oparg == CURRENT_OPARG());\n") for var in reversed(uop.stack.inputs): out.emit(stack.pop(var)) if not uop.properties.stores_sp: @@ -225,7 +226,7 @@ def generate_tier2( out.emit(f"// {part.name}\n") out.emit("{\n") declare_variables(part, out) - 
write_uop(part, out, stack) + write_uop(part, out, stack, super_uop.is_replicates_only) stack.flush(out) out.emit("}\n") out.start_line() diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index 8a2a42e75cfb9b..fddbe4b661e3c5 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -51,7 +51,7 @@ def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]: yield " .data = INIT_STENCIL(OP##_data), \\" yield "}" yield "" - yield "static const StencilGroup stencil_groups[512] = {" + yield "static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = {" for opname in opnames: yield f" [{opname}] = INIT_STENCIL_GROUP({opname})," yield "};" From cd0ad106ce268da159516ef3dece714514a3e184 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Fri, 1 Mar 2024 23:02:04 +0800 Subject: [PATCH 14/14] comment out --- Lib/test/test_generated_cases.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 12c38791e88415..ce46a26983ac48 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1027,12 +1027,12 @@ def run_cases_test(self, input: str, expected: str): lines.pop(-1) actual = "".join(lines) - if actual.strip() != expected.strip(): - print("Actual:") - print(actual) - print("Expected:") - print(expected) - print("End") + # if actual.strip() != expected.strip(): + # print("Actual:") + # print(actual) + # print("Expected:") + # print(expected) + # print("End") self.assertEqual(actual.strip(), expected.strip()) def test_super_basic(self):