From 3d0d2b3eb44dceb008bd9174ae53cc67a0ab6eb2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 16 Aug 2021 20:28:24 +0100 Subject: [PATCH 01/12] Specialize BINARY_ADD. --- Include/internal/pycore_code.h | 1 + Include/opcode.h | 49 +++++++------ Lib/opcode.py | 5 ++ Python/ceval.c | 130 +++++++++++++++++++++++++++++---- Python/opcode_targets.h | 42 +++++------ Python/specialize.c | 70 ++++++++++++++++++ 6 files changed, 239 insertions(+), 58 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 282089c08379e0..aadad3a09ed1d3 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -300,6 +300,7 @@ int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); +int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); #define PRINT_SPECIALIZATION_STATS 0 #define PRINT_SPECIALIZATION_STATS_DETAILED 0 diff --git a/Include/opcode.h b/Include/opcode.h index 3334242e7e4d40..1280424e92b1c4 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -136,28 +136,33 @@ extern "C" { #define DICT_MERGE 164 #define DICT_UPDATE 165 #define CALL_METHOD_KW 166 -#define BINARY_SUBSCR_ADAPTIVE 7 -#define BINARY_SUBSCR_LIST_INT 8 -#define BINARY_SUBSCR_TUPLE_INT 13 -#define BINARY_SUBSCR_DICT 14 -#define JUMP_ABSOLUTE_QUICK 18 -#define LOAD_ATTR_ADAPTIVE 21 -#define LOAD_ATTR_SPLIT_KEYS 36 -#define LOAD_ATTR_WITH_HINT 38 -#define LOAD_ATTR_SLOT 39 -#define LOAD_ATTR_MODULE 40 -#define LOAD_GLOBAL_ADAPTIVE 41 -#define LOAD_GLOBAL_MODULE 42 -#define LOAD_GLOBAL_BUILTIN 43 -#define STORE_ATTR_ADAPTIVE 44 -#define STORE_ATTR_SPLIT_KEYS 45 -#define STORE_ATTR_SLOT 46 -#define STORE_ATTR_WITH_HINT 47 -#define LOAD_FAST__LOAD_FAST 48 -#define STORE_FAST__LOAD_FAST 58 -#define LOAD_FAST__LOAD_CONST 80 -#define LOAD_CONST__LOAD_FAST 81 -#define STORE_FAST__STORE_FAST 87 +#define BINARY_ADD_ADAPTIVE 7 +#define BINARY_ADD_INT 8 +#define BINARY_ADD_FLOAT 13 +#define BINARY_ADD_UNICODE_INPLACE_FAST 14 +#define BINARY_ADD_UNICODE_INPLACE_DEREF 18 +#define BINARY_SUBSCR_ADAPTIVE 21 +#define BINARY_SUBSCR_LIST_INT 36 +#define BINARY_SUBSCR_TUPLE_INT 38 +#define BINARY_SUBSCR_DICT 39 +#define JUMP_ABSOLUTE_QUICK 40 +#define LOAD_ATTR_ADAPTIVE 41 +#define LOAD_ATTR_SPLIT_KEYS 42 +#define LOAD_ATTR_WITH_HINT 43 +#define LOAD_ATTR_SLOT 44 +#define LOAD_ATTR_MODULE 45 +#define LOAD_GLOBAL_ADAPTIVE 46 +#define LOAD_GLOBAL_MODULE 47 +#define LOAD_GLOBAL_BUILTIN 48 +#define STORE_ATTR_ADAPTIVE 58 +#define STORE_ATTR_SPLIT_KEYS 80 +#define STORE_ATTR_SLOT 81 +#define STORE_ATTR_WITH_HINT 87 +#define LOAD_FAST__LOAD_FAST 88 +#define STORE_FAST__LOAD_FAST 120 +#define LOAD_FAST__LOAD_CONST 122 +#define LOAD_CONST__LOAD_FAST 123 +#define STORE_FAST__STORE_FAST 127 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 53cdc4aa0d549d..4513f6c9a730fb 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -220,6 +220,11 @@ def jabs_op(name, op): del def_op, name_op, jrel_op, jabs_op _specialized_instructions = [ + "BINARY_ADD_ADAPTIVE", + "BINARY_ADD_INT", + "BINARY_ADD_FLOAT", + "BINARY_ADD_UNICODE_INPLACE_FAST", + "BINARY_ADD_UNICODE_INPLACE_DEREF", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_LIST_INT", "BINARY_SUBSCR_TUPLE_INT", diff --git a/Python/ceval.c b/Python/ceval.c index 48787493fdd2ca..0502b0b9aa8b30 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1959,28 +1959,127 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(BINARY_ADD): { + PREDICTED(BINARY_ADD); + STAT_INC(BINARY_ADD, unquickened); PyObject *right = POP(); PyObject *left = TOP(); - PyObject *sum; - /* NOTE(vstinner): Please don't try to micro-optimize int+int on - CPython using bytecode, it is simply worthless. - See http://bugs.python.org/issue21955 and - http://bugs.python.org/issue10044 for the discussion. In short, - no patch shown any impact on a realistic benchmark, only a minor - speedup on microbenchmarks. */ - if (PyUnicode_CheckExact(left) && - PyUnicode_CheckExact(right)) { - sum = unicode_concatenate(tstate, left, right, frame, next_instr); - /* unicode_concatenate consumed the ref to left */ + PyObject *sum = PyNumber_Add(left, right); + SET_TOP(sum); + Py_DECREF(left); + Py_DECREF(right); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_ADAPTIVE): { + if (oparg == 0) { + PyObject *left = SECOND(); + PyObject *right = TOP(); + next_instr--; + if (_Py_Specialize_BinaryAdd(left, right, next_instr) < 0) { + goto error; + } + DISPATCH(); } else { - sum = PyNumber_Add(left, right); - Py_DECREF(left); + STAT_INC(BINARY_ADD, deferred); + // oparg is the adaptive (cache counter*2 | inplace_bit) + UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1); + STAT_DEC(BINARY_ADD, unquickened); + JUMP_TO_INSTRUCTION(BINARY_ADD); + } + } + + TARGET(BINARY_ADD_UNICODE_INPLACE_FAST): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_ADD); + DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD); + int next_oparg = _Py_OPARG(*next_instr); + assert(_Py_OPCODE(*next_instr) == STORE_FAST); + /* In the common case, there are 2 references to the value + * stored in 'variable' when the += is performed: one on the + * value stack (in 'v') and one still stored in the + * 'variable'. We try to delete the variable now to reduce + * the refcnt to 1. + */ + PyObject *var = GETLOCAL(next_oparg); + DEOPT_IF(var != left, BINARY_ADD); + GETLOCAL(next_oparg) = NULL; + Py_DECREF(left); + STACK_SHRINK(1); + PyUnicode_Append(&TOP(), right); + Py_DECREF(right); + if (TOP() == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_UNICODE_INPLACE_DEREF): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_ADD); + DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD); + int next_oparg = _Py_OPARG(*next_instr); + assert(_Py_OPCODE(*next_instr) == STORE_DEREF); + /* In the common case, there are 2 references to the value + * stored in 'variable' when the += is performed: one on the + * value stack (in 'v') and one still stored in the + * 'variable'. We try to delete the variable now to reduce + * the refcnt to 1. + */ + PyObject *cell = GETLOCAL(next_oparg); + DEOPT_IF(PyCell_GET(cell) != left, BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + PyCell_SET(cell, NULL); + Py_DECREF(left); + STACK_SHRINK(1); + PyUnicode_Append(&TOP(), right); + Py_DECREF(right); + if (TOP() == NULL) { + goto error; } + DISPATCH(); + } + + TARGET(BINARY_ADD_FLOAT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD); + DEOPT_IF(!PyFloat_CheckExact(right), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + double dsum = ((PyFloatObject *)left)->ob_fval + + ((PyFloatObject *)right)->ob_fval; + PyObject *sum = PyFloat_FromDouble(dsum); + SET_SECOND(sum); Py_DECREF(right); - SET_TOP(sum); - if (sum == NULL) + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { + goto error; + } + DISPATCH(); + } + + TARGET(BINARY_ADD_INT): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD); + DEOPT_IF(!PyLong_CheckExact(right), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + PyObject *sum = PyLong_Type.tp_as_number->nb_add(left, right); + SET_SECOND(sum); + Py_DECREF(right); + Py_DECREF(left); + STACK_SHRINK(1); + if (sum == NULL) { goto error; + } DISPATCH(); } @@ -4649,6 +4748,7 @@ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(STORE_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) +MISS_WITH_OPARG_COUNTER(BINARY_ADD) binary_subscr_dict_error: { diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c8036a63f22011..7a18c972d0a190 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -6,21 +6,21 @@ static void *opcode_targets[256] = { &&TARGET_DUP_TOP, &&TARGET_DUP_TOP_TWO, &&TARGET_ROT_FOUR, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, - &&TARGET_BINARY_SUBSCR_LIST_INT, + &&TARGET_BINARY_ADD_ADAPTIVE, + &&TARGET_BINARY_ADD_INT, &&TARGET_NOP, &&TARGET_UNARY_POSITIVE, &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, - &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_BINARY_ADD_FLOAT, + &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, - &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_BINARY_ADD_UNICODE_INPLACE_DEREF, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,19 +35,19 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_LOAD_ATTR_SPLIT_KEYS, + &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_BINARY_SUBSCR_TUPLE_INT, + &&TARGET_BINARY_SUBSCR_DICT, + &&TARGET_JUMP_ABSOLUTE_QUICK, + &&TARGET_LOAD_ATTR_ADAPTIVE, + &&TARGET_LOAD_ATTR_SPLIT_KEYS, &&TARGET_LOAD_ATTR_WITH_HINT, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_SPLIT_KEYS, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, + &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,14 +119,14 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&_unknown_opcode, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&_unknown_opcode, + &&TARGET_STORE_FAST__STORE_FAST, &&_unknown_opcode, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, diff --git a/Python/specialize.c b/Python/specialize.c index ecab69bcae78e4..ea6faf8d290691 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -121,6 +121,7 @@ _Py_GetSpecializationStats(void) { int err = 0; err += add_stat_dict(stats, LOAD_ATTR, "load_attr"); err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); + err += add_stat_dict(stats, BINARY_ADD, "binary_add"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); if (err < 0) { @@ -175,6 +176,7 @@ _Py_PrintSpecializationStats(void) #endif print_stats(out, &_specialization_stats[LOAD_ATTR], "load_attr"); print_stats(out, &_specialization_stats[LOAD_GLOBAL], "load_global"); + print_stats(out, &_specialization_stats[BINARY_ADD], "binary_add"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); if (out != stderr) { @@ -223,6 +225,7 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) { static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, + [BINARY_ADD] = BINARY_ADD_ADAPTIVE, [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, }; @@ -231,6 +234,7 @@ static uint8_t adaptive_opcodes[256] = { static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ + [BINARY_ADD] = 0, [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -423,6 +427,13 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_TUPLE_NON_INT_SUBSCRIPT 9 #define SPEC_FAIL_NOT_TUPLE_LIST_OR_DICT 10 +/* Binary add */ + +#define SPEC_FAIL_NOT_INPLACE 10 +#define SPEC_FAIL_NON_FUNCTION_SCOPE 11 +#define SPEC_FAIL_DIFFERENT_TYPES 12 +#define SPEC_FAIL_OTHER_TYPE 13 + static int specialize_module_load_attr( @@ -906,3 +917,62 @@ _Py_Specialize_BinarySubscr( return 0; } + +int +specialize_unicode_add(_Py_CODEUNIT *instr) +{ + int oparg = _Py_OPARG(instr[0]); + if (oparg == 0) { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_NOT_INPLACE); + return -1; + } + int next_opcode = _Py_OPCODE(instr[1]); + switch (next_opcode) { + case STORE_FAST: + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, 1); + return 0; + case STORE_DEREF: + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_DEREF, 1); + return 0; + } + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_NON_FUNCTION_SCOPE); + return -1; +} + +int +_Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) +{ + PyTypeObject *left_type = Py_TYPE(left); + if (left_type != Py_TYPE(right)) { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_DIFFERENT_TYPES); + goto fail; + } + if (left_type == &PyUnicode_Type) { + int err = specialize_unicode_add(instr); + if (err) { + goto fail; + } + goto success; + } + else if (left_type == &PyLong_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, _Py_OPARG(*instr)); + goto success; + } + else if (left_type == &PyFloat_Type) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, _Py_OPARG(*instr)); + goto success; + + } + else { + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_OTHER_TYPE); + } +fail: + STAT_INC(BINARY_ADD, specialization_failure); + assert(!PyErr_Occurred()); + *instr = _Py_MAKECODEUNIT(_Py_OPCODE(*instr), ADAPTIVE_CACHE_BACKOFF); + return 0; +success: + STAT_INC(BINARY_ADD, specialization_success); + assert(!PyErr_Occurred()); + return 0; +} From 40ef9e6ff28d65d44e4b00a1cf216da15f8188d0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 17 Aug 2021 11:04:04 +0100 Subject: [PATCH 02/12] Turn on stats --- Include/internal/pycore_code.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index aadad3a09ed1d3..2ab3ddc58d6d4b 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -302,9 +302,9 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -#define PRINT_SPECIALIZATION_STATS 0 -#define PRINT_SPECIALIZATION_STATS_DETAILED 0 -#define PRINT_SPECIALIZATION_STATS_TO_FILE 0 +#define PRINT_SPECIALIZATION_STATS 1 +#define PRINT_SPECIALIZATION_STATS_DETAILED 1 +#define PRINT_SPECIALIZATION_STATS_TO_FILE 1 #ifdef Py_DEBUG #define COLLECT_SPECIALIZATION_STATS 1 From 68376486f9d90d23671da1cadb7e1b2528db8df0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 17 Aug 2021 16:14:10 +0100 Subject: [PATCH 03/12] Turn on unicode concat specialization. --- Python/specialize.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index ea6faf8d290691..ffc78db0b525b6 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -429,7 +429,6 @@ _Py_Quicken(PyCodeObject *code) { /* Binary add */ -#define SPEC_FAIL_NOT_INPLACE 10 #define SPEC_FAIL_NON_FUNCTION_SCOPE 11 #define SPEC_FAIL_DIFFERENT_TYPES 12 #define SPEC_FAIL_OTHER_TYPE 13 @@ -921,11 +920,6 @@ _Py_Specialize_BinarySubscr( int specialize_unicode_add(_Py_CODEUNIT *instr) { - int oparg = _Py_OPARG(instr[0]); - if (oparg == 0) { - SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_NOT_INPLACE); - return -1; - } int next_opcode = _Py_OPCODE(instr[1]); switch (next_opcode) { case STORE_FAST: From 0c869ed1dedf56db8ca00c873300c766ffb3edd2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 17 Aug 2021 17:22:53 +0100 Subject: [PATCH 04/12] Record cache hits for BINARY_ADD and add non-inplace unicode specialization. --- Include/opcode.h | 49 +++++++++++++++++++++-------------------- Lib/opcode.py | 1 + Python/ceval.c | 30 ++++++++++++++++++++++++- Python/opcode_targets.h | 22 +++++++++--------- Python/specialize.c | 8 +++++-- 5 files changed, 72 insertions(+), 38 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index 1280424e92b1c4..74f170fbd56c2a 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -139,30 +139,31 @@ extern "C" { #define BINARY_ADD_ADAPTIVE 7 #define BINARY_ADD_INT 8 #define BINARY_ADD_FLOAT 13 -#define BINARY_ADD_UNICODE_INPLACE_FAST 14 -#define BINARY_ADD_UNICODE_INPLACE_DEREF 18 -#define BINARY_SUBSCR_ADAPTIVE 21 -#define BINARY_SUBSCR_LIST_INT 36 -#define BINARY_SUBSCR_TUPLE_INT 38 -#define BINARY_SUBSCR_DICT 39 -#define JUMP_ABSOLUTE_QUICK 40 -#define LOAD_ATTR_ADAPTIVE 41 -#define LOAD_ATTR_SPLIT_KEYS 42 -#define LOAD_ATTR_WITH_HINT 43 -#define LOAD_ATTR_SLOT 44 -#define LOAD_ATTR_MODULE 45 -#define LOAD_GLOBAL_ADAPTIVE 46 -#define LOAD_GLOBAL_MODULE 47 -#define LOAD_GLOBAL_BUILTIN 48 -#define STORE_ATTR_ADAPTIVE 58 -#define STORE_ATTR_SPLIT_KEYS 80 -#define STORE_ATTR_SLOT 81 -#define STORE_ATTR_WITH_HINT 87 -#define LOAD_FAST__LOAD_FAST 88 -#define STORE_FAST__LOAD_FAST 120 -#define LOAD_FAST__LOAD_CONST 122 -#define LOAD_CONST__LOAD_FAST 123 -#define STORE_FAST__STORE_FAST 127 +#define BINARY_ADD_UNICODE 14 +#define BINARY_ADD_UNICODE_INPLACE_FAST 18 +#define BINARY_ADD_UNICODE_INPLACE_DEREF 21 +#define BINARY_SUBSCR_ADAPTIVE 36 +#define BINARY_SUBSCR_LIST_INT 38 +#define BINARY_SUBSCR_TUPLE_INT 39 +#define BINARY_SUBSCR_DICT 40 +#define JUMP_ABSOLUTE_QUICK 41 +#define LOAD_ATTR_ADAPTIVE 42 +#define LOAD_ATTR_SPLIT_KEYS 43 +#define LOAD_ATTR_WITH_HINT 44 +#define LOAD_ATTR_SLOT 45 +#define LOAD_ATTR_MODULE 46 +#define LOAD_GLOBAL_ADAPTIVE 47 +#define LOAD_GLOBAL_MODULE 48 +#define LOAD_GLOBAL_BUILTIN 58 +#define STORE_ATTR_ADAPTIVE 80 +#define STORE_ATTR_SPLIT_KEYS 81 +#define STORE_ATTR_SLOT 87 +#define STORE_ATTR_WITH_HINT 88 +#define LOAD_FAST__LOAD_FAST 120 +#define STORE_FAST__LOAD_FAST 122 +#define LOAD_FAST__LOAD_CONST 123 +#define LOAD_CONST__LOAD_FAST 127 +#define STORE_FAST__STORE_FAST 128 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 4513f6c9a730fb..b612e37200bf1c 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -223,6 +223,7 @@ def jabs_op(name, op): "BINARY_ADD_ADAPTIVE", "BINARY_ADD_INT", "BINARY_ADD_FLOAT", + "BINARY_ADD_UNICODE", "BINARY_ADD_UNICODE_INPLACE_FAST", "BINARY_ADD_UNICODE_INPLACE_DEREF", "BINARY_SUBSCR_ADAPTIVE", diff --git a/Python/ceval.c b/Python/ceval.c index 0502b0b9aa8b30..1d70a321f518f1 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1435,6 +1435,12 @@ eval_frame_handle_pending(PyThreadState *tstate) #define UPDATE_PREV_INSTR_OPARG(instr, oparg) ((uint8_t*)(instr))[-1] = (oparg) +static inline void +record_hit_inline(_Py_CODEUNIT *next_instr, int oparg) +{ + UPDATE_PREV_INSTR_OPARG(next_instr, saturating_increment(oparg)); +} + #define GLOBALS() frame->f_globals #define BUILTINS() frame->f_builtins #define LOCALS() frame->f_locals @@ -1985,13 +1991,30 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } else { STAT_INC(BINARY_ADD, deferred); - // oparg is the adaptive (cache counter*2 | inplace_bit) UPDATE_PREV_INSTR_OPARG(next_instr, oparg - 1); STAT_DEC(BINARY_ADD, unquickened); JUMP_TO_INSTRUCTION(BINARY_ADD); } } + TARGET(BINARY_ADD_UNICODE): { + PyObject *left = SECOND(); + PyObject *right = TOP(); + DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); + DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); + PyObject *res = PyUnicode_Concat(left, right); + STACK_SHRINK(1); + SET_TOP(res); + Py_DECREF(left); + Py_DECREF(right); + if (TOP() == NULL) { + goto error; + } + DISPATCH(); + } + TARGET(BINARY_ADD_UNICODE_INPLACE_FAST): { PyObject *left = SECOND(); PyObject *right = TOP(); @@ -2008,6 +2031,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr */ PyObject *var = GETLOCAL(next_oparg); DEOPT_IF(var != left, BINARY_ADD); + STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); GETLOCAL(next_oparg) = NULL; Py_DECREF(left); STACK_SHRINK(1); @@ -2036,6 +2061,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *cell = GETLOCAL(next_oparg); DEOPT_IF(PyCell_GET(cell) != left, BINARY_ADD); STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); PyCell_SET(cell, NULL); Py_DECREF(left); STACK_SHRINK(1); @@ -2053,6 +2079,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_ADD); STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); double dsum = ((PyFloatObject *)left)->ob_fval + ((PyFloatObject *)right)->ob_fval; PyObject *sum = PyFloat_FromDouble(dsum); @@ -2072,6 +2099,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD); DEOPT_IF(!PyLong_CheckExact(right), BINARY_ADD); STAT_INC(BINARY_ADD, hit); + record_hit_inline(next_instr, oparg); PyObject *sum = PyLong_Type.tp_as_number->nb_add(left, right); SET_SECOND(sum); Py_DECREF(right); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 7a18c972d0a190..54bcc6ddff861b 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -13,14 +13,14 @@ static void *opcode_targets[256] = { &&TARGET_UNARY_NEGATIVE, &&TARGET_UNARY_NOT, &&TARGET_BINARY_ADD_FLOAT, - &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, + &&TARGET_BINARY_ADD_UNICODE, &&TARGET_UNARY_INVERT, &&TARGET_BINARY_MATRIX_MULTIPLY, &&TARGET_INPLACE_MATRIX_MULTIPLY, - &&TARGET_BINARY_ADD_UNICODE_INPLACE_DEREF, + &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, + &&TARGET_BINARY_ADD_UNICODE_INPLACE_DEREF, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,8 +35,9 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_BINARY_SUBSCR_LIST_INT, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_POP_EXCEPT_AND_RERAISE, + &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_JUMP_ABSOLUTE_QUICK, @@ -47,7 +48,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_STORE_ATTR_ADAPTIVE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_STORE_ATTR_SPLIT_KEYS, - &&TARGET_STORE_ATTR_SLOT, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, + &&TARGET_STORE_ATTR_SLOT, &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,15 +119,15 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_JUMP_IF_NOT_EXC_MATCH, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, diff --git a/Python/specialize.c b/Python/specialize.c index ffc78db0b525b6..eb2fb6a993585b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -921,14 +921,18 @@ int specialize_unicode_add(_Py_CODEUNIT *instr) { int next_opcode = _Py_OPCODE(instr[1]); + int next_oparg = _Py_OPARG(instr[1]); switch (next_opcode) { case STORE_FAST: - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, 1); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); return 0; case STORE_DEREF: - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_DEREF, 1); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_DEREF, saturating_start()); return 0; + default: + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); } + SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_NON_FUNCTION_SCOPE); return -1; } From d2dbd52dd9f94bf5c7cfb4e8c58b33e813ff54dd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 17 Aug 2021 18:15:26 +0100 Subject: [PATCH 05/12] Turn off stats --- Include/internal/pycore_code.h | 6 +++--- Python/specialize.c | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2ab3ddc58d6d4b..aadad3a09ed1d3 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -302,9 +302,9 @@ int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNI int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); -#define PRINT_SPECIALIZATION_STATS 1 -#define PRINT_SPECIALIZATION_STATS_DETAILED 1 -#define PRINT_SPECIALIZATION_STATS_TO_FILE 1 +#define PRINT_SPECIALIZATION_STATS 0 +#define PRINT_SPECIALIZATION_STATS_DETAILED 0 +#define PRINT_SPECIALIZATION_STATS_TO_FILE 0 #ifdef Py_DEBUG #define COLLECT_SPECIALIZATION_STATS 1 diff --git a/Python/specialize.c b/Python/specialize.c index eb2fb6a993585b..22ef2058b61624 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -921,7 +921,6 @@ int specialize_unicode_add(_Py_CODEUNIT *instr) { int next_opcode = _Py_OPCODE(instr[1]); - int next_oparg = _Py_OPARG(instr[1]); switch (next_opcode) { case STORE_FAST: *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); From 5cddffc95ad63758c014d41a65eb436a70d1ea5c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 18 Aug 2021 11:14:50 +0100 Subject: [PATCH 06/12] Add NEWS. --- .../2021-08-18-11-14-38.bpo-44945.CO3s77.rst | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst new file mode 100644 index 00000000000000..dc6c89cacfa684 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst @@ -0,0 +1,9 @@ +Specialize the BINARY_ADD instruction using the PEP 659 machinery. Adds six +new instructions: + +* BINARY_ADD_ADAPTIVE +* BINARY_ADD_FLOAT +* BINARY_ADD_INT +* BINARY_ADD_UNICODE +* BINARY_ADD_UNICODE_INPLACE_FAST +* BINARY_ADD_UNICODE_INPLACE_DEREF From a68664eeaa07bd02731c7302b57d6580316e97fe Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 23 Aug 2021 09:46:35 +0100 Subject: [PATCH 07/12] Expose long_add for specialize opcode. --- Include/internal/pycore_long.h | 2 ++ Objects/longobject.c | 18 ++++++++++++------ Python/ceval.c | 6 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 2bea3a55ec8735..7336c317c3f2c8 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -34,6 +34,8 @@ static inline PyObject* _PyLong_GetZero(void) static inline PyObject* _PyLong_GetOne(void) { return __PyLong_GetSmallInt_internal(1); } +PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); + #ifdef __cplusplus } #endif diff --git a/Objects/longobject.c b/Objects/longobject.c index d9127b31fd4867..8b93a89152b805 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3060,16 +3060,14 @@ x_sub(PyLongObject *a, PyLongObject *b) return maybe_small_long(long_normalize(z)); } -static PyObject * -long_add(PyLongObject *a, PyLongObject *b) +PyObject * +_PyLong_Add(PyLongObject *a, PyLongObject *b) { - PyLongObject *z; - - CHECK_BINOP(a, b); - if (Py_ABS(Py_SIZE(a)) <= 1 && Py_ABS(Py_SIZE(b)) <= 1) { return PyLong_FromLong(MEDIUM_VALUE(a) + MEDIUM_VALUE(b)); } + + PyLongObject *z; if (Py_SIZE(a) < 0) { if (Py_SIZE(b) < 0) { z = x_add(a, b); @@ -3094,6 +3092,14 @@ long_add(PyLongObject *a, PyLongObject *b) return (PyObject *)z; } +static PyObject * +long_add(PyLongObject *a, PyLongObject *b) +{ + CHECK_BINOP(a, b); + return _PyLong_Add(a, b); +} + + static PyObject * long_sub(PyLongObject *a, PyLongObject *b) { diff --git a/Python/ceval.c b/Python/ceval.c index 3c05fc9cb5d48d..d4d56561b8b60c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2121,7 +2121,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DEOPT_IF(!PyLong_CheckExact(right), BINARY_ADD); STAT_INC(BINARY_ADD, hit); record_hit_inline(next_instr, oparg); - PyObject *sum = PyLong_Type.tp_as_number->nb_add(left, right); + PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); SET_SECOND(sum); Py_DECREF(right); Py_DECREF(left); @@ -4473,7 +4473,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr oparg = cache->adaptive.original_oparg; STAT_DEC(LOAD_METHOD, unquickened); JUMP_TO_INSTRUCTION(LOAD_METHOD); - } + } } TARGET(LOAD_METHOD_CACHED): { @@ -4491,7 +4491,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr assert(cache1->tp_version != 0); assert(self_cls->tp_dictoffset >= 0); assert(Py_TYPE(self_cls)->tp_dictoffset > 0); - + // inline version of _PyObject_GetDictPtr for offset >= 0 PyObject *dict = self_cls->tp_dictoffset != 0 ? *(PyObject **) ((char *)self + self_cls->tp_dictoffset) : NULL; From 873ba62b5556e4b310e53d91909a4ec1e24c10cf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 24 Aug 2021 17:09:57 +0100 Subject: [PATCH 08/12] Remove overly specialized BINARY_ADD_UNICODE_INPLACE_DEREF. --- Include/opcode.h | 53 ++++++++++++++++++++--------------------- Lib/opcode.py | 1 - Python/ceval.c | 35 +++------------------------ Python/opcode_targets.h | 26 ++++++++++---------- Python/specialize.c | 36 +++++++++++++--------------- 5 files changed, 58 insertions(+), 93 deletions(-) diff --git a/Include/opcode.h b/Include/opcode.h index 09894db5a1d2af..0043cc2d209aaa 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -141,33 +141,32 @@ extern "C" { #define BINARY_ADD_FLOAT 13 #define BINARY_ADD_UNICODE 14 #define BINARY_ADD_UNICODE_INPLACE_FAST 18 -#define BINARY_ADD_UNICODE_INPLACE_DEREF 21 -#define BINARY_SUBSCR_ADAPTIVE 36 -#define BINARY_SUBSCR_LIST_INT 38 -#define BINARY_SUBSCR_TUPLE_INT 39 -#define BINARY_SUBSCR_DICT 40 -#define JUMP_ABSOLUTE_QUICK 41 -#define LOAD_ATTR_ADAPTIVE 42 -#define LOAD_ATTR_SPLIT_KEYS 43 -#define LOAD_ATTR_WITH_HINT 44 -#define LOAD_ATTR_SLOT 45 -#define LOAD_ATTR_MODULE 46 -#define LOAD_GLOBAL_ADAPTIVE 47 -#define LOAD_GLOBAL_MODULE 48 -#define LOAD_GLOBAL_BUILTIN 58 -#define LOAD_METHOD_ADAPTIVE 80 -#define LOAD_METHOD_CACHED 81 -#define LOAD_METHOD_CLASS 87 -#define LOAD_METHOD_MODULE 88 -#define STORE_ATTR_ADAPTIVE 120 -#define STORE_ATTR_SPLIT_KEYS 122 -#define STORE_ATTR_SLOT 123 -#define STORE_ATTR_WITH_HINT 127 -#define LOAD_FAST__LOAD_FAST 128 -#define STORE_FAST__LOAD_FAST 134 -#define LOAD_FAST__LOAD_CONST 140 -#define LOAD_CONST__LOAD_FAST 143 -#define STORE_FAST__STORE_FAST 149 +#define BINARY_SUBSCR_ADAPTIVE 21 +#define BINARY_SUBSCR_LIST_INT 36 +#define BINARY_SUBSCR_TUPLE_INT 38 +#define BINARY_SUBSCR_DICT 39 +#define JUMP_ABSOLUTE_QUICK 40 +#define LOAD_ATTR_ADAPTIVE 41 +#define LOAD_ATTR_SPLIT_KEYS 42 +#define LOAD_ATTR_WITH_HINT 43 +#define LOAD_ATTR_SLOT 44 +#define LOAD_ATTR_MODULE 45 +#define LOAD_GLOBAL_ADAPTIVE 46 +#define LOAD_GLOBAL_MODULE 47 +#define LOAD_GLOBAL_BUILTIN 48 +#define LOAD_METHOD_ADAPTIVE 58 +#define LOAD_METHOD_CACHED 80 +#define LOAD_METHOD_CLASS 81 +#define LOAD_METHOD_MODULE 87 +#define STORE_ATTR_ADAPTIVE 88 +#define STORE_ATTR_SPLIT_KEYS 120 +#define STORE_ATTR_SLOT 122 +#define STORE_ATTR_WITH_HINT 123 +#define LOAD_FAST__LOAD_FAST 127 +#define STORE_FAST__LOAD_FAST 128 +#define LOAD_FAST__LOAD_CONST 134 +#define LOAD_CONST__LOAD_FAST 140 +#define STORE_FAST__STORE_FAST 143 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 4f101cf750f4d5..5d356746888757 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -225,7 +225,6 @@ def jabs_op(name, op): "BINARY_ADD_FLOAT", "BINARY_ADD_UNICODE", "BINARY_ADD_UNICODE_INPLACE_FAST", - "BINARY_ADD_UNICODE_INPLACE_DEREF", "BINARY_SUBSCR_ADAPTIVE", "BINARY_SUBSCR_LIST_INT", "BINARY_SUBSCR_TUPLE_INT", diff --git a/Python/ceval.c b/Python/ceval.c index d4d56561b8b60c..f3cd7d96398540 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2045,8 +2045,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr int next_oparg = _Py_OPARG(*next_instr); assert(_Py_OPCODE(*next_instr) == STORE_FAST); /* In the common case, there are 2 references to the value - * stored in 'variable' when the += is performed: one on the - * value stack (in 'v') and one still stored in the + * stored in 'variable' when the v = v + ... is performed: one + * on the value stack (in 'v') and one still stored in the * 'variable'. We try to delete the variable now to reduce * the refcnt to 1. */ @@ -2055,36 +2055,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr STAT_INC(BINARY_ADD, hit); record_hit_inline(next_instr, oparg); GETLOCAL(next_oparg) = NULL; - Py_DECREF(left); - STACK_SHRINK(1); - PyUnicode_Append(&TOP(), right); - Py_DECREF(right); - if (TOP() == NULL) { - goto error; - } - DISPATCH(); - } - - TARGET(BINARY_ADD_UNICODE_INPLACE_DEREF): { - PyObject *left = SECOND(); - PyObject *right = TOP(); - DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); - DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_ADD); - DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD); - int next_oparg = _Py_OPARG(*next_instr); - assert(_Py_OPCODE(*next_instr) == STORE_DEREF); - /* In the common case, there are 2 references to the value - * stored in 'variable' when the += is performed: one on the - * value stack (in 'v') and one still stored in the - * 'variable'. We try to delete the variable now to reduce - * the refcnt to 1. - */ - PyObject *cell = GETLOCAL(next_oparg); - DEOPT_IF(PyCell_GET(cell) != left, BINARY_ADD); - STAT_INC(BINARY_ADD, hit); - record_hit_inline(next_instr, oparg); - PyCell_SET(cell, NULL); - Py_DECREF(left); + Py_SET_REFCNT(left, 1); STACK_SHRINK(1); PyUnicode_Append(&TOP(), right); Py_DECREF(right); diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 9d3543e834e856..f3bfae545bcd48 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -20,7 +20,7 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_ADD_UNICODE_INPLACE_FAST, &&TARGET_BINARY_POWER, &&TARGET_BINARY_MULTIPLY, - &&TARGET_BINARY_ADD_UNICODE_INPLACE_DEREF, + &&TARGET_BINARY_SUBSCR_ADAPTIVE, &&TARGET_BINARY_MODULO, &&TARGET_BINARY_ADD, &&TARGET_BINARY_SUBTRACT, @@ -35,9 +35,8 @@ static void *opcode_targets[256] = { &&TARGET_MATCH_KEYS, &&TARGET_COPY_DICT_WITHOUT_KEYS, &&TARGET_PUSH_EXC_INFO, - &&TARGET_BINARY_SUBSCR_ADAPTIVE, - &&TARGET_POP_EXCEPT_AND_RERAISE, &&TARGET_BINARY_SUBSCR_LIST_INT, + &&TARGET_POP_EXCEPT_AND_RERAISE, &&TARGET_BINARY_SUBSCR_TUPLE_INT, &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_JUMP_ABSOLUTE_QUICK, @@ -48,6 +47,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_LOAD_GLOBAL_BUILTIN, + &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_METHOD_ADAPTIVE, &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_METHOD_CLASS, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_LOAD_METHOD_CLASS, &&TARGET_LOAD_METHOD_MODULE, + &&TARGET_STORE_ATTR_ADAPTIVE, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,36 +119,36 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_JUMP_IF_NOT_EXC_MATCH, &&TARGET_STORE_ATTR_SPLIT_KEYS, + &&TARGET_JUMP_IF_NOT_EXC_MATCH, &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, - &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_GEN_START, &&TARGET_RAISE_VARARGS, &&TARGET_CALL_FUNCTION, &&TARGET_MAKE_FUNCTION, &&TARGET_BUILD_SLICE, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_MAKE_CELL, &&TARGET_LOAD_CLOSURE, &&TARGET_LOAD_DEREF, &&TARGET_STORE_DEREF, &&TARGET_DELETE_DEREF, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_LOAD_CONST__LOAD_FAST, &&TARGET_CALL_FUNCTION_KW, &&TARGET_CALL_FUNCTION_EX, - &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_EXTENDED_ARG, &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, &&TARGET_LOAD_CLASSDEREF, - &&TARGET_STORE_FAST__STORE_FAST, + &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, &&TARGET_MATCH_CLASS, diff --git a/Python/specialize.c b/Python/specialize.c index c7f337729b4b20..5a98b6143a2a9f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -908,7 +908,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_NOT_METHOD); goto fail; } - + assert(kind == METHOD); // If o.__dict__ changes, the method might be found in o.__dict__ // instead of old type lookup. So record o.__dict__'s keys. @@ -943,15 +943,15 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, } // Fall through. } // Else owner is maybe a builtin with no dict, or __slots__. Doesn't matter. - + /* `descr` is borrowed. Just check tp_version_tag before accessing in case * it's deleted. This is safe for methods (even inherited ones from super * classes!) as long as tp_version_tag is validated for two main reasons: - * + * * 1. The class will always hold a reference to the method so it will * usually not be GC-ed. Should it be deleted in Python, e.g. * `del obj.meth`, tp_version_tag will be invalidated, because of reason 2. - * + * * 2. The pre-existing type method cache (MCACHE) uses the same principles * of caching a borrowed descriptor. It does all the heavy lifting for us. * E.g. it invalidates on any MRO modification, on any type object @@ -1087,23 +1087,16 @@ _Py_Specialize_BinarySubscr( } -int +void specialize_unicode_add(_Py_CODEUNIT *instr) { int next_opcode = _Py_OPCODE(instr[1]); - switch (next_opcode) { - case STORE_FAST: - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); - return 0; - case STORE_DEREF: - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_DEREF, saturating_start()); - return 0; - default: - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); + if (next_opcode == STORE_FAST) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); + } + else { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); } - - SPECIALIZATION_FAIL(BINARY_ADD, SPEC_FAIL_NON_FUNCTION_SCOPE); - return -1; } int @@ -1115,9 +1108,12 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) goto fail; } if (left_type == &PyUnicode_Type) { - int err = specialize_unicode_add(instr); - if (err) { - goto fail; + int next_opcode = _Py_OPCODE(instr[1]); + if (next_opcode == STORE_FAST) { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); + } + else { + *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); } goto success; } From 60ed8a2b7379e9acecd02a3cd5482268ea1fa5eb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 25 Aug 2021 12:17:36 +0100 Subject: [PATCH 09/12] Correct NEWS item --- .../2021-08-18-11-14-38.bpo-44945.CO3s77.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst index dc6c89cacfa684..66d53ec523de3c 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-18-11-14-38.bpo-44945.CO3s77.rst @@ -1,9 +1,7 @@ -Specialize the BINARY_ADD instruction using the PEP 659 machinery. Adds six -new instructions: +Specialize the BINARY_ADD instruction using the PEP 659 machinery. Adds five new instructions: * BINARY_ADD_ADAPTIVE * BINARY_ADD_FLOAT * BINARY_ADD_INT * BINARY_ADD_UNICODE * BINARY_ADD_UNICODE_INPLACE_FAST -* BINARY_ADD_UNICODE_INPLACE_DEREF From 648e215e3d01a42faf61e5f44f9a33fa0eb9cb99 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 25 Aug 2021 12:22:24 +0100 Subject: [PATCH 10/12] Delete unused function --- Python/specialize.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 5a98b6143a2a9f..55c36705676474 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -978,6 +978,7 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, return 0; } + int _Py_Specialize_LoadGlobal( PyObject *globals, PyObject *builtins, @@ -1045,7 +1046,6 @@ _Py_Specialize_LoadGlobal( return 0; } - int _Py_Specialize_BinarySubscr( PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) @@ -1086,19 +1086,6 @@ _Py_Specialize_BinarySubscr( return 0; } - -void -specialize_unicode_add(_Py_CODEUNIT *instr) -{ - int next_opcode = _Py_OPCODE(instr[1]); - if (next_opcode == STORE_FAST) { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE_INPLACE_FAST, saturating_start()); - } - else { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_UNICODE, saturating_start()); - } -} - int _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) { From 9dd054a812dddd39f15fb4e4fa9b5536e82dec36 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 25 Aug 2021 12:33:28 +0100 Subject: [PATCH 11/12] Slight improvement to DEOPT checks in BINARY_ADD specializations. --- Python/ceval.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index f3cd7d96398540..a0a77a03ebe7d8 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2022,7 +2022,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); PyObject *right = TOP(); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); - DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); STAT_INC(BINARY_ADD, hit); record_hit_inline(next_instr, oparg); PyObject *res = PyUnicode_Concat(left, right); @@ -2040,7 +2040,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); PyObject *right = TOP(); DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_ADD); - DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); DEOPT_IF(Py_REFCNT(left) != 2, BINARY_ADD); int next_oparg = _Py_OPARG(*next_instr); assert(_Py_OPCODE(*next_instr) == STORE_FAST); @@ -2069,7 +2069,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); PyObject *right = TOP(); DEOPT_IF(!PyFloat_CheckExact(left), BINARY_ADD); - DEOPT_IF(!PyFloat_CheckExact(right), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); STAT_INC(BINARY_ADD, hit); record_hit_inline(next_instr, oparg); double dsum = ((PyFloatObject *)left)->ob_fval + @@ -2089,7 +2089,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr PyObject *left = SECOND(); PyObject *right = TOP(); DEOPT_IF(!PyLong_CheckExact(left), BINARY_ADD); - DEOPT_IF(!PyLong_CheckExact(right), BINARY_ADD); + DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_ADD); STAT_INC(BINARY_ADD, hit); record_hit_inline(next_instr, oparg); PyObject *sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); From 3146f48931cf37b59818225210b515a13a9693bc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 26 Aug 2021 14:38:46 +0100 Subject: [PATCH 12/12] Set initial counts for BINARY_ADD specializations and fix refcount leak. --- Python/ceval.c | 2 +- Python/specialize.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/ceval.c b/Python/ceval.c index 737df03a780d6d..8aaa83b1b74bf4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2055,7 +2055,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr STAT_INC(BINARY_ADD, hit); record_hit_inline(next_instr, oparg); GETLOCAL(next_oparg) = NULL; - Py_SET_REFCNT(left, 1); + Py_DECREF(left); STACK_SHRINK(1); PyUnicode_Append(&TOP(), right); Py_DECREF(right); diff --git a/Python/specialize.c b/Python/specialize.c index 55c36705676474..b321368148f023 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1105,11 +1105,11 @@ _Py_Specialize_BinaryAdd(PyObject *left, PyObject *right, _Py_CODEUNIT *instr) goto success; } else if (left_type == &PyLong_Type) { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_INT, saturating_start()); goto success; } else if (left_type == &PyFloat_Type) { - *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(BINARY_ADD_FLOAT, saturating_start()); goto success; }