From ff4092caa91d79e62b4e987bcb3062c48537bc55 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Fri, 30 Jun 2023 20:28:44 +0300 Subject: [PATCH 1/8] Improve error handling in marshalling code Currently `w_object` and `w_complex_object` routines continue to marshal complex object until they return to their callers, regardless of error or exception occurred. This leads to ignored exceptions and, as a result, assertion failures in debug builds, e. g. if 'PyObject_GetBuffer' is called during marshalling of complex object with exception set. Another consequence is the late return from `w_object` in case of error: if arg is a container and the first item is marshallable, all other items will be processed anyway. This fix improves error handling of some marshalling routines, making them to return status value (0 or 1) depending on whether an error occurred, and use this status to decide whether to marshal remaining objects or not. * make `w_object` and `w_complex_object` functions to return 1 if marshalling succeeded and 0 if error occurred * in `w_object` check if error occurred and return 0 in this case * add `W_OBJECT` macro that calls `w_object` and returns 0 from function in which it is expanded * make `w_complex_object` function to use `W_OBJECT` macro calls instead of `w_object` function calls * change `W_SIZE` macro to return 0 on failure * make `w_pstring` return 0 to avoid compiler warning --- Python/marshal.c | 73 +++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 7cfc7cc00306f58..7a8e3348a72ac5f 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -188,7 +188,7 @@ w_long(long x, WFILE *p) if ((n) > SIZE32_MAX) { \ (p)->depth--; \ (p)->error = WFERR_UNMARSHALLABLE; \ - return; \ + return 0; \ } \ w_long((long)(n), p); \ } while(0) @@ -196,11 +196,12 @@ w_long(long x, WFILE *p) # define W_SIZE w_long #endif -static void +static int w_pstring(const void *s, Py_ssize_t n, WFILE *p) { W_SIZE(n, p); w_string(s, n, p); + return 0; } static void @@ -340,10 +341,10 @@ w_ref(PyObject *v, char *flag, WFILE *p) return 1; } -static void +static int w_complex_object(PyObject *v, char flag, WFILE *p); -static void +static int w_object(PyObject *v, WFILE *p) { char flag = '\0'; @@ -352,6 +353,7 @@ w_object(PyObject *v, WFILE *p) if (p->depth > MAX_MARSHAL_STACK_DEPTH) { p->error = WFERR_NESTEDTOODEEP; + return 0; } else if (v == NULL) { w_byte(TYPE_NULL, p); @@ -374,10 +376,22 @@ w_object(PyObject *v, WFILE *p) else if (!w_ref(v, &flag, p)) w_complex_object(v, flag, p); + if (p->error != WFERR_OK) { + return 0; + } + p->depth--; + return 1; } -static void +#define W_OBJECT(v, p) \ + do { \ + if (!w_object(v, p)) { \ + return 0; \ + } \ + } while (0) + +static int w_complex_object(PyObject *v, char flag, WFILE *p) { Py_ssize_t i, n; @@ -455,7 +469,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) if (utf8 == NULL) { p->depth--; p->error = WFERR_UNMARSHALLABLE; - return; + return 0; } if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) W_TYPE(TYPE_INTERNED, p); @@ -476,7 +490,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) W_SIZE(n, p); } for (i = 0; i < n; i++) { - w_object(PyTuple_GET_ITEM(v, i), p); + W_OBJECT(PyTuple_GET_ITEM(v, i), p); } } else if (PyList_CheckExact(v)) { @@ -484,7 +498,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) n = PyList_GET_SIZE(v); W_SIZE(n, p); for (i = 0; i < n; i++) { - w_object(PyList_GET_ITEM(v, i), p); + W_OBJECT(PyList_GET_ITEM(v, i), p); } } else if (PyDict_CheckExact(v)) { @@ -494,10 +508,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) /* This one is NULL object terminated! */ pos = 0; while (PyDict_Next(v, &pos, &key, &value)) { - w_object(key, p); - w_object(value, p); + W_OBJECT(key, p); + W_OBJECT(value, p); } - w_object((PyObject *)NULL, p); + w_byte(TYPE_NULL, p); } else if (PyAnySet_CheckExact(v)) { PyObject *value; @@ -517,7 +531,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) PyObject *pairs = PyList_New(n); if (pairs == NULL) { p->error = WFERR_NOMEMORY; - return; + return 0; } Py_ssize_t i = 0; while (_PySet_NextEntry(v, &pos, &value, &hash)) { @@ -525,14 +539,14 @@ w_complex_object(PyObject *v, char flag, WFILE *p) if (dump == NULL) { p->error = WFERR_UNMARSHALLABLE; Py_DECREF(pairs); - return; + return 0; } PyObject *pair = PyTuple_Pack(2, dump, value); Py_DECREF(dump); if (pair == NULL) { p->error = WFERR_NOMEMORY; Py_DECREF(pairs); - return; + return 0; } PyList_SET_ITEM(pairs, i++, pair); } @@ -540,12 +554,12 @@ w_complex_object(PyObject *v, char flag, WFILE *p) if (PyList_Sort(pairs)) { p->error = WFERR_NOMEMORY; Py_DECREF(pairs); - return; + return 0; } for (Py_ssize_t i = 0; i < n; i++) { PyObject *pair = PyList_GET_ITEM(pairs, i); value = PyTuple_GET_ITEM(pair, 1); - w_object(value, p); + W_OBJECT(value, p); } Py_DECREF(pairs); } @@ -554,7 +568,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) PyObject *co_code = _PyCode_GetCode(co); if (co_code == NULL) { p->error = WFERR_NOMEMORY; - return; + return 0; } W_TYPE(TYPE_CODE, p); w_long(co->co_argcount, p); @@ -562,17 +576,17 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_long(co->co_kwonlyargcount, p); w_long(co->co_stacksize, p); w_long(co->co_flags, p); - w_object(co_code, p); - w_object(co->co_consts, p); - w_object(co->co_names, p); - w_object(co->co_localsplusnames, p); - w_object(co->co_localspluskinds, p); - w_object(co->co_filename, p); - w_object(co->co_name, p); - w_object(co->co_qualname, p); + W_OBJECT(co_code, p); + W_OBJECT(co->co_consts, p); + W_OBJECT(co->co_names, p); + W_OBJECT(co->co_localsplusnames, p); + W_OBJECT(co->co_localspluskinds, p); + W_OBJECT(co->co_filename, p); + W_OBJECT(co->co_name, p); + W_OBJECT(co->co_qualname, p); w_long(co->co_firstlineno, p); - w_object(co->co_linetable, p); - w_object(co->co_exceptiontable, p); + W_OBJECT(co->co_linetable, p); + W_OBJECT(co->co_exceptiontable, p); Py_DECREF(co_code); } else if (PyObject_CheckBuffer(v)) { @@ -582,7 +596,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_byte(TYPE_UNKNOWN, p); p->depth--; p->error = WFERR_UNMARSHALLABLE; - return; + return 0; } W_TYPE(TYPE_STRING, p); w_pstring(view.buf, view.len, p); @@ -591,7 +605,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) else { W_TYPE(TYPE_UNKNOWN, p); p->error = WFERR_UNMARSHALLABLE; + return 0; } + + return 1; } static void From 5c51576a8a23a4ba384dc8be5a34432e25070382 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Fri, 30 Jun 2023 22:08:23 +0300 Subject: [PATCH 2/8] added test --- Lib/test/test_marshal.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 3d9d6d5d0aca34e..b68e9a24a04a24b 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -375,6 +375,20 @@ def test_deterministic_sets(self): _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") self.assertEqual(dump_0, dump_1) + def test_raise_after_first_unmarshallable(self): + # gh-106287: marshal.dumps should fail after the first unmarshallable + # item of container, rather than trying to process remaining items + nested = last = [] + for i in range(4000): + last.append([0]) + last = last[-1] + x = ({int}, 1, {'a': 2, 'b': 3, 'c': [1, 2, 3, 4]}, 2 + 3j, nested, memoryview(b'')) + # this should report that given object is unmarshallable rather deeply nested + # and shouldn't cause assertion failure as it did before + with self.assertRaises(ValueError) as cm: + marshal.dumps(x) + self.assertIn("unmarshallable object", str(cm.exception)) + LARGE_SIZE = 2**31 pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 From 34dd559eb5e1c212676a743b29548eb0d39accc6 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 30 Jun 2023 19:50:14 +0000 Subject: [PATCH 3/8] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2023-06-30-19-50-10.gh-issue-106287.Tji5Ve.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-06-30-19-50-10.gh-issue-106287.Tji5Ve.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-30-19-50-10.gh-issue-106287.Tji5Ve.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-30-19-50-10.gh-issue-106287.Tji5Ve.rst new file mode 100644 index 000000000000000..cdc2620a9c32c47 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-30-19-50-10.gh-issue-106287.Tji5Ve.rst @@ -0,0 +1 @@ +Improve handling of containers with items of unsupported types during marshalling: fail after first unsupported item and do not propagate possibly raised exception on attempts to marshal subsequent ones. From d84aba00a6aa65621aba5d221f48511fef516893 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Mon, 1 Jan 2024 23:02:23 +0300 Subject: [PATCH 4/8] refactored test --- Lib/test/test_marshal.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index b68e9a24a04a24b..abc4c8c48e7da84 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -378,10 +378,9 @@ def test_deterministic_sets(self): def test_raise_after_first_unmarshallable(self): # gh-106287: marshal.dumps should fail after the first unmarshallable # item of container, rather than trying to process remaining items - nested = last = [] + nested = [] for i in range(4000): - last.append([0]) - last = last[-1] + nested = [0, nested] x = ({int}, 1, {'a': 2, 'b': 3, 'c': [1, 2, 3, 4]}, 2 + 3j, nested, memoryview(b'')) # this should report that given object is unmarshallable rather deeply nested # and shouldn't cause assertion failure as it did before From 89cc99cd105a0d21757cec9ed7fdd95ebea39d05 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Mon, 1 Jan 2024 23:13:05 +0300 Subject: [PATCH 5/8] fixed refleaks --- Python/marshal.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 420f5b1b43d576f..315d69a85081fe4 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -557,7 +557,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) for (Py_ssize_t i = 0; i < n; i++) { PyObject *pair = PyList_GET_ITEM(pairs, i); value = PyTuple_GET_ITEM(pair, 1); - W_OBJECT(value, p); + if (!w_object(value, p)) { + Py_DECREF(pairs); + return 0; + } } Py_DECREF(pairs); } @@ -574,17 +577,17 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_long(co->co_kwonlyargcount, p); w_long(co->co_stacksize, p); w_long(co->co_flags, p); - W_OBJECT(co_code, p); - W_OBJECT(co->co_consts, p); - W_OBJECT(co->co_names, p); - W_OBJECT(co->co_localsplusnames, p); - W_OBJECT(co->co_localspluskinds, p); - W_OBJECT(co->co_filename, p); - W_OBJECT(co->co_name, p); - W_OBJECT(co->co_qualname, p); + w_object(co_code, p); + w_object(co->co_consts, p); + w_object(co->co_names, p); + w_object(co->co_localsplusnames, p); + w_object(co->co_localspluskinds, p); + w_object(co->co_filename, p); + w_object(co->co_name, p); + w_object(co->co_qualname, p); w_long(co->co_firstlineno, p); - W_OBJECT(co->co_linetable, p); - W_OBJECT(co->co_exceptiontable, p); + w_object(co->co_linetable, p); + w_object(co->co_exceptiontable, p); Py_DECREF(co_code); } else if (PyObject_CheckBuffer(v)) { From 7fb643aeee88e3752397b75ad8e6423e179f17ff Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Wed, 8 May 2024 19:03:44 +0300 Subject: [PATCH 6/8] fixed return values in w_complex_object --- Python/marshal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index b806e0fd01962a8..8166e0aafbb3fed 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -568,7 +568,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) Py_END_CRITICAL_SECTION(); if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) { Py_DECREF(pairs); - return; + return 0; } assert(i == n); if (PyList_Sort(pairs)) { @@ -589,7 +589,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) else if (PyCode_Check(v)) { if (!p->allow_code) { p->error = WFERR_CODE_NOT_ALLOWED; - return; + return 0; } PyCodeObject *co = (PyCodeObject *)v; PyObject *co_code = _PyCode_GetCode(co); From cc0c806c536734ea255f972c919267bcb94a1316 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Wed, 8 May 2024 20:10:13 +0300 Subject: [PATCH 7/8] changed return values --- Python/marshal.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 8166e0aafbb3fed..8519d246e5bcde2 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -366,7 +366,7 @@ w_object(PyObject *v, WFILE *p) if (p->depth > MAX_MARSHAL_STACK_DEPTH) { p->error = WFERR_NESTEDTOODEEP; - return 0; + return -1; } else if (v == NULL) { w_byte(TYPE_NULL, p); @@ -390,17 +390,17 @@ w_object(PyObject *v, WFILE *p) w_complex_object(v, flag, p); if (p->error != WFERR_OK) { - return 0; + return -1; } p->depth--; - return 1; + return 0; } #define W_OBJECT(v, p) \ do { \ - if (!w_object(v, p)) { \ - return 0; \ + if (w_object(v, p) < 0) { \ + return -1; \ } \ } while (0) @@ -482,7 +482,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) if (utf8 == NULL) { p->depth--; p->error = WFERR_UNMARSHALLABLE; - return 0; + return -1; } if (p->version >= 3 && PyUnicode_CHECK_INTERNED(v)) W_TYPE(TYPE_INTERNED, p); @@ -544,7 +544,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) PyObject *pairs = PyList_New(n); if (pairs == NULL) { p->error = WFERR_NOMEMORY; - return 0; + return -1; } Py_ssize_t i = 0; Py_BEGIN_CRITICAL_SECTION(v); @@ -568,20 +568,20 @@ w_complex_object(PyObject *v, char flag, WFILE *p) Py_END_CRITICAL_SECTION(); if (p->error == WFERR_UNMARSHALLABLE || p->error == WFERR_NOMEMORY) { Py_DECREF(pairs); - return 0; + return -1; } assert(i == n); if (PyList_Sort(pairs)) { p->error = WFERR_NOMEMORY; Py_DECREF(pairs); - return 0; + return -1; } for (Py_ssize_t i = 0; i < n; i++) { PyObject *pair = PyList_GET_ITEM(pairs, i); value = PyTuple_GET_ITEM(pair, 1); - if (!w_object(value, p)) { + if (w_object(value, p) < 0) { Py_DECREF(pairs); - return 0; + return -1; } } Py_DECREF(pairs); @@ -589,13 +589,13 @@ w_complex_object(PyObject *v, char flag, WFILE *p) else if (PyCode_Check(v)) { if (!p->allow_code) { p->error = WFERR_CODE_NOT_ALLOWED; - return 0; + return -1; } PyCodeObject *co = (PyCodeObject *)v; PyObject *co_code = _PyCode_GetCode(co); if (co_code == NULL) { p->error = WFERR_NOMEMORY; - return 0; + return -1; } W_TYPE(TYPE_CODE, p); w_long(co->co_argcount, p); @@ -615,6 +615,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_object(co->co_linetable, p); w_object(co->co_exceptiontable, p); Py_DECREF(co_code); + + if (p->error != WFERR_OK) { + return -1; + } } else if (PyObject_CheckBuffer(v)) { /* Write unknown bytes-like objects as a bytes object */ @@ -623,7 +627,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_byte(TYPE_UNKNOWN, p); p->depth--; p->error = WFERR_UNMARSHALLABLE; - return 0; + return -1; } W_TYPE(TYPE_STRING, p); w_pstring(view.buf, view.len, p); @@ -632,10 +636,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) else { W_TYPE(TYPE_UNKNOWN, p); p->error = WFERR_UNMARSHALLABLE; - return 0; + return -1; } - return 1; + return 0; } static void From 94682e72ded00b7d380fe7e5320b9c610a9008fe Mon Sep 17 00:00:00 2001 From: Radislav Chugunov Date: Wed, 8 May 2024 20:12:25 +0300 Subject: [PATCH 8/8] changed W_SIZE return value --- Python/marshal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/marshal.c b/Python/marshal.c index 8519d246e5bcde2..0b43284a0ccdb66 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -198,7 +198,7 @@ w_long(long x, WFILE *p) if ((n) > SIZE32_MAX) { \ (p)->depth--; \ (p)->error = WFERR_UNMARSHALLABLE; \ - return 0; \ + return -1; \ } \ w_long((long)(n), p); \ } while(0)