gh-90949: amend GH-139234 in prevision of future mitigation API (#139366)
Fix some typos left in f04bea44c37793561d753dd4ca6e7cd658137553,
and simplify some internal functions to ease maintenance of future
mitigation APIs.
diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst
index 4c21bc8..0b08a9b 100644
--- a/Doc/library/pyexpat.rst
+++ b/Doc/library/pyexpat.rst
@@ -281,8 +281,8 @@
.. note::
The maximum amplification factor is only considered if the threshold
- that can be adjusted :meth:`.SetAllocTrackerActivationThreshold` is
- exceeded.
+ that can be adjusted by :meth:`.SetAllocTrackerActivationThreshold`
+ is exceeded.
.. versionadded:: next
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst
index 3a395c1..fc55192 100644
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -556,8 +556,8 @@
xml.parsers.expat
-----------------
-* Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
- and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
+* Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
+ and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of
disproportional amounts of dynamic memory from within an Expat parser.
(Contributed by Bénédikt Tran in :gh:`90949`.)
diff --git a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst
index 0719d43..5611f33 100644
--- a/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst
+++ b/Misc/NEWS.d/next/Library/2025-09-22-14-40-11.gh-issue-90949.UM35nb.rst
@@ -1,5 +1,5 @@
-Add :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
-and :func:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
+Add :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerActivationThreshold`
+and :meth:`~xml.parsers.expat.xmlparser.SetAllocTrackerMaximumAmplification`
to :ref:`xmlparser <xmlparser-objects>` objects to prevent use of
disproportional amounts of dynamic memory from within an Expat parser.
Patch by Bénédikt Tran.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 50db77c..a59e565 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -125,7 +125,7 @@ CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
}
static int
-set_error_code(PyObject *err, enum XML_Error code)
+set_xml_error_attr_code(PyObject *err, enum XML_Error code)
{
PyObject *v = PyLong_FromLong((long)code);
int ok = v != NULL && PyObject_SetAttr(err, &_Py_ID(code), v) != -1;
@@ -137,7 +137,7 @@ set_error_code(PyObject *err, enum XML_Error code)
* false on an exception.
*/
static int
-set_error_location(PyObject *err, const char *name, XML_Size value)
+set_xml_error_attr_location(PyObject *err, const char *name, XML_Size value)
{
PyObject *v = PyLong_FromSize_t((size_t)value);
int ok = v != NULL && PyObject_SetAttrString(err, name, v) != -1;
@@ -147,31 +147,21 @@ set_error_location(PyObject *err, const char *name, XML_Size value)
static PyObject *
-format_xml_error(enum XML_Error code, XML_Size lineno, XML_Size column)
-{
- const char *errmsg = XML_ErrorString(code);
- PyUnicodeWriter *writer = PyUnicodeWriter_Create(strlen(errmsg) + 1);
- if (writer == NULL) {
- return NULL;
- }
- if (PyUnicodeWriter_Format(writer,
- "%s: line %zu, column %zu",
- errmsg, (size_t)lineno, (size_t)column) < 0)
- {
- PyUnicodeWriter_Discard(writer);
- return NULL;
- }
- return PyUnicodeWriter_Finish(writer);
-}
-
-static PyObject *
set_xml_error(pyexpat_state *state,
enum XML_Error code, XML_Size lineno, XML_Size column,
const char *errmsg)
{
- PyObject *arg = errmsg == NULL
- ? format_xml_error(code, lineno, column)
- : PyUnicode_FromStringAndSize(errmsg, strlen(errmsg));
+ PyObject *arg;
+ if (errmsg == NULL) {
+ arg = PyUnicode_FromFormat(
+ "%s: line %zu, column %zu",
+ XML_ErrorString(code),
+ (size_t)lineno, (size_t)column
+ );
+ }
+ else {
+ arg = PyUnicode_FromStringAndSize(errmsg, strlen(errmsg));
+ }
if (arg == NULL) {
return NULL;
}
@@ -179,9 +169,9 @@ set_xml_error(pyexpat_state *state,
Py_DECREF(arg);
if (
res != NULL
- && set_error_code(res, code)
- && set_error_location(res, "lineno", lineno)
- && set_error_location(res, "offset", column)
+ && set_xml_error_attr_code(res, code)
+ && set_xml_error_attr_location(res, "lineno", lineno)
+ && set_xml_error_attr_location(res, "offset", column)
) {
PyErr_SetObject(state->error, res);
}
@@ -1185,6 +1175,50 @@ pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
#endif
#if XML_COMBINED_VERSION >= 20702
+static PyObject *
+set_activation_threshold(xmlparseobject *self,
+ PyTypeObject *cls,
+ unsigned long long threshold,
+ XML_Bool (*setter)(XML_Parser, unsigned long long))
+{
+ assert(self->itself != NULL);
+ if (setter(self->itself, threshold) == XML_TRUE) {
+ Py_RETURN_NONE;
+ }
+ // The setter fails if self->itself is NULL (which is not possible here)
+ // or is a non-root parser, which currently only happens for parsers
+ // created by ExternalEntityParserCreate().
+ pyexpat_state *state = PyType_GetModuleState(cls);
+ return set_invalid_arg(state, self, "parser must be a root parser");
+}
+
+static PyObject *
+set_maximum_amplification(xmlparseobject *self,
+ PyTypeObject *cls,
+ float max_factor,
+ XML_Bool (*setter)(XML_Parser, float))
+{
+ assert(self->itself != NULL);
+ if (setter(self->itself, max_factor) == XML_TRUE) {
+ Py_RETURN_NONE;
+ }
+ // The setter fails if self->itself is NULL (which is not possible here),
+ // is a non-root parser, which currently only happens for parsers created
+ // by ExternalEntityParserCreate(), or if 'max_factor' is NaN or < 1.0.
+ pyexpat_state *state = PyType_GetModuleState(cls);
+ // Note: Expat has no API to determine whether a parser is a root parser,
+ // and since the Expat functions for defining the various maximum allowed
+ // amplifcation factors fail when a bad parser or an out-of-range factor
+ // is given without specifying which check failed, we check whether the
+ // factor is out-of-range to improve the error message. See also gh-90949.
+ const char *message = (isnan(max_factor) || max_factor < 1.0f)
+ ? "'max_factor' must be at least 1.0"
+ : "parser must be a root parser";
+ return set_invalid_arg(state, self, message);
+}
+#endif
+
+#if XML_COMBINED_VERSION >= 20702
/*[clinic input]
@permit_long_summary
@permit_long_docstring_body
@@ -1205,15 +1239,10 @@ pyexpat_xmlparser_SetAllocTrackerActivationThreshold_impl(xmlparseobject *self,
unsigned long long threshold)
/*[clinic end generated code: output=bed7e93207ba08c5 input=54182cd71ad69978]*/
{
- assert(self->itself != NULL);
- if (XML_SetAllocTrackerActivationThreshold(self->itself, threshold) == XML_TRUE) {
- Py_RETURN_NONE;
- }
- // XML_SetAllocTrackerActivationThreshold() can only fail if self->itself
- // is not a root parser (currently, this is equivalent to be created
- // by ExternalEntityParserCreate()).
- pyexpat_state *state = PyType_GetModuleState(cls);
- return set_invalid_arg(state, self, "parser must be a root parser");
+ return set_activation_threshold(
+ self, cls, threshold,
+ XML_SetAllocTrackerActivationThreshold
+ );
}
#endif
@@ -1248,24 +1277,10 @@ pyexpat_xmlparser_SetAllocTrackerMaximumAmplification_impl(xmlparseobject *self,
float max_factor)
/*[clinic end generated code: output=6e44bd48c9b112a0 input=3544abf9dd7ae055]*/
{
- assert(self->itself != NULL);
- if (XML_SetAllocTrackerMaximumAmplification(self->itself, max_factor) == XML_TRUE) {
- Py_RETURN_NONE;
- }
- // XML_SetAllocTrackerMaximumAmplification() can fail if self->itself
- // is not a root parser (currently, this is equivalent to be created
- // by ExternalEntityParserCreate()) or if 'max_factor' is NaN or < 1.0.
- //
- // Expat does not provide a way to determine whether a parser is a root
- // or not, nor does it provide a way to distinguish between failures in
- // XML_SetAllocTrackerMaximumAmplification() (see gh-90949), we manually
- // detect the factor out-of-range issue here so that users have a better
- // error message.
- pyexpat_state *state = PyType_GetModuleState(cls);
- const char *message = (isnan(max_factor) || max_factor < 1.0f)
- ? "'max_factor' must be at least 1.0"
- : "parser must be a root parser";
- return set_invalid_arg(state, self, message);
+ return set_maximum_amplification(
+ self, cls, max_factor,
+ XML_SetAllocTrackerMaximumAmplification
+ );
}
#endif