https://github.com/python/cpython/commit/5150795b1cb6393e2b34834b2729d9176315054e
commit: 5150795b1cb6393e2b34834b2729d9176315054e
branch: main
author: Victor Stinner <[email protected]>
committer: vstinner <[email protected]>
date: 2024-06-20T19:06:16Z
summary:

gh-119182: Optimize PyUnicode_FromFormat() (#120796)

Use strchr() and ucs1lib_find_max_char() to optimize the code path
formatting sub-strings between '%' formats.

files:
M Objects/unicodeobject.c

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1f8c89dd12a528..e6feed47fbb2bf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2875,47 +2875,47 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
 static int
 unicode_from_format(_PyUnicodeWriter *writer, const char *format, va_list 
vargs)
 {
-    writer->min_length += strlen(format) + 100;
+    Py_ssize_t len = strlen(format);
+    writer->min_length += len + 100;
     writer->overallocate = 1;
 
-    va_list vargs2;
-    const char *f;
-
     // Copy varags to be able to pass a reference to a subfunction.
+    va_list vargs2;
     va_copy(vargs2, vargs);
 
-    for (f = format; *f; ) {
+    // _PyUnicodeWriter_WriteASCIIString() below requires the format string
+    // to be encoded to ASCII.
+    int is_ascii = (ucs1lib_find_max_char((Py_UCS1*)format, (Py_UCS1*)format + 
len) < 128);
+    if (!is_ascii) {
+        Py_ssize_t i;
+        for (i=0; i < len && (unsigned char)format[i] <= 127; i++);
+        PyErr_Format(PyExc_ValueError,
+            "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+            "string, got a non-ASCII byte: 0x%02x",
+            (unsigned char)format[i]);
+        goto fail;
+    }
+
+    for (const char *f = format; *f; ) {
         if (*f == '%') {
             f = unicode_fromformat_arg(writer, f, &vargs2);
             if (f == NULL)
                 goto fail;
         }
         else {
-            const char *p;
-            Py_ssize_t len;
-
-            p = f;
-            do
-            {
-                if ((unsigned char)*p > 127) {
-                    PyErr_Format(PyExc_ValueError,
-                        "PyUnicode_FromFormatV() expects an ASCII-encoded 
format "
-                        "string, got a non-ASCII byte: 0x%02x",
-                        (unsigned char)*p);
-                    goto fail;
-                }
-                p++;
+            const char *p = strchr(f, '%');
+            if (p != NULL) {
+                len = p - f;
             }
-            while (*p != '\0' && *p != '%');
-            len = p - f;
-
-            if (*p == '\0')
+            else {
+                len = strlen(f);
                 writer->overallocate = 0;
+            }
 
-            if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0)
+            if (_PyUnicodeWriter_WriteASCIIString(writer, f, len) < 0) {
                 goto fail;
-
-            f = p;
+            }
+            f += len;
         }
     }
     va_end(vargs2);

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-checkins.python.org/
Member address: [email protected]

Reply via email to