Inada Naoki <[email protected]> added the comment:
Some compilers generate inefficient code for PR-14752.
I wrote another patch that more compilers optimize well.
$ perf record ./python -m pyperf timeit -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)"
# PR-14752
gcc-7 (Ubuntu 7.4.0-8ubuntu1) 7.4.0
Mean +- std dev: 1.11 ms +- 0.01 ms
```
│ scanstring_unicode():
│ c = PyUnicode_READ(kind, buf, next);
11.92 │270: movzbl (%r15,%r8,1),%eax
│ if (c == '"' || c == '\\') {
27.97 │ cmp $0x22,%eax
│ c = PyUnicode_READ(kind, buf, next);
29.22 │ mov %eax,0x34(%rsp)
│ if (c == '"' || c == '\\') {
0.46 │ ↑ je ef
0.02 │ cmp $0x5c,%eax
│ ↑ je ef
│ if (c <= 0x1f && invalid < 0) {
│ cmp $0x1f,%eax
0.00 │ ↓ ja 297
│ test %rdx,%rdx
│ cmovs %r8,%rdx
│ for (next = end; next < len; next++) {
29.49 │297: add $0x1,%r8
│ cmp %r8,%r12
0.92 │ ↑ jne 270
```
gcc-8 (Ubuntu 8.3.0-6ubuntu1) 8.3.0
Mean +- std dev: 712 us +- 1 us
```
│ c = PyUnicode_READ(kind, buf, next);
│188: movzbl 0x0(%rbp,%rbx,1),%eax
│ mov %eax,0x34(%rsp)
│ if (c == '"' || c == '\\') {
│ cmp $0x22,%eax
│ ↓ je 1d0
│ nop
0.00 │1a0: cmp $0x5c,%eax
│ ↓ je 1d0
│ if (c <= 0x1f && invalid < 0) {
│ cmp $0x1f,%eax
49.84 │ ↓ ja 1b1
│ test %rdx,%rdx
│ cmovs %rbx,%rdx
│ for (next = end; next < len; next++) {
│1b1: add $0x1,%rbx
0.00 │ cmp %rbx,%r15
│ ↑ je ff
│ c = PyUnicode_READ(kind, buf, next);
0.61 │ movzbl 0x0(%rbp,%rbx,1),%eax
49.53 │ mov %eax,0x34(%rsp)
│ if (c == '"' || c == '\\') {
0.01 │ cmp $0x22,%eax
│ ↑ jne 1a0
0.00 │ nop
```
clang version 7.0.1-8 (tags/RELEASE_701/final)
Mean +- std dev: 951 us +- 1 us
```
│ c = PyUnicode_READ(kind, buf, next);
9.76 │110: movzbl (%r12,%r13,1),%eax
9.47 │ mov %eax,0xc(%rsp)
8.85 │ cmp $0x22,%eax
│ if (c == '"' || c == '\\') {
│ ↓ je 170
8.78 │ cmp $0x5c,%al
│ ↓ je 170
│ if (c <= 0x1f && invalid < 0) {
9.16 │ cmp $0x20,%al
9.09 │ mov %rdx,%rcx
9.16 │ cmovb %r13,%rcx
9.00 │ test %rdx,%rdx
8.78 │ cmovs %rcx,%rdx
│ for (next = end; next < len; next++) {
9.09 │ add $0x1,%r13
│ cmp %r15,%r13
8.86 │ ↑ jl 110
│ ↓ jmp 170
│ nop
```
clang version 8.0.0-3 (tags/RELEASE_800/final)
Mean +- std dev: 953 us +- 0 us
```
│ c = PyUnicode_READ(kind, buf, next);
10.04 │100: movzbl (%r15,%r14,1),%eax
9.27 │ mov %eax,0x4(%rsp)
8.87 │ cmp $0x22,%eax
│ if (c == '"' || c == '\\') {
│ ↓ je 160
8.78 │ cmp $0x5c,%al
│ ↓ je 160
│ if (c <= 0x1f && invalid < 0) {
8.97 │ cmp $0x20,%al
8.97 │ mov %rdx,%rcx
8.89 │ cmovb %r14,%rcx
8.81 │ test %rdx,%rdx
9.14 │ cmovs %rcx,%rdx
│ for (next = end; next < len; next++) {
9.25 │ add $0x1,%r14
│ cmp %rdi,%r14
8.99 │ ↑ jl 100
│ ↓ jmp 160
│ nop
```
# modified
```
/* Find the end of the string or the next escape */
Py_UCS4 c;
{
Py_UCS4 d = 0;
for (next = end; next < len; next++) {
d = PyUnicode_READ(kind, buf, next);
if (d == '"' || d == '\\') {
break;
}
if (d <= 0x1f && strict) {
raise_errmsg("Invalid control character at", pystr, next);
goto bail;
}
}
c = d;
}
```
gcc-7 (Ubuntu 7.4.0-8ubuntu1) 7.4.0
Mean +- std dev: 708 us +- 1 us
```
│ for (next = end; next < len; next++) {
20.29 │170: add $0x1,%rbx
0.31 │ cmp %rbx,%r12
│ ↓ je 1b0
│ d = PyUnicode_READ(kind, buf, next);
44.48 │179: movzbl 0x0(%rbp,%rbx,1),%eax
│ if (d == '"' || d == '\\') {
5.38 │ cmp $0x22,%eax
│ ↓ je 2c0
23.82 │ cmp $0x5c,%eax
│ ↓ je 2c0
│ if (d <= 0x1f && strict) {
│ cmp $0x1f,%eax
5.68 │ ↑ ja 170
│ test %r13d,%r13d
│ ↑ jne ed
```
gcc-8 (Ubuntu 8.3.0-6ubuntu1) 8.3.0
Mean +- std dev: 708 us +- 1 us
```
│ for (next = end; next < len; next++) {
6.54 │170: add $0x1,%rbx
19.25 │ cmp %rbx,%r12
│ ↓ jle 341
│ d = PyUnicode_READ(kind, buf, next);
13.89 │17d: movzbl 0x0(%rbp,%rbx,1),%eax
│ if (d == '"' || d == '\\') {
34.26 │ cmp $0x22,%eax
│ ↓ je 1e8
6.88 │ cmp $0x5c,%eax
│ ↓ je 1e8
│ if (d <= 0x1f && strict) {
│ cmp $0x1f,%eax
19.17 │ ↑ ja 170
│ test %r14d,%r14d
│ ↑ je 170
│ ↑ jmpq ed
```
clang version 7.0.1-8 (tags/RELEASE_701/final)
Mean +- std dev: 722 us +- 10 us
```
│ d = PyUnicode_READ(kind, buf, next);
11.62 │ c0:┌─→movzbl (%r12,%r13,1),%eax
11.99 │ │ cmp $0x22,%eax
│ │ if (d == '"' || d == '\\') {
│ │↓ je 1f0
9.61 │ │ cmp $0x5c,%al
22.56 │ │↓ je 1f0
│ │ break;
│ │ }
│ │ if (d <= 0x1f && strict) {
8.94 │ │ cmp $0x20,%al
│ │↓ jb b4a
│ │ for (next = end; next < len; next++) {
12.53 │ │ add $0x1,%r13
│ ├──cmp %r15,%r13
22.72 │ └──jl c0
│ ↓ jmpq 1f0
```
clang version 8.0.0-3 (tags/RELEASE_800/final)
Mean +- std dev: 707 us +- 1 us
```
│ d = PyUnicode_READ(kind, buf, next);
0.01 │ b0: movzbl (%r12,%r13,1),%eax
23.84 │ cmp $0x22,%eax
│ if (d == '"' || d == '\\') {
0.00 │ ↓ je 1c0
0.01 │ cmp $0x5c,%al
│ ↓ je 1c0
│ break;
│ }
│ if (d <= 0x1f && strict) {
26.23 │ cmp $0x20,%al
│ ↓ jb b1e
│ for (next = end; next < len; next++) {
│ add $0x1,%r13
│ cmp %r15,%r13
49.91 │ ↑ jl b0
│ ↓ jmpq 1c0
```
----------
_______________________________________
Python tracker <[email protected]>
<https://bugs.python.org/issue37587>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com