This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 9295e58a perf(python): Pre-allocate size for the dictionary (#1949)
9295e58a is described below
commit 9295e58ae706a3734f6fa80704c41502ba73131e
Author: penguin_wwy <[email protected]>
AuthorDate: Thu Nov 21 00:34:34 2024 +0800
perf(python): Pre-allocate size for the dictionary (#1949)
## What does this PR do?
Pre-allocate the dictionary during map deserialization using the entry
count read from the buffer (via `_PyDict_NewPresized`), to avoid repeated
resizing as entries are inserted and improve deserialization performance.
## Related issues
## Does this PR introduce any user-facing change?
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
```
# python format
fury_large_dict: Mean +- std dev: [dict_base] 548 us +- 33 us ->
[dict_resize] 531 us +- 33 us: 1.03x faster
# xlang format
fury_large_dict: Mean +- std dev: [dict_xlang_base] 550 us +- 39 us ->
[dict_xlang_resize] 527 us +- 35 us: 1.05x faster
```
---
integration_tests/cpython_benchmark/fury_benchmark.py | 4 ++++
python/pyfury/_serialization.pyx | 7 ++++---
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/integration_tests/cpython_benchmark/fury_benchmark.py
b/integration_tests/cpython_benchmark/fury_benchmark.py
index 8b493aec..01be1d49 100644
--- a/integration_tests/cpython_benchmark/fury_benchmark.py
+++ b/integration_tests/cpython_benchmark/fury_benchmark.py
@@ -64,6 +64,7 @@ DICT = {
"view_count": 7,
"zip": "",
}
+LARGE_DICT = {str(i): i for i in range(2**10 + 1)}
TUPLE = (
[
@@ -177,6 +178,9 @@ def micro_benchmark():
runner.parse_args()
language = pyfury.Language.XLANG if args.xlang else pyfury.Language.PYTHON
runner.bench_func("fury_dict", fury_object, language, not args.no_ref,
DICT)
+ runner.bench_func(
+ "fury_large_dict", fury_object, language, not args.no_ref, LARGE_DICT
+ )
runner.bench_func(
"fury_dict_group", fury_object, language, not args.no_ref, DICT_GROUP
)
diff --git a/python/pyfury/_serialization.pyx b/python/pyfury/_serialization.pyx
index 0175f615..ce1443c6 100644
--- a/python/pyfury/_serialization.pyx
+++ b/python/pyfury/_serialization.pyx
@@ -76,6 +76,7 @@ cdef extern from *:
"""
object int2obj(int64_t obj_addr)
int64_t obj2int(object obj_ref)
+ dict _PyDict_NewPresized(Py_ssize_t minused)
cdef int8_t NULL_FLAG = -3
@@ -2081,9 +2082,9 @@ cdef class MapSerializer(Serializer):
cpdef inline read(self, Buffer buffer):
cdef MapRefResolver ref_resolver = self.ref_resolver
cdef ClassResolver class_resolver = self.class_resolver
- cdef dict map_ = {}
- ref_resolver.reference(map_)
cdef int32_t len_ = buffer.read_varint32()
+ cdef dict map_ = _PyDict_NewPresized(len_)
+ ref_resolver.reference(map_)
cdef int32_t ref_id
cdef ClassInfo key_classinfo
cdef ClassInfo value_classinfo
@@ -2131,7 +2132,7 @@ cdef class MapSerializer(Serializer):
cpdef inline xread(self, Buffer buffer):
cdef int32_t len_ = buffer.read_varint32()
- cdef dict map_ = {}
+ cdef dict map_ = _PyDict_NewPresized(len_)
self.fury.ref_resolver.reference(map_)
for i in range(len_):
k = self.fury.xdeserialize_ref(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]