This is an automated email from the ASF dual-hosted git repository.
pandalee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new 87ddcc83 perf(python): optimize bytes buffer creation (#2008)
87ddcc83 is described below
commit 87ddcc835d83002ebaab4c39b8a5ec18df8f96ca
Author: Shawn Yang <[email protected]>
AuthorDate: Sun Jan 19 10:22:45 2025 +0800
perf(python): optimize bytes buffer creation (#2008)
## What does this PR do?
This PR optimizes bytes buffer creation by adding a fast path that gets the
address of a `bytes` object via `PyBytes_AsString` instead of the slower `memoryview`.
## Related issues
<!--
Is there any related issue? Please attach here.
- #xxxx0
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark
<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
-->
---
.../java/org/apache/fury/CrossLanguageTest.java | 23 +------
.../org/apache/fury/format/CrossLanguageTest.java | 23 +------
.../main/java/org/apache/fury/test/TestUtils.java | 77 ++++++++++++++++++++++
.../main/java/org/apache/fury/test/bean/BeanA.java | 1 +
.../java/org/apache/fury/test/bean/TestUtils.java | 39 -----------
python/pyfury/_util.pyx | 11 ++--
python/pyfury/format/encoder.py | 2 +-
python/pyfury/tests/test_cross_language.py | 18 +++--
8 files changed, 102 insertions(+), 92 deletions(-)
diff --git
a/java/fury-core/src/test/java/org/apache/fury/CrossLanguageTest.java
b/java/fury-core/src/test/java/org/apache/fury/CrossLanguageTest.java
index 7ab2033a..be9a0918 100644
--- a/java/fury-core/src/test/java/org/apache/fury/CrossLanguageTest.java
+++ b/java/fury-core/src/test/java/org/apache/fury/CrossLanguageTest.java
@@ -48,7 +48,6 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
-import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
@@ -66,6 +65,7 @@ import org.apache.fury.serializer.BufferObject;
import org.apache.fury.serializer.EnumSerializerTest;
import org.apache.fury.serializer.Serializer;
import org.apache.fury.serializer.StructSerializer;
+import org.apache.fury.test.TestUtils;
import org.apache.fury.util.DateTimeUtils;
import org.apache.fury.util.MurmurHash3;
import org.testng.Assert;
@@ -84,29 +84,10 @@ public class CrossLanguageTest extends FuryTestBase {
* @return Whether the command succeeded.
*/
private boolean executeCommand(List<String> command, int waitTimeoutSeconds)
{
- return executeCommand(
+ return TestUtils.executeCommand(
command, waitTimeoutSeconds,
ImmutableMap.of("ENABLE_CROSS_LANGUAGE_TESTS", "true"));
}
- private boolean executeCommand(
- List<String> command, int waitTimeoutSeconds, Map<String, String> env) {
- try {
- LOG.info("Executing command: {}", String.join(" ", command));
- ProcessBuilder processBuilder =
- new ProcessBuilder(command)
- .redirectOutput(ProcessBuilder.Redirect.INHERIT)
- .redirectError(ProcessBuilder.Redirect.INHERIT);
- for (Map.Entry<String, String> entry : env.entrySet()) {
- processBuilder.environment().put(entry.getKey(), entry.getValue());
- }
- Process process = processBuilder.start();
- process.waitFor(waitTimeoutSeconds, TimeUnit.SECONDS);
- return process.exitValue() == 0;
- } catch (Exception e) {
- throw new RuntimeException("Error executing command " + String.join(" ",
command), e);
- }
- }
-
@Data
public static class A {
public Integer f1;
diff --git
a/java/fury-format/src/test/java/org/apache/fury/format/CrossLanguageTest.java
b/java/fury-format/src/test/java/org/apache/fury/format/CrossLanguageTest.java
index 20199c96..19d2950a 100644
---
a/java/fury-format/src/test/java/org/apache/fury/format/CrossLanguageTest.java
+++
b/java/fury-format/src/test/java/org/apache/fury/format/CrossLanguageTest.java
@@ -36,7 +36,6 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.Data;
import org.apache.arrow.memory.BufferAllocator;
@@ -68,6 +67,7 @@ import org.apache.fury.logging.LoggerFactory;
import org.apache.fury.memory.MemoryBuffer;
import org.apache.fury.memory.MemoryUtils;
import org.apache.fury.serializer.BufferObject;
+import org.apache.fury.test.TestUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -339,29 +339,10 @@ public class CrossLanguageTest {
* @return Whether the command succeeded.
*/
private boolean executeCommand(List<String> command, int waitTimeoutSeconds)
{
- return executeCommand(
+ return TestUtils.executeCommand(
command, waitTimeoutSeconds,
ImmutableMap.of("ENABLE_CROSS_LANGUAGE_TESTS", "true"));
}
- private boolean executeCommand(
- List<String> command, int waitTimeoutSeconds, Map<String, String> env) {
- try {
- LOG.info("Executing command: {}", String.join(" ", command));
- ProcessBuilder processBuilder =
- new ProcessBuilder(command)
- .redirectOutput(ProcessBuilder.Redirect.INHERIT)
- .redirectError(ProcessBuilder.Redirect.INHERIT);
- for (Map.Entry<String, String> entry : env.entrySet()) {
- processBuilder.environment().put(entry.getKey(), entry.getValue());
- }
- Process process = processBuilder.start();
- process.waitFor(waitTimeoutSeconds, TimeUnit.SECONDS);
- return process.exitValue() == 0;
- } catch (Exception e) {
- throw new RuntimeException("Error executing command " + String.join(" ",
command), e);
- }
- }
-
@Test
public void testSerializeArrowInBand() throws Exception {
Fury fury =
diff --git
a/java/fury-test-core/src/main/java/org/apache/fury/test/TestUtils.java
b/java/fury-test-core/src/main/java/org/apache/fury/test/TestUtils.java
new file mode 100644
index 00000000..aa5120b0
--- /dev/null
+++ b/java/fury-test-core/src/main/java/org/apache/fury/test/TestUtils.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.test;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+public class TestUtils {
+ public static String random(int size, int rand) {
+ return random(size, new Random(rand));
+ }
+
+ public static String random(int size, Random random) {
+ char[] chars = new char[size];
+ char start = ' ';
+ char end = 'z' + 1;
+ int gap = end - start;
+ for (int i = 0; i < size; i++) {
+ chars[i] = (char) (start + random.nextInt(gap));
+ }
+ return new String(chars);
+ }
+
+ public static boolean executeCommand(
+ List<String> command, int waitTimeoutSeconds, Map<String, String> env) {
+ try {
+ System.out.println("Executing command: " + String.join(" ", command));
+ // redirectOutput doesn't work for a forked JVM, such as under Maven Surefire.
+ ProcessBuilder processBuilder = new ProcessBuilder(command);
+ for (Map.Entry<String, String> entry : env.entrySet()) {
+ processBuilder.environment().put(entry.getKey(), entry.getValue());
+ }
+ Process process = processBuilder.start();
+ // Capture output to log
+ BufferedReader reader = new BufferedReader(new
InputStreamReader(process.getInputStream()));
+ BufferedReader errorReader =
+ new BufferedReader(new InputStreamReader(process.getErrorStream()));
+ String line;
+ while ((line = reader.readLine()) != null) {
+ System.out.println(line);
+ }
+ while ((line = errorReader.readLine()) != null) {
+ System.err.println(line);
+ }
+ boolean finished = process.waitFor(waitTimeoutSeconds, TimeUnit.SECONDS);
+ if (finished) {
+ return process.exitValue() == 0;
+ } else {
+ process.destroy(); // ensure the process is terminated
+ return false;
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error executing command " + String.join(" ",
command), e);
+ }
+ }
+}
diff --git
a/java/fury-test-core/src/main/java/org/apache/fury/test/bean/BeanA.java
b/java/fury-test-core/src/main/java/org/apache/fury/test/bean/BeanA.java
index ee656ddb..77fd593f 100644
--- a/java/fury-test-core/src/main/java/org/apache/fury/test/bean/BeanA.java
+++ b/java/fury-test-core/src/main/java/org/apache/fury/test/bean/BeanA.java
@@ -28,6 +28,7 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import lombok.Data;
+import org.apache.fury.test.TestUtils;
@Data
public class BeanA implements Serializable {
diff --git
a/java/fury-test-core/src/main/java/org/apache/fury/test/bean/TestUtils.java
b/java/fury-test-core/src/main/java/org/apache/fury/test/bean/TestUtils.java
deleted file mode 100644
index 2db4d348..00000000
--- a/java/fury-test-core/src/main/java/org/apache/fury/test/bean/TestUtils.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.fury.test.bean;
-
-import java.util.Random;
-
-public class TestUtils {
- public static String random(int size, int rand) {
- return random(size, new Random(rand));
- }
-
- public static String random(int size, Random random) {
- char[] chars = new char[size];
- char start = ' ';
- char end = 'z' + 1;
- int gap = end - start;
- for (int i = 0; i < size; i++) {
- chars[i] = (char) (start + random.nextInt(gap));
- }
- return new String(chars);
- }
-}
diff --git a/python/pyfury/_util.pyx b/python/pyfury/_util.pyx
index ca87d81e..3d0ac05f 100644
--- a/python/pyfury/_util.pyx
+++ b/python/pyfury/_util.pyx
@@ -36,15 +36,16 @@ cdef int UTF16_LE = -1
@cython.final
cdef class Buffer:
- def __init__(self, data not None, int offset=0, length=None):
+ def __init__(self, data not None, int32_t offset=0, length=None):
self.data = data
- assert 0 <= offset <= len(data), f'offset {offset} length {len(data)}'
+ cdef int32_t buffer_len = len(data)
cdef int length_
if length is None:
- length_ = len(data) - offset
+ length_ = buffer_len - offset
else:
length_ = length
- assert length_ >= 0, f'length should be >= 0 but got {length}'
+ if offset < 0 or offset + length_ > buffer_len:
+ raise ValueError(f'Wrong offset {offset} or length {length} for
buffer with size {buffer_len}')
if length_ > 0:
self._c_address = get_address(data) + offset
else:
@@ -659,6 +660,8 @@ cdef class Buffer:
cdef inline uint8_t* get_address(v):
+ if type(v) is bytes:
+ return <uint8_t*>(PyBytes_AsString(v))
view = memoryview(v)
cdef str dtype = view.format
cdef:
diff --git a/python/pyfury/format/encoder.py b/python/pyfury/format/encoder.py
index 66ade7ec..668113a0 100644
--- a/python/pyfury/format/encoder.py
+++ b/python/pyfury/format/encoder.py
@@ -46,7 +46,7 @@ class Encoder:
f"{self.schema_hash, peer_hash}. "
f"Please check writer schema."
)
- buf = pyfury.Buffer(binary, 8, len(binary))
+ buf = pyfury.Buffer(binary, 8, len(binary) - 8)
row = pyfury.RowData(self.schema, buf)
return self.row_encoder.from_row(row)
diff --git a/python/pyfury/tests/test_cross_language.py
b/python/pyfury/tests/test_cross_language.py
index 795b1438..c765e495 100644
--- a/python/pyfury/tests/test_cross_language.py
+++ b/python/pyfury/tests/test_cross_language.py
@@ -17,6 +17,7 @@
import array
import datetime
+import logging
import math
import os
import typing
@@ -611,9 +612,14 @@ def test_oob_buffer(in_band_file_path,
out_of_band_file_path):
if __name__ == "__main__":
import sys
- args = sys.argv[1:]
- assert len(args) > 0
- func = getattr(sys.modules[__name__], args[0])
- if not func:
- raise Exception("Unknown args {}".format(args))
- func(*args[1:])
+ print(f"Execute {sys.argv}")
+ try:
+ args = sys.argv[1:]
+ assert len(args) > 0
+ func = getattr(sys.modules[__name__], args[0])
+ if not func:
+ raise Exception("Unknown args {}".format(args))
+ func(*args[1:])
+ except BaseException as e:
+ logging.exception("Execute %s failed with %s", args, e)
+ raise
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]