This is an automated email from the ASF dual-hosted git repository.

chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git


The following commit(s) were added to refs/heads/main by this push:
     new 8d2d1240 feat(python): Hardcoding metastring into passable parameters 
(#1987)
8d2d1240 is described below

commit 8d2d1240825cbfa32548ecb8afc978cea533ec23
Author: PAN <[email protected]>
AuthorDate: Mon Dec 23 10:29:46 2024 +0800

    feat(python): Hardcoding metastring into passable parameters (#1987)
    
    <!--
    **Thanks for contributing to Fury.**
    
    **If this is your first time opening a PR on fury, you can refer to
    [CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
    
    Contribution Checklist
    
    - The **Apache Fury (incubating)** community has restrictions on the
    naming of pr titles. You can also find instructions in
    [CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
    
    - Fury has a strong focus on performance. If the PR you submit will have
    an impact on performance, please benchmark it first and provide the
    benchmark result here.
    -->
    
    ## What does this PR do?
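    Previously, MetaStringEncoder and MetaStringDecoder hard-coded the two
    special characters ("." and "_") used by the LOWER_UPPER_DIGIT_SPECIAL
    encoding, which was not flexible. This PR turns them into constructor
    parameters (special_char1 and special_char2), so the caller chooses
    which special characters are used for encoding and decoding. MetaString
    also records the special characters it was created with.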
    
    <!-- Describe the purpose of this PR. -->
    
    ## Related issues
    Close #1983
    <!--
    Is there any related issue? Please attach here.
    
    - #xxxx0
    - #xxxx1
    - #xxxx2
    -->
    
    ## Does this PR introduce any user-facing change?
    
    <!--
    If any user-facing interface changes, please [open an
    issue](https://github.com/apache/fury/issues/new/choose) describing the
    need to do so and update the document if necessary.
    -->
    
    - [ ] Does this PR introduce any public API change?
    - [ ] Does this PR introduce any binary protocol compatibility change?
    
    ## Benchmark
    
    <!--
    When the PR has an impact on performance (if you don't know whether the
    PR will have an impact on performance, you can submit the PR first, and
    if it does have an impact on performance, the code reviewer will explain
    it), be sure to attach the benchmark data here.
    -->
---
 python/pyfury/meta/metastring.py       | 108 ++++++++++++++++++++++++++++-----
 python/pyfury/tests/test_metastring.py |  51 +++++++++-------
 2 files changed, 122 insertions(+), 37 deletions(-)

diff --git a/python/pyfury/meta/metastring.py b/python/pyfury/meta/metastring.py
index 63232b56..4ff06510 100644
--- a/python/pyfury/meta/metastring.py
+++ b/python/pyfury/meta/metastring.py
@@ -48,12 +48,20 @@ _METASTRING_NUM_CHARS_LIMIT = 32767
 
 class MetaString:
     def __init__(
-        self, original: str, encoding: Encoding, encoded_data: bytes, length: 
int
+        self,
+        original: str,
+        encoding: Encoding,
+        encoded_data: bytes,
+        length: int,
+        special_char1: str = ".",
+        special_char2: str = "|",
     ):
         self.original = original
         self.encoding = encoding
         self.encoded_data = encoded_data
         self.length = length
+        self.special_char1 = special_char1
+        self.special_char2 = special_char2
         if self.encoding != Encoding.UTF_8:
             self.strip_last_char = (encoded_data[0] & 0x80) != 0
         else:
@@ -65,6 +73,17 @@ class MetaStringDecoder:
     Decodes MetaString objects back into their original plain text form.
     """
 
+    def __init__(self, special_char1: str, special_char2: str):
+        """
+        Creates a MetaStringDecoder with specified special characters used for 
decoding.
+
+        Args:
+            special_char1 (str): The first special character used for encoding.
+            special_char2 (str): The second special character used for 
encoding.
+        """
+        self.special_char1 = special_char1
+        self.special_char2 = special_char2
+
     def decode(self, encoded_data: bytes, encoding: Encoding) -> str:
         """
         Decodes the encoded data using the specified encoding.
@@ -203,9 +222,9 @@ class MetaStringDecoder:
         elif 52 <= char_value <= 61:
             return chr(ord("0") + (char_value - 52))
         elif char_value == 62:
-            return "."
+            return self.special_char1  # Use special_char1 for the encoding
         elif char_value == 63:
-            return "_"
+            return self.special_char2  # Use special_char2 for the encoding
         else:
             raise ValueError(
                 f"Invalid character value for LOWER_UPPER_DIGIT_SPECIAL: 
{char_value}"
@@ -250,9 +269,16 @@ class MetaStringDecoder:
 
 
 class MetaStringEncoder:
-    """
-    Encodes plain text strings into MetaString objects with specified encoding 
mechanisms.
-    """
+    def __init__(self, special_char1: str, special_char2: str):
+        """
+        Creates a MetaStringEncoder with specified special characters used for 
encoding.
+
+        Args:
+            special_char1 (str): The first special character used in custom 
encoding.
+            special_char2 (str): The second special character used in custom 
encoding.
+        """
+        self.special_char1 = special_char1
+        self.special_char2 = special_char2
 
     def encode(self, input_string: str) -> MetaString:
         """
@@ -270,7 +296,14 @@ class MetaStringEncoder:
         ), "Long meta string than _METASTRING_NUM_CHARS_LIMIT is not allowed."
 
         if not input_string:
-            return MetaString(input_string, Encoding.UTF_8, bytes(), 0)
+            return MetaString(
+                input_string,
+                Encoding.UTF_8,
+                bytes(),
+                0,
+                self.special_char1,
+                self.special_char2,
+            )
 
         encoding = self.compute_encoding(input_string)
         return self.encode_with_encoding(input_string, encoding)
@@ -292,29 +325,67 @@ class MetaStringEncoder:
         ), "Long meta string than _METASTRING_NUM_CHARS_LIMIT is not allowed."
 
         if not input_string:
-            return MetaString(input_string, Encoding.UTF_8, bytes(), 0)
+            return MetaString(
+                input_string,
+                Encoding.UTF_8,
+                bytes(),
+                0,
+                self.special_char1,
+                self.special_char2,
+            )
 
         length = len(input_string)
         if encoding == Encoding.LOWER_SPECIAL:
             encoded_data = self._encode_lower_special(input_string)
-            return MetaString(input_string, encoding, encoded_data, length * 5)
+            return MetaString(
+                input_string,
+                encoding,
+                encoded_data,
+                length * 5,
+                self.special_char1,
+                self.special_char2,
+            )
         elif encoding == Encoding.LOWER_UPPER_DIGIT_SPECIAL:
             encoded_data = self._encode_lower_upper_digit_special(input_string)
-            return MetaString(input_string, encoding, encoded_data, length * 6)
+            return MetaString(
+                input_string,
+                encoding,
+                encoded_data,
+                length * 6,
+                self.special_char1,
+                self.special_char2,
+            )
         elif encoding == Encoding.FIRST_TO_LOWER_SPECIAL:
             encoded_data = self._encode_first_to_lower_special(input_string)
-            return MetaString(input_string, encoding, encoded_data, length * 5)
+            return MetaString(
+                input_string,
+                encoding,
+                encoded_data,
+                length * 5,
+                self.special_char1,
+                self.special_char2,
+            )
         elif encoding == Encoding.ALL_TO_LOWER_SPECIAL:
             chars = list(input_string)
             upper_count = sum(1 for c in chars if c.isupper())
             encoded_data = self._encode_all_to_lower_special(chars)
             return MetaString(
-                input_string, encoding, encoded_data, (upper_count + length) * 
5
+                input_string,
+                encoding,
+                encoded_data,
+                (upper_count + length) * 5,
+                self.special_char1,
+                self.special_char2,
             )
         else:
             encoded_data = bytes(input_string, "utf-8")
             return MetaString(
-                input_string, Encoding.UTF_8, encoded_data, len(encoded_data) 
* 8
+                input_string,
+                Encoding.UTF_8,
+                encoded_data,
+                len(encoded_data) * 8,
+                self.special_char1,
+                self.special_char2,
             )
 
     def compute_encoding(self, input_string: str) -> Encoding:
@@ -363,7 +434,12 @@ class MetaStringEncoder:
         upper_count = 0
         for c in chars:
             if can_lower_upper_digit_special_encoded:
-                if not (c.islower() or c.isupper() or c.isdigit() or c in 
{".", "_"}):
+                if not (
+                    c.islower()
+                    or c.isupper()
+                    or c.isdigit()
+                    or c in {self.special_char1, self.special_char2}
+                ):
                     can_lower_upper_digit_special_encoded = False
             if can_lower_special_encoded:
                 if not (c.islower() or c in {".", "_", "$", "|"}):
@@ -500,9 +576,9 @@ class MetaStringEncoder:
                 return 26 + (ord(c) - ord("A"))
             elif "0" <= c <= "9":
                 return 52 + (ord(c) - ord("0"))
-            elif c == ".":
+            elif c == self.special_char1:
                 return 62
-            elif c == "_":
+            elif c == self.special_char2:
                 return 63
             else:
                 raise ValueError(
diff --git a/python/pyfury/tests/test_metastring.py 
b/python/pyfury/tests/test_metastring.py
index 7dd98ff7..95596edf 100644
--- a/python/pyfury/tests/test_metastring.py
+++ b/python/pyfury/tests/test_metastring.py
@@ -24,8 +24,10 @@ from pyfury.meta.metastring import (
 
 
 def test_encode_metastring_lower_special():
-    encoder = MetaStringEncoder()
-    decoder = MetaStringDecoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
+    # Test for encoding and decoding
     encoded = encoder._encode_lower_special("abc_def")
     assert len(encoded) == 5
     assert len(encoder.encode("org.apache.fury.benchmark.data").encoded_data) 
== 19
@@ -41,10 +43,12 @@ def test_encode_metastring_lower_special():
 
 
 def test_encode_metastring_lower_upper_digit_special():
-    encoder = MetaStringEncoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
+    # Test for encoding and decoding
     encoded = encoder._encode_lower_upper_digit_special("ExampleInput123")
     assert len(encoded) == 12
-    decoder = MetaStringDecoder()
     decoded = decoder.decode(encoded, Encoding.LOWER_UPPER_DIGIT_SPECIAL)
     assert decoded == "ExampleInput123"
 
@@ -73,8 +77,9 @@ def create_string(length):
 
 
 def test_metastring():
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
 
-    encoder = MetaStringEncoder()
     for i in range(1, 128):
         try:
             string = create_string(i)
@@ -82,7 +87,6 @@ def test_metastring():
             assert metastring.encoding != Encoding.UTF_8
             assert metastring.original == string
 
-            decoder = MetaStringDecoder()
             new_string = decoder.decode(metastring.encoded_data, 
metastring.encoding)
             assert new_string == string
         except Exception as e:
@@ -90,8 +94,9 @@ def test_metastring():
 
 
 def test_encode_empty_string():
-    encoder = MetaStringEncoder()
-    decoder = MetaStringDecoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
     for encoding in [
         Encoding.LOWER_SPECIAL,
         Encoding.LOWER_UPPER_DIGIT_SPECIAL,
@@ -106,7 +111,7 @@ def test_encode_empty_string():
 
 
 def test_encode_characters_outside_of_lower_special():
-    encoder = MetaStringEncoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
 
     test_string = "abcdefABCDEF1234!@#"
     metastring = encoder.encode(test_string)
@@ -114,8 +119,9 @@ def test_encode_characters_outside_of_lower_special():
 
 
 def test_all_to_upper_special_encoding():
-    encoder = MetaStringEncoder()
-    decoder = MetaStringDecoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
     test_string = "ABC_DEF"
     metastring = encoder.encode(test_string)
     assert metastring.encoding == Encoding.LOWER_UPPER_DIGIT_SPECIAL
@@ -124,8 +130,9 @@ def test_all_to_upper_special_encoding():
 
 
 def test_first_to_lower_special_encoding():
-    encoder = MetaStringEncoder()
-    decoder = MetaStringDecoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
     test_string = "Aabcdef"
     metastring = encoder.encode(test_string)
     assert metastring.encoding == Encoding.FIRST_TO_LOWER_SPECIAL
@@ -134,8 +141,9 @@ def test_first_to_lower_special_encoding():
 
 
 def test_utf8_encoding():
-    encoder = MetaStringEncoder()
-    decoder = MetaStringDecoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
     test_string = "你好,世界"  # Non-Latin characters
     metastring = encoder.encode(test_string)
     assert metastring.encoding == Encoding.UTF_8
@@ -144,7 +152,7 @@ def test_utf8_encoding():
 
 
 def test_strip_last_char():
-    encoder = MetaStringEncoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
 
     test_string = "abc"  # encoded as 1|00000|00, 001|00010, exactly two bytes
     encoded_metastring = encoder.encode(test_string)
@@ -156,8 +164,9 @@ def test_strip_last_char():
 
 
 def test_empty_string():
-    encoder = MetaStringEncoder()
-    decoder = MetaStringDecoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
+    decoder = MetaStringDecoder(special_char1=".", special_char2="_")
+
     metastring = encoder.encode("")
     assert metastring.encoded_data == bytes()
 
@@ -166,7 +175,7 @@ def test_empty_string():
 
 
 def test_ascii_encoding():
-    encoder = MetaStringEncoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
 
     test_string = "asciiOnly"
     encoded_metastring = encoder.encode(test_string)
@@ -175,7 +184,7 @@ def test_ascii_encoding():
 
 
 def test_non_ascii_encoding():
-    encoder = MetaStringEncoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
 
     test_string = "こんにちは"  # Non-ASCII string
     encoded_metastring = encoder.encode(test_string)
@@ -183,7 +192,7 @@ def test_non_ascii_encoding():
 
 
 def test_non_ascii_encoding_and_non_utf8():
-    encoder = MetaStringEncoder()
+    encoder = MetaStringEncoder(special_char1=".", special_char2="_")
 
     non_ascii_string = "こんにちは"  # Non-ASCII string
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to