subhashb commented on code in PR #1181:
URL: https://github.com/apache/avro/pull/1181#discussion_r971159902
##########
lang/py/avro/schema.py:
##########
@@ -169,7 +170,94 @@ def canonical_properties(self) -> Mapping[str, object]:
return collections.OrderedDict((key, props[key]) for key in
CANONICAL_FIELD_ORDER if key in props)
-class Schema(abc.ABC, CanonicalPropertiesMixin):
+class FingerprintMixin:
+ """
+ A Mixin to generate schema fingerprints for supported algorithms
+ """
+
+ _FP_TABLE = None
+ _EMPTY64 = 0xC15D213AA4D7A795
+
+ # All algorithms guaranteed by hashlib are supported
+ # Supported Algorithms are:
+ # - 'blake2b',
+ # - 'blake2s',
+ # - 'md5',
+ # - 'sha1',
+ # - 'sha224',
+ # - 'sha256',
+ # - 'sha384',
+ # - 'sha3_224',
+ # - 'sha3_256',
+ # - 'sha3_384',
+ # - 'sha3_512',
+ # - 'sha512',
+ # - 'shake_128',
+ # - 'shake_256'
+ algorithms = set(hashlib.algorithms_guaranteed)
+ # Additionally, we provide a custom implementation of 64-bit Rabin fingerprint
+ algorithms.update({"CRC-64-AVRO"})
+ SUPPORTED_ALGORITHMS: FrozenSet[str] = frozenset(algorithms)
+
+ def fingerprint(self, algorithm="CRC-64-AVRO"):
+ """
+ Generate fingerprint for supplied algorithm.
+
+ 'CRC-64-AVRO' will be used as the algorithm by default, but any
+ algorithm supported by hashlib (as can be referenced with
+ `hashlib.algorithms_guaranteed`) can be specified.
+
+ `algorithm` param is used as an algorithm name, and NoSuchAlgorithmException
+ will be thrown if the algorithm is not among the supported ones.
+ """
+ schema = self.canonical_form.encode("utf-8")
Review Comment:
Yes, it makes sense to fold Fingerprint capabilities into Schema itself.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]