martin-g commented on code in PR #3528:
URL: https://github.com/apache/avro/pull/3528#discussion_r2493240275
##########
lang/php/lib/DataFile/AvroDataIOReader.php:
##########
@@ -74,16 +64,12 @@ class AvroDataIOReader
* is not supported
* @uses readHeader()
*/
- public function __construct($io, $datum_reader)
- {
-
- if (!($io instanceof AvroIO)) {
- throw new AvroDataIOException('io must be instance of AvroIO');
- }
-
+ public function __construct(
+ AvroIO $io,
+ private AvroIODatumReader $datum_reader
Review Comment:
Why `AvroIO $io` is assigned manually with `$this->io = $io` but
`$datum_reader` is not ?
##########
lang/php/lib/DataFile/AvroDataIO.php:
##########
@@ -99,7 +99,7 @@ class AvroDataIO
*/
public static function magicSize()
{
- return strlen(self::magic());
+ return strlen((string) self::magic());
Review Comment:
Why the cast here ?
`magic()` return a `string`, no ?
##########
lang/php/lib/Datum/AvroIOBinaryDecoder.php:
##########
@@ -164,120 +156,115 @@ public function readDouble()
* XXX: This is <b>not</b> endian-aware! See comments in
* {@link AvroIOBinaryEncoder::floatToIntBits()} for details.
*
- * @param string $bits
- * @returns float
*/
- public static function longBitsToDouble($bits)
+ public static function longBitsToDouble(string $bits)
{
$double = unpack('e', $bits);
- return (double) $double[1];
+ return (float) $double[1];
Review Comment:
The name of the function still says `ToDouble`. Why now it casts to `float` ?
##########
lang/php/lib/Schema/AvroSchema.php:
##########
@@ -379,81 +380,80 @@ public static function realParse($avro,
$default_namespace = null, &$schemata =
$new_name = new AvroName($name, $namespace,
$default_namespace);
$doc = $avro[self::DOC_ATTR] ?? null;
$aliases = $avro[self::ALIASES_ATTR] ?? null;
+ AvroNamedSchema::hasValidAliases($aliases);
switch ($type) {
case self::FIXED_SCHEMA:
- $size = $avro[self::SIZE_ATTR] ?? null;
+ $size = $avro[self::SIZE_ATTR] ?? throw new
AvroSchemaParseException(
+ "Size is required for fixed schema"
+ );
+ $size = (int) $size;
+
if (array_key_exists(self::LOGICAL_TYPE_ATTR, $avro)) {
switch ($avro[self::LOGICAL_TYPE_ATTR]) {
case self::DURATION_LOGICAL_TYPE:
return AvroFixedSchema::duration(
- $new_name,
- $doc,
Review Comment:
Why `$doc` is no more used for FixedSchema ?
##########
lang/php/lib/Datum/AvroIOBinaryEncoder.php:
##########
@@ -172,30 +155,26 @@ public static function doubleToLongBits($double): string
}
/**
- * @param string $str
* @uses self::writeBytes()
*/
- public function writeString($str): void
+ public function writeString(string $str): void
{
$this->writeBytes($str);
}
- /**
- * @param string $bytes
- */
- public function writeBytes($bytes): void
+ public function writeBytes(string $bytes): void
{
$this->writeLong(strlen($bytes));
$this->write($bytes);
}
- public function writeDecimal($decimal, int $scale, int $precision): void
+ public function writeDecimal(string $decimal, int $scale, int $precision):
void
{
if (!is_numeric($decimal)) {
throw new AvroException("Decimal value '{$decimal}' must be
numeric");
}
- $value = $decimal * (10 ** $scale);
+ $value = ((float) $decimal) * (10 ** $scale);
Review Comment:
What is the reason to cast to float here ? Doesn't this lose precision ?!
##########
lang/php/lib/Protocol/AvroProtocol.php:
##########
@@ -18,50 +18,78 @@
* limitations under the License.
*/
+declare(strict_types=1);
+
namespace Apache\Avro\Protocol;
use Apache\Avro\Schema\AvroNamedSchemata;
use Apache\Avro\Schema\AvroSchema;
+use Apache\Avro\Schema\AvroSchemaParseException;
/**
* Avro library for protocols
* @package Avro
*/
class AvroProtocol
{
- public $protocol;
- public $name;
- public $namespace;
- public $schemata;
- public $messages;
+ public function __construct(
+ public readonly string $protocol,
+ public readonly string $name,
+ public readonly string $namespace,
+ public readonly AvroNamedSchemata $schemata,
+ /** @var array<int, AvroProtocolMessage> */
+ public readonly array $messages,
+ ) {
+ }
- public static function parse($json)
+ /**
+ * @throws AvroProtocolParseException
+ * @throws AvroSchemaParseException
+ */
+ public static function parse(string $json): self
{
- if (is_null($json)) {
- throw new AvroProtocolParseException("Protocol can't be null");
+ try {
+ return self::realParse(
+ json_decode($json, associative: true, flags:
JSON_THROW_ON_ERROR)
+ );
+ } catch (\JsonException $e) {
+ throw new AvroProtocolParseException(
+ "Protocol json schema is invalid: ".$e->getMessage(),
+ previous: $e
+ );
}
-
- $protocol = new AvroProtocol();
- $protocol->realParse(json_decode($json, true));
- return $protocol;
}
- public function realParse($avro)
+ /**
+ * @param array $avro AVRO protocol as associative array
+ * @throws AvroSchemaParseException
+ */
+ public static function realParse(array $avro): self
{
- $this->protocol = $avro["protocol"];
- $this->namespace = $avro["namespace"];
- $this->schemata = new AvroNamedSchemata();
- $this->name = $avro["protocol"];
+ $schemata = new AvroNamedSchemata();
if (!is_null($avro["types"])) {
- $types = AvroSchema::realParse($avro["types"], $this->namespace,
$this->schemata);
+ AvroSchema::realParse($avro["types"], $avro["namespace"],
$schemata);
}
+ $messages = [];
if (!is_null($avro["messages"])) {
Review Comment:
```suggestion
if (isset($avro["messages"])) {
```
##########
lang/php/lib/Datum/AvroIOBinaryEncoder.php:
##########
@@ -172,30 +155,26 @@ public static function doubleToLongBits($double): string
}
/**
- * @param string $str
* @uses self::writeBytes()
*/
- public function writeString($str): void
+ public function writeString(string $str): void
{
$this->writeBytes($str);
}
- /**
- * @param string $bytes
- */
- public function writeBytes($bytes): void
+ public function writeBytes(string $bytes): void
{
$this->writeLong(strlen($bytes));
$this->write($bytes);
}
- public function writeDecimal($decimal, int $scale, int $precision): void
+ public function writeDecimal(string $decimal, int $scale, int $precision):
void
{
if (!is_numeric($decimal)) {
Review Comment:
The new type of `$decimal` is `string`. Is `is_numeric(string)` parsing it
first ?!
##########
lang/php/lib/Protocol/AvroProtocol.php:
##########
@@ -18,50 +18,78 @@
* limitations under the License.
*/
+declare(strict_types=1);
+
namespace Apache\Avro\Protocol;
use Apache\Avro\Schema\AvroNamedSchemata;
use Apache\Avro\Schema\AvroSchema;
+use Apache\Avro\Schema\AvroSchemaParseException;
/**
* Avro library for protocols
* @package Avro
*/
class AvroProtocol
{
- public $protocol;
- public $name;
- public $namespace;
- public $schemata;
- public $messages;
+ public function __construct(
+ public readonly string $protocol,
+ public readonly string $name,
+ public readonly string $namespace,
+ public readonly AvroNamedSchemata $schemata,
+ /** @var array<int, AvroProtocolMessage> */
+ public readonly array $messages,
+ ) {
+ }
- public static function parse($json)
+ /**
+ * @throws AvroProtocolParseException
+ * @throws AvroSchemaParseException
+ */
+ public static function parse(string $json): self
{
- if (is_null($json)) {
- throw new AvroProtocolParseException("Protocol can't be null");
+ try {
+ return self::realParse(
+ json_decode($json, associative: true, flags:
JSON_THROW_ON_ERROR)
+ );
+ } catch (\JsonException $e) {
+ throw new AvroProtocolParseException(
+ "Protocol json schema is invalid: ".$e->getMessage(),
+ previous: $e
+ );
}
-
- $protocol = new AvroProtocol();
- $protocol->realParse(json_decode($json, true));
- return $protocol;
}
- public function realParse($avro)
+ /**
+ * @param array $avro AVRO protocol as associative array
+ * @throws AvroSchemaParseException
+ */
+ public static function realParse(array $avro): self
{
- $this->protocol = $avro["protocol"];
- $this->namespace = $avro["namespace"];
- $this->schemata = new AvroNamedSchemata();
- $this->name = $avro["protocol"];
+ $schemata = new AvroNamedSchemata();
if (!is_null($avro["types"])) {
Review Comment:
```suggestion
if (isset($avro["types"])) {
```
Isn't this better ? To prevent undefined array key warnings.
##########
lang/php/lib/Schema/AvroNamedSchemata.php:
##########
@@ -28,17 +28,13 @@
*/
class AvroNamedSchemata
{
- /**
- * @var AvroNamedSchema[]
- */
- private $schemata;
-
- /**
- * @param AvroNamedSchemata[]
- */
- public function __construct($schemata = array())
+ public function __construct(
+ /**
+ * @var AvroNamedSchema[]
+ */
+ private $schemata = []
Review Comment:
```suggestion
private array $schemata = []
```
##########
lang/php/lib/Datum/AvroIODatumReader.php:
##########
@@ -219,28 +214,37 @@ public static function schemasMatch(AvroSchema
$writers_schema, AvroSchema $read
* @param AvroSchema $schema_two
* @param string[] $attribute_names array of string attribute names to
compare
*
- * @return boolean true if the attributes match and false otherwise.
+ * @return bool true if the attributes match and false otherwise.
+ * @throws AvroSchemaParseException
*/
- public static function attributesMatch($schema_one, $schema_two,
$attribute_names)
- {
+ public static function attributesMatch(
+ AvroSchema $schema_one,
+ AvroSchema $schema_two,
+ array $attribute_names
+ ): bool {
foreach ($attribute_names as $attribute_name) {
- if ($schema_one->attribute($attribute_name) !==
$schema_two->attribute($attribute_name)) {
- if ($attribute_name === AvroSchema::FULLNAME_ATTR) {
- foreach ($schema_two->getAliases() as $alias) {
- if (
- $schema_one->attribute($attribute_name) === (new
AvroName(
- $alias,
-
$schema_two->attribute(AvroSchema::NAMESPACE_ATTR),
- null
- ))->fullname()
- ) {
- return true;
- }
+ if ($schema_one->attribute($attribute_name) ===
$schema_two->attribute($attribute_name)) {
+ continue;
+ }
+
+ if (AvroSchema::FULLNAME_ATTR === $attribute_name) {
+ if (!$schema_two instanceof AvroAliasedSchema) {
+ return false;
+ }
+ foreach ($schema_two->getAliases() as $alias) {
+ if (
+ $schema_one->attribute($attribute_name) !== (new
AvroName(
+ $alias,
+ $schema_two->attribute(AvroSchema::NAMESPACE_ATTR),
+ null
+ ))->fullname()
+ ) {
+ return false;
Review Comment:
This looks wrong.
The new logic iterates over the aliases in the second schema and returns
false if an alias does not match. It is expected that only one alias could
match, not all of them! You need to return true if an alias match, otherwise -
continue with the next alias and return false if all aliases do not match.
##########
.github/workflows/test-lang-php.yml:
##########
@@ -71,8 +68,12 @@ jobs:
${{ runner.os }}-composer-
- name: Lint
+ if: matrix.php.version == '8.1'
Review Comment:
```suggestion
if: matrix.php == '8.1'
```
##########
lang/php/.php-cs-fixer.dist.php:
##########
@@ -0,0 +1,102 @@
+<?php
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+declare(strict_types=1);
+
+use PhpCsFixer\Runner\Parallel\ParallelConfigFactory;
+
+/**
+ * PHP CS Fixer documentation:
+ * - Homepage: https://cs.symfony.com/
+ * - List of all available rules: https://cs.symfony.com/doc/rules/index.html
+ * - List of all available rule sets:
https://cs.symfony.com/doc/ruleSets/index.html
+ *
+ * To inspect a specific rule (e.g. `blank_line_before_statement`), run:
+ *
+ * ```console
+ * > php-cs-fixer describe blank_line_before_statement
+ * ```
+ */
+$finder = PhpCsFixer\Finder::create()
+ ->in(['test'])
Review Comment:
Any reason why this uses only the `test` folder ? Why `lib` is not used too ?
##########
lang/php/test/InterOpTest.php:
##########
@@ -36,45 +38,46 @@ public function setUp(): void
$this->projection = AvroSchema::parse($this->projection_json);
}
- public function file_name_provider()
+ public static function file_name_provider(): array
{
$data_dir = AVRO_BUILD_DATA_DIR;
- $data_files = array();
+ $data_files = [];
if (!($dh = opendir($data_dir))) {
- die("Could not open data dir '$data_dir'\n");
+ exit("Could not open data dir '$data_dir'\n");
}
while ($file = readdir($dh)) {
if
(preg_match('/^[a-z]+(_deflate|_snappy|_zstandard|_bzip2)?\.avro$/', $file)) {
- $data_files [] = implode(DIRECTORY_SEPARATOR, array($data_dir,
$file));
- } else if (preg_match('/[^.]/', $file)) {
+ $data_files[] = implode(DIRECTORY_SEPARATOR, [$data_dir,
$file]);
+ } elseif (preg_match('/[^.]/', $file)) {
echo "Skipped: $data_dir/$file", PHP_EOL;
}
}
closedir($dh);
- $ary = array();
+ $ary = [];
foreach ($data_files as $df) {
echo "Reading: $df", PHP_EOL;
- $ary [] = array($df);
+ $ary[] = [$df];
}
+
return $ary;
}
- /**
- * @coversNothing
- * @dataProvider file_name_provider
- */
- public function test_read($file_name)
+ #[DataProvider('file_name_provider')]
+ public function test_read($file_name): void
Review Comment:
```suggestion
public function test_read(string $file_name): void
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]