[ https://issues.apache.org/jira/browse/AVRO-3841?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated AVRO-3841: --------------------------------- Labels: pull-request-available (was: ) > Align the specification of the way to encode NaN to the actual implementations > ------------------------------------------------------------------------------ > > Key: AVRO-3841 > URL: https://issues.apache.org/jira/browse/AVRO-3841 > Project: Apache Avro > Issue Type: Improvement > Components: spec > Affects Versions: 1.12.0 > Reporter: Kousuke Saruta > Priority: Minor > Labels: pull-request-available > Time Spent: 10m > Remaining Estimate: 0h > > The specification describes how to encode float/double as follows. > {code} > a float is written as 4 bytes. The float is converted into a 32-bit integer > using a method equivalent to Java’s floatToIntBits and then encoded in > little-endian format. > a double is written as 8 bytes. The double is converted into a 64-bit integer > using a method equivalent to Java’s doubleToLongBits and then encoded in > little-endian format. > {code} > But the actual implementation in Java uses > floatToRawIntBits/doubleToRawLongBits rather than > floatToIntBits/doubleToLongBits. > They differ in how they encode NaN. > floatToIntBits/doubleToLongBits don't distinguish between NaN and -NaN but > floatToRawIntBits/doubleToRawLongBits do. > I confirmed that all the implementations distinguish between NaN and -NaN. > So, I think it's better to modify the specification. 
> Java > {code} > public static int encodeFloat(float f, byte[] buf, int pos) { > final int bits = Float.floatToRawIntBits(f); > buf[pos + 3] = (byte) (bits >>> 24); > buf[pos + 2] = (byte) (bits >>> 16); > buf[pos + 1] = (byte) (bits >>> 8); > buf[pos] = (byte) (bits); > return 4; > } > public static int encodeDouble(double d, byte[] buf, int pos) { > final long bits = Double.doubleToRawLongBits(d); > int first = (int) (bits & 0xFFFFFFFF); > int second = (int) ((bits >>> 32) & 0xFFFFFFFF); > // the compiler seems to execute this order the best, likely due to > // register allocation -- the lifetime of constants is minimized. > buf[pos] = (byte) (first); > buf[pos + 4] = (byte) (second); > buf[pos + 5] = (byte) (second >>> 8); > buf[pos + 1] = (byte) (first >>> 8); > buf[pos + 2] = (byte) (first >>> 16); > buf[pos + 6] = (byte) (second >>> 16); > buf[pos + 7] = (byte) (second >>> 24); > buf[pos + 3] = (byte) (first >>> 24); > return 8; > } > {code} > Rust > {code} > Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()), > Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()), > {code} > Python > {code} > def write_float(self, datum: float) -> None: > > """ > > A float is written as 4 bytes. > > The float is converted into a 32-bit integer using a method > equivalent to > Java's floatToIntBits and then encoded in little-endian format. > > """ > > self.write(STRUCT_FLOAT.pack(datum)) > def write_double(self, datum: float) -> None: > > """ > > A double is written as 8 bytes. > > The double is converted into a 64-bit integer using a method > equivalent to > Java's doubleToLongBits and then encoded in little-endian format. 
> > """ > > self.write(STRUCT_DOUBLE.pack(datum)) > {code} > C > {code} > static int write_float(avro_writer_t writer, const float f) > { > #if AVRO_PLATFORM_IS_BIG_ENDIAN > uint8_t buf[4]; > #endif > union { > float f; > int32_t i; > } v; > v.f = f; > #if AVRO_PLATFORM_IS_BIG_ENDIAN > buf[0] = (uint8_t) (v.i >> 0); > buf[1] = (uint8_t) (v.i >> 8); > buf[2] = (uint8_t) (v.i >> 16); > buf[3] = (uint8_t) (v.i >> 24); > AVRO_WRITE(writer, buf, 4); > #else > AVRO_WRITE(writer, (void *)&v.i, 4); > #endif > return 0; > } > static int write_double(avro_writer_t writer, const double d) > { > #if AVRO_PLATFORM_IS_BIG_ENDIAN > uint8_t buf[8]; > #endif > union { > double d; > int64_t l; > } v; > v.d = d; > #if AVRO_PLATFORM_IS_BIG_ENDIAN > buf[0] = (uint8_t) (v.l >> 0); > buf[1] = (uint8_t) (v.l >> 8); > buf[2] = (uint8_t) (v.l >> 16); > buf[3] = (uint8_t) (v.l >> 24); > buf[4] = (uint8_t) (v.l >> 32); > buf[5] = (uint8_t) (v.l >> 40); > buf[6] = (uint8_t) (v.l >> 48); > buf[7] = (uint8_t) (v.l >> 56); > AVRO_WRITE(writer, buf, 8); > #else > AVRO_WRITE(writer, (void *)&v.l, 8); > #endif > return 0; > } > {code} > C++ > {code} > void BinaryEncoder::encodeFloat(float f) { > const auto *p = reinterpret_cast<const uint8_t *>(&f); > out_.writeBytes(p, sizeof(float)); > } > void BinaryEncoder::encodeDouble(double d) { > const auto *p = reinterpret_cast<const uint8_t *>(&d); > out_.writeBytes(p, sizeof(double)); > } > {code} > C# > {code} > public void WriteFloat(float value) > { > byte[] buffer = BitConverter.GetBytes(value); > if (!BitConverter.IsLittleEndian) Array.Reverse(buffer); > writeBytes(buffer); > } > public void WriteDouble(double value) > { > long bits = BitConverter.DoubleToInt64Bits(value); > writeByte((byte)(bits & 0xFF)); > writeByte((byte)((bits >> 8) & 0xFF)); > writeByte((byte)((bits >> 16) & 0xFF)); > writeByte((byte)((bits >> 24) & 0xFF)); > writeByte((byte)((bits >> 32) & 0xFF)); > writeByte((byte)((bits >> 40) & 0xFF)); > writeByte((byte)((bits >> 48) & 
0xFF)); > writeByte((byte)((bits >> 56) & 0xFF)); > } > {code} > Ruby > {code} > def read_float > # A float is written as 4 bytes. > # The float is converted into a 32-bit integer using a method > # equivalent to Java's floatToRawIntBits and then encoded in > # little-endian format. > read_and_unpack(4, 'e') > end > def read_double > # A double is written as 8 bytes. > # The double is converted into a 64-bit integer using a method > # equivalent to Java's doubleToRawLongBits and then encoded in > # little-endian format. > read_and_unpack(8, 'E') > end > {code} > Perl > {code} > sub encode_float { > my $class = shift; > my ($schema, $data, $cb) = @_; > my $enc = pack "f<", $data; > $cb->(\$enc); > } > sub encode_double { > my $class = shift; > my ($schema, $data, $cb) = @_; > my $enc = pack "d<", $data; > $cb->(\$enc); > } > {code} > PHP > {code} > public static function floatToIntBits($float) > { > return pack('g', (float) $float); > } > public static function doubleToLongBits($double) > { > return pack('e', (double) $double); > } > {code} > JavaScript > {code} > Tap.prototype.writeFloat = function (f) { > var buf = this.buf; > var pos = this.pos; > this.pos += 4; > if (this.pos > buf.length) { > return; > } > return this.buf.writeFloatLE(f, pos); > }; > Tap.prototype.writeDouble = function (d) { > var buf = this.buf; > var pos = this.pos; > this.pos += 8; > if (this.pos > buf.length) { > return; > } > return this.buf.writeDoubleLE(d, pos); > }; > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)