[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-08 Thread Paul Robinson via Phabricator via cfe-commits
probinson created this revision.
probinson added reviewers: pengfei, RKSimon, goldstein.w.n, craig.topper.
Herald added a project: All.
probinson requested review of this revision.

https://reviews.llvm.org/D150114

Files:
  clang/lib/Headers/avx2intrin.h

Index: clang/lib/Headers/avx2intrin.h
===
--- clang/lib/Headers/avx2intrin.h
+++ clang/lib/Headers/avx2intrin.h
@@ -65,48 +65,150 @@
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors and returns the lower 8 bits of each sum in the corresponding
+///byte of the 256-bit integer vector result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDB instruction.
+///
+/// \param __a
+///A 256-bit vector containing one of the source operands.
+/// \param __b
+///A 256-bit vector containing one of the source operands.
+/// \returns A 256-bit vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] and returns the lower 16 bits of each sum in the
+///corresponding element of the [16 x i16] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
+/// Adds 32-bit integers from corresponding elements of two 256-bit vectors of
+///[8 x i32] and returns the lower 32 bits of each sum in the corresponding
+///element of the [8 x i32] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDD instruction.
+///
+/// \param __a
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x i32] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
+/// Adds 64-bit integers from corresponding elements of two 256-bit vectors of
+///[4 x i64] and returns the lower 64 bits of each sum in the corresponding
+///element of the [4 x i64] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDQ instruction.
+///
+/// \param __a
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x i64] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using signed saturation, and returns each sum in the
+///corresponding byte of the 256-bit integer vector result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSB instruction.
+///
+/// \param __a
+///A 256-bit vector containing one of the source operands.
+/// \param __b
+///A 256-bit vector containing one of the source operands.
+/// \returns A 256-bit vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] using signed saturation, and returns the [16 x i16] result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using unsigned saturation, and returns each sum in the
+///corresponding byte of the 256-bit integer vector result.
+///
+/// \headerfile <immintrin.h>
+///

[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@pengfei Any objections?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-15 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/Headers/avx2intrin.h:156
+///A 256-bit vector containing one of the source operands.
+/// \returns A 256-bit vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256

Why do some return descriptions include the type like [4 x i64] but some don't?



Comment at: clang/lib/Headers/avx2intrin.h:1050
+/// \param __a
+///A 256-bit vector containing the subtrahends.
+/// \param __b

I think minuend and subtrahend are swapped here.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-15 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei added inline comments.



Comment at: clang/lib/Headers/avx2intrin.h:1043
+///corresponding byte of the 256-bit integer vector result (overflow is
+///ignored). For each byte, computes <c> result = __a - __b </c>.
+///

It would be better to move it to `\code{.operation}` for consistency. Same for the ones below.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-23 Thread Paul Robinson via Phabricator via cfe-commits
probinson added inline comments.



Comment at: clang/lib/Headers/avx2intrin.h:156
+///A 256-bit vector containing one of the source operands.
+/// \returns A 256-bit vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256

craig.topper wrote:
> Why do some return descriptions include the type like [4 x i64] but some 
> don't?
My policy has been to provide the type for element sizes other than byte. So, I 
haven't been saying [32 x i8] but I do say [4 x i64] or whatever.
Although, I have also tended to say "integer vector" when it's a byte vector, 
and I'll make that consistent here as well.



Comment at: clang/lib/Headers/avx2intrin.h:1043
+///corresponding byte of the 256-bit integer vector result (overflow is
+///ignored). For each byte, computes <c> result = __a - __b </c>.
+///

pengfei wrote:
> It would be better to move it to `\code{.operation}` for consistency. Same
> for the ones below.
Okay.



Comment at: clang/lib/Headers/avx2intrin.h:1050
+/// \param __a
+///A 256-bit vector containing the subtrahends.
+/// \param __b

craig.topper wrote:
> I think minuend and subtrahend are swapped here.
Thanks for catching that!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-23 Thread Paul Robinson via Phabricator via cfe-commits
probinson updated this revision to Diff 524786.
probinson added a comment.

Address review comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

Files:
  clang/lib/Headers/avx2intrin.h

Index: clang/lib/Headers/avx2intrin.h
===
--- clang/lib/Headers/avx2intrin.h
+++ clang/lib/Headers/avx2intrin.h
@@ -65,48 +65,150 @@
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors and returns the lower 8 bits of each sum in the corresponding
+///byte of the 256-bit integer vector result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDB instruction.
+///
+/// \param __a
+///A 256-bit integer vector containing one of the source operands.
+/// \param __b
+///A 256-bit integer vector containing one of the source operands.
+/// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] and returns the lower 16 bits of each sum in the
+///corresponding element of the [16 x i16] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
+/// Adds 32-bit integers from corresponding elements of two 256-bit vectors of
+///[8 x i32] and returns the lower 32 bits of each sum in the corresponding
+///element of the [8 x i32] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDD instruction.
+///
+/// \param __a
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x i32] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
+/// Adds 64-bit integers from corresponding elements of two 256-bit vectors of
+///[4 x i64] and returns the lower 64 bits of each sum in the corresponding
+///element of the [4 x i64] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDQ instruction.
+///
+/// \param __a
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x i64] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using signed saturation, and returns each sum in the
+///corresponding byte of the 256-bit integer vector result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSB instruction.
+///
+/// \param __a
+///A 256-bit integer vector containing one of the source operands.
+/// \param __b
+///A 256-bit integer vector containing one of the source operands.
+/// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] using signed saturation, and returns the [16 x i16] result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using unsigned saturation, and returns each sum in the
+///corresponding byte of the 256-bit integer vector 

[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-23 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added inline comments.



Comment at: clang/lib/Headers/avx2intrin.h:456
+///   j := i*128
+///   result[j+31:j] := __a[j+63:j+32] - __a[j+31:j]
+///   result[j+63:j+32] := __a[j+127:j+96] - __a[j+95:j+64]

Intel intrinsics guide says

```
dst[31:0] := a[31:0] - a[63:32]
dst[63:32] := a[95:64] - a[127:96]
dst[95:64] := b[31:0] - b[63:32]
dst[127:96] := b[95:64] - b[127:96]
dst[159:128] := a[159:128] - a[191:160]
dst[191:160] := a[223:192] - a[255:224]
dst[223:192] := b[159:128] - b[191:160]
dst[255:224] := b[223:192] - b[255:224]
dst[MAX:256] := 0
```

So I think the operands are in the wrong order here?



Comment at: clang/lib/Headers/avx2intrin.h:488
+///   j := i*128
+///   result[j+15:j] := SATURATE16(__a[j+31:j+16] - __a[j+15:j])
+///   result[j+31:j+16] := SATURATE16(__a[j+63:j+48] - __a[j+47:j+32])

Operands are reversed?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-25 Thread Paul Robinson via Phabricator via cfe-commits
probinson updated this revision to Diff 525632.
probinson added a comment.

Correct order of horizontal operands


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

Files:
  clang/lib/Headers/avx2intrin.h

Index: clang/lib/Headers/avx2intrin.h
===
--- clang/lib/Headers/avx2intrin.h
+++ clang/lib/Headers/avx2intrin.h
@@ -65,48 +65,150 @@
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors and returns the lower 8 bits of each sum in the corresponding
+///byte of the 256-bit integer vector result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDB instruction.
+///
+/// \param __a
+///A 256-bit integer vector containing one of the source operands.
+/// \param __b
+///A 256-bit integer vector containing one of the source operands.
+/// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] and returns the lower 16 bits of each sum in the
+///corresponding element of the [16 x i16] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
+/// Adds 32-bit integers from corresponding elements of two 256-bit vectors of
+///[8 x i32] and returns the lower 32 bits of each sum in the corresponding
+///element of the [8 x i32] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDD instruction.
+///
+/// \param __a
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x i32] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
+/// Adds 64-bit integers from corresponding elements of two 256-bit vectors of
+///[4 x i64] and returns the lower 64 bits of each sum in the corresponding
+///element of the [4 x i64] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDQ instruction.
+///
+/// \param __a
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x i64] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using signed saturation, and returns each sum in the
+///corresponding byte of the 256-bit integer vector result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSB instruction.
+///
+/// \param __a
+///A 256-bit integer vector containing one of the source operands.
+/// \param __b
+///A 256-bit integer vector containing one of the source operands.
+/// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] using signed saturation, and returns the [16 x i16] result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using unsigned saturation, and returns each sum in the
+///corresponding byte of the 256-bit in

[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-25 Thread Paul Robinson via Phabricator via cfe-commits
probinson added inline comments.



Comment at: clang/lib/Headers/avx2intrin.h:456
+///   j := i*128
+///   result[j+31:j] := __a[j+63:j+32] - __a[j+31:j]
+///   result[j+63:j+32] := __a[j+127:j+96] - __a[j+95:j+64]

craig.topper wrote:
> Intel intrinsics guide says
> 
> ```
> dst[31:0] := a[31:0] - a[63:32]
> dst[63:32] := a[95:64] - a[127:96]
> dst[95:64] := b[31:0] - b[63:32]
> dst[127:96] := b[95:64] - b[127:96]
> dst[159:128] := a[159:128] - a[191:160]
> dst[191:160] := a[223:192] - a[255:224]
> dst[223:192] := b[159:128] - b[191:160]
> dst[255:224] := b[223:192] - b[255:224]
> dst[MAX:256] := 0
> ```
> 
> So I think the operands are in the wrong order here?
Words fail me. Also diagrams. I wanted the add and sub descriptions to look 
similar, and copy-pasted from add to sub without verifying the order.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-25 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei accepted this revision.
pengfei added a comment.
This revision is now accepted and ready to land.

LGTM except for a possible typo.




Comment at: clang/lib/Headers/avx2intrin.h:412
+///vectors of [16 x i16] and returns the lower 16 bits of each difference
+///in an element of the [16 x i16] result (overflow is ignored).
+///Differences from \a __a are returned in the lower 64 bits of each

underflow?



Comment at: clang/lib/Headers/avx2intrin.h:448
+///vectors of [8 x i32] and returns the lower 32 bits of each difference in
+///an element of the [8 x i31] result (overflow is ignored). Differences
+///from \a __a are returned in the lower 64 bits of each 128-bit half of

typo or intended?



Comment at: clang/lib/Headers/avx2intrin.h:448
+///vectors of [8 x i32] and returns the lower 32 bits of each difference in
+///an element of the [8 x i31] result (overflow is ignored). Differences
+///from \a __a are returned in the lower 64 bits of each 128-bit half of

pengfei wrote:
> typo or intended?
underflow.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-30 Thread Paul Robinson via Phabricator via cfe-commits
probinson marked an inline comment as done.
probinson added inline comments.



Comment at: clang/lib/Headers/avx2intrin.h:412
+///vectors of [16 x i16] and returns the lower 16 bits of each difference
+///in an element of the [16 x i16] result (overflow is ignored).
+///Differences from \a __a are returned in the lower 64 bits of each

pengfei wrote:
> underflow?
I don't often see "underflow" applied to integer operations. Technically, any 
signed add or subtract could either overflow or underflow, depending on the 
sign and magnitude of the operands. I think just saying "overflow" is clear 
enough?



Comment at: clang/lib/Headers/avx2intrin.h:448
+///vectors of [8 x i32] and returns the lower 32 bits of each difference in
+///an element of the [8 x i31] result (overflow is ignored). Differences
+///from \a __a are returned in the lower 64 bits of each 128-bit half of

pengfei wrote:
> pengfei wrote:
> > typo or intended?
> underflow.
The `i31` is a typo, fixed.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D150114: [Headers][doc] Add "add/sub/mul" intrinsic descriptions to avx2intrin.h

2023-05-30 Thread Paul Robinson via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
probinson marked an inline comment as done.
Closed by commit rGd8291908ef49: [Headers][doc] Add add/sub/mul intrinsic 
descriptions to avx2intrin.h (authored by probinson).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D150114?vs=525632&id=526700#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150114/new/

https://reviews.llvm.org/D150114

Files:
  clang/lib/Headers/avx2intrin.h

Index: clang/lib/Headers/avx2intrin.h
===
--- clang/lib/Headers/avx2intrin.h
+++ clang/lib/Headers/avx2intrin.h
@@ -65,48 +65,150 @@
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors and returns the lower 8 bits of each sum in the corresponding
+///byte of the 256-bit integer vector result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDB instruction.
+///
+/// \param __a
+///A 256-bit integer vector containing one of the source operands.
+/// \param __b
+///A 256-bit integer vector containing one of the source operands.
+/// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] and returns the lower 16 bits of each sum in the
+///corresponding element of the [16 x i16] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
+/// Adds 32-bit integers from corresponding elements of two 256-bit vectors of
+///[8 x i32] and returns the lower 32 bits of each sum in the corresponding
+///element of the [8 x i32] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDD instruction.
+///
+/// \param __a
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [8 x i32] containing one of the source operands.
+/// \returns A 256-bit vector of [8 x i32] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
+/// Adds 64-bit integers from corresponding elements of two 256-bit vectors of
+///[4 x i64] and returns the lower 64 bits of each sum in the corresponding
+///element of the [4 x i64] result (overflow is ignored).
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDQ instruction.
+///
+/// \param __a
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [4 x i64] containing one of the source operands.
+/// \returns A 256-bit vector of [4 x i64] containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
+/// Adds 8-bit integers from corresponding bytes of two 256-bit integer
+///vectors using signed saturation, and returns each sum in the
+///corresponding byte of the 256-bit integer vector result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSB instruction.
+///
+/// \param __a
+///A 256-bit integer vector containing one of the source operands.
+/// \param __b
+///A 256-bit integer vector containing one of the source operands.
+/// \returns A 256-bit integer vector containing the sums.
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
 }
 
+/// Adds 16-bit integers from corresponding elements of two 256-bit vectors of
+///[16 x i16] using signed saturation, and returns the [16 x i16] result.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the \c VPADDSW instruction.
+///
+/// \param __a
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \param __b
+///A 256-bit vector of [16 x i16] containing one of the source operands.
+/// \returns A 256-bit vector of [16 x i16] containing the sums.
 static __inline__ __m25