[PATCH] D127976: [IR] Move vector.insert/vector.extract out of experimental namespace

2022-06-27 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGa83aa33d1bf9: [IR] Move vector.insert/vector.extract out of 
experimental namespace (authored by bsmith).

Changed prior to commit:
  https://reviews.llvm.org/D127976?vs=438975&id=440156#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127976/new/

https://reviews.llvm.org/D127976

Files:
  clang/include/clang/Basic/riscv_vector.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vget-vset-ice.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  llvm/docs/LangRef.rst
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/InstructionSimplify.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
  llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll.bc
  llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
  llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
  llvm/test/CodeGen/AArch64/split-vector-insert.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector.ll
  llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
  llvm/test/CodeGen/AArch64/sve-no-typesize-warnings.ll
  llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
  llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/vpload.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
  llvm/test/Transforms/InstSimplify/extract-vector.ll
  llvm/test/Transforms/InstSimplify/insert-vector.ll
  llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
  llvm/test/Verifier/extract-vector-mismatched-element-types.ll
  llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
  llvm/test/Verifier/insert-vector-mismatched-element-types.ll

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127976: [IR] Move vector.insert/vector.extract out of experimental namespace

2022-06-22 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 438975.
bsmith added a comment.

- Further improve clarity on usable types in LangRef


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127976/new/

https://reviews.llvm.org/D127976

Files:
  clang/include/clang/Basic/riscv_vector.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  llvm/docs/LangRef.rst
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/InstructionSimplify.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
  llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll.bc
  llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
  llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
  llvm/test/CodeGen/AArch64/split-vector-insert.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector.ll
  llvm/test/CodeGen/AArch64/sve-no-typesize-warnings.ll
  llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
  llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/vpload.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
  llvm/test/Transforms/InstSimplify/extract-vector.ll
  llvm/test/Transforms/InstSimplify/insert-vector.ll
  llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
  llvm/test/Verifier/extract-vector-mismatched-element-types.ll
  llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
  llvm/test/Verifier/insert-vector-mismatched-element-types.ll

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127976: [IR] Move vector.insert/vector.extract out of experimental namespace

2022-06-21 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 438718.
bsmith added a comment.

- Clarify LangRef slightly to make it clearer that fixed types can be used
- Rebase on top of recent test changes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127976/new/

https://reviews.llvm.org/D127976

Files:
  clang/include/clang/Basic/riscv_vector.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  llvm/docs/LangRef.rst
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/InstructionSimplify.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
  llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll.bc
  llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
  llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
  llvm/test/CodeGen/AArch64/split-vector-insert.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector.ll
  llvm/test/CodeGen/AArch64/sve-no-typesize-warnings.ll
  llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
  llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/vpload.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
  llvm/test/Transforms/InstSimplify/extract-vector.ll
  llvm/test/Transforms/InstSimplify/insert-vector.ll
  llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
  llvm/test/Verifier/extract-vector-mismatched-element-types.ll
  llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
  llvm/test/Verifier/insert-vector-mismatched-element-types.ll

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127976: [IR] Move vector.insert/vector.extract out of experimental namespace

2022-06-16 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 437557.
bsmith added a comment.

- Add info to release notes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127976/new/

https://reviews.llvm.org/D127976

Files:
  clang/include/clang/Basic/riscv_vector.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  llvm/docs/LangRef.rst
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/InstructionSimplify.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
  llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll.bc
  llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
  llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
  llvm/test/CodeGen/AArch64/split-vector-insert.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector.ll
  llvm/test/CodeGen/AArch64/sve-no-typesize-warnings.ll
  llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
  llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/vpload.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
  llvm/test/Transforms/InstSimplify/extract-vector.ll
  llvm/test/Transforms/InstSimplify/insert-vector.ll
  llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
  llvm/test/Verifier/extract-vector-mismatched-element-types.ll
  llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
  llvm/test/Verifier/insert-vector-mismatched-element-types.ll

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D127976: [IR] Move vector.insert/vector.extract out of experimental namespace

2022-06-16 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, c-rhodes, sdesmalen.
Herald added subscribers: ctetreau, frasercrmck, jdoerfert, luismarques, 
apazos, sameer.abuasal, s.egerton, Jim, jocewei, PkmX, the_o, brucehoult, 
MartinMosbeck, rogfer01, edward-jones, zzheng, jrtc27, niosHD, sabuasal, 
simoncook, johnrusso, rbar, asb, hiraditya.
Herald added a project: All.
bsmith requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, pcwang-thead, MaskRay.
Herald added projects: clang, LLVM.

These intrinsics are now fundamental for SVE code generation and have been
present for a year and a half, hence move them out of the experimental
namespace.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D127976

Files:
  clang/include/clang/Basic/riscv_vector.td
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
  clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-vls-arith-ops.c
  clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
  clang/test/CodeGen/aarch64-sve-vls-compare-ops.c
  clang/test/CodeGen/aarch64-sve-vls-shift-ops.c
  clang/test/CodeGen/aarch64-sve-vls-subscript-ops.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/lib/Analysis/InstructionSimplify.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
  llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
  llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll
  llvm/test/Bitcode/upgrade-vector-insert-extract-intrinsics.ll.bc
  llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
  llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
  llvm/test/CodeGen/AArch64/split-vector-insert.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
  llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
  llvm/test/CodeGen/AArch64/sve-insert-vector.ll
  llvm/test/CodeGen/AArch64/sve-no-typesize-warnings.ll
  llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
  llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
  llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
  llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
  llvm/test/CodeGen/RISCV/rvv/vpload.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
  llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
  llvm/test/Transforms/InstSimplify/extract-vector.ll
  llvm/test/Transforms/InstSimplify/insert-vector.ll
  llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll
  llvm/test/Verifier/extract-vector-mismatched-element-types.ll
  llvm/test/Verifier/insert-extract-intrinsics-invalid.ll
  llvm/test/Verifier/insert-vector-mismatched-element-types.ll

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D122732: [Clang][AArch64][SVE] Allow subscript operator for SVE types

2022-04-08 Thread Bradley Smith via Phabricator via cfe-commits
bsmith accepted this revision.
bsmith added a comment.
This revision is now accepted and ready to land.

LGTM!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122732/new/

https://reviews.llvm.org/D122732

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120323: [clang][SVE] Add support for arithmetic operators on SVE types

2022-03-04 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a comment.
Herald added a project: All.

This all looks reasonable to me, but I'll let @peterwaller-arm have the final 
say.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120323/new/

https://reviews.llvm.org/D120323

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-18 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a comment.

The tests in this are broken for Windows, which I've fixed in
45e102a173680fd3c90def79a7f0766ed2786ff0.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113776/new/

https://reviews.llvm.org/D113776

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 45e102a - [Clang][SVE] Fix windows test breakage in 26f56438e3dab44cea4c8f16d4cb16e9424b02c6

2021-11-18 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-11-18T16:52:32Z
New Revision: 45e102a173680fd3c90def79a7f0766ed2786ff0

URL: 
https://github.com/llvm/llvm-project/commit/45e102a173680fd3c90def79a7f0766ed2786ff0
DIFF: 
https://github.com/llvm/llvm-project/commit/45e102a173680fd3c90def79a7f0766ed2786ff0.diff

LOG: [Clang][SVE] Fix windows test breakage in 
26f56438e3dab44cea4c8f16d4cb16e9424b02c6

Added: 


Modified: 
clang/test/Driver/aarch64-implied-sve-features.c

Removed: 




diff  --git a/clang/test/Driver/aarch64-implied-sve-features.c 
b/clang/test/Driver/aarch64-implied-sve-features.c
index 5eebc66749ba..d26b7a07c16e 100644
--- a/clang/test/Driver/aarch64-implied-sve-features.c
+++ b/clang/test/Driver/aarch64-implied-sve-features.c
@@ -1,78 +1,78 @@
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve %s -### |& 
FileCheck %s --check-prefix=SVE-ONLY
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve %s -### 2>&1 | 
FileCheck %s --check-prefix=SVE-ONLY
 // SVE-ONLY: "-target-feature" "+sve"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve %s -### |& 
FileCheck %s --check-prefix=NOSVE
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve %s -### 2>&1 | 
FileCheck %s --check-prefix=NOSVE
 // NOSVE: "-target-feature" "-sve"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve+nosve %s -### |& 
FileCheck %s --check-prefix=SVE-REVERT
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve+nosve %s -### 2>&1 
| FileCheck %s --check-prefix=SVE-REVERT
 // SVE-REVERT-NOT: "-target-feature" "+sve"
 // SVE-REVERT: "-target-feature" "-sve"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2 %s -### |& 
FileCheck %s --check-prefix=SVE2-IMPLY
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2 %s -### 2>&1 | 
FileCheck %s --check-prefix=SVE2-IMPLY
 // SVE2-IMPLY: "-target-feature" "+sve2" "-target-feature" "+sve"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2+nosve2 %s -### |& 
FileCheck %s --check-prefix=SVE2-REVERT
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2+nosve2 %s -### 
2>&1 | FileCheck %s --check-prefix=SVE2-REVERT
 // SVE2-REVERT: "-target-feature" "+sve" "-target-feature" "-sve2"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2+nosve %s -### |& 
FileCheck %s --check-prefix=SVE2-CONFLICT
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2+nosve %s -### 
2>&1 | FileCheck %s --check-prefix=SVE2-CONFLICT
 // SVE2-CONFLICT: "-target-feature" "-sve" "-target-feature" "-sve2" 
"-target-feature" "-sve2-bitperm" "-target-feature" "-sve2-sha3" 
"-target-feature" "-sve2-aes" "-target-feature" "-sve2-sm4"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve+sve2 %s -### |& 
FileCheck %s --check-prefix=SVE2-CONFLICT-REV
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve+sve2 %s -### 
2>&1 | FileCheck %s --check-prefix=SVE2-CONFLICT-REV
 // SVE2-CONFLICT-REV: "-target-feature" "+sve2" "-target-feature" "+sve"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve+sve2 %s -### |& 
FileCheck %s --check-prefix=SVE-SVE2
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve+sve2 %s -### 2>&1 
| FileCheck %s --check-prefix=SVE-SVE2
 // SVE-SVE2: "-target-feature" "+sve2" "-target-feature" "+sve"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm %s -### 
|& FileCheck %s --check-prefix=SVE2-BITPERM
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm %s -### 
2>&1 | FileCheck %s --check-prefix=SVE2-BITPERM
 // SVE2-BITPERM: "-target-feature" "+sve2-bitperm" "-target-feature" "+sve" 
"-target-feature" "+sve2"
 
-// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve2-bitperm %s -### 
|& FileCheck %s --check-prefix=NOSVE2-BITPERM
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve2-bitperm %s -### 
2>&1 | FileCheck %s --check-prefix=NOSVE2-BITPERM
 // NOSVE2-BITPERM-NOT: "-target-feature" "+sve2-bitperm"
 // NOSVE2-BITPERM-NOT: "-target-feature" "+sve2"
 // NOSVE2-BITPERM-NOT: "-target-feature" "+sve"
 // NOSVE2-BITPERM: "-target-feature" "-sve2-bitperm"
 
-// RUN: %clang -target aarch64-linux-gnu 
-march=armv8-a+sve2-bitperm+nosve2-bitperm %s -### |& FileCheck %s 
--check-prefix=SVE2-BITPERM-REVERT
+// RUN: %clang 

[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-18 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG26f56438e3da: [Clang][SVE] Properly enable/disable dependant 
SVE target features based upon +… (authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113776/new/

https://reviews.llvm.org/D113776

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/aarch64-implied-sve-features.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -910,11 +910,11 @@
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
  AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
- AArch64::AEK_SVE2 | AArch64::AEK_BF16 |
- AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_PAUTH | AArch64::AEK_MTE |
- AArch64::AEK_SSBS | AArch64::AEK_FP16FML |
- AArch64::AEK_SB,
+ AArch64::AEK_BF16 | AArch64::AEK_I8MM |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_PAUTH |
+ AArch64::AEK_MTE | AArch64::AEK_SSBS |
+ AArch64::AEK_FP16FML | AArch64::AEK_SB,
  "9-A"),
 ARMCPUTestParams("cortex-a57", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
@@ -1030,12 +1030,12 @@
  AArch64::AEK_CRC | AArch64::AEK_FP |
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
- AArch64::AEK_RCPC | AArch64::AEK_SVE2 |
- AArch64::AEK_DOTPROD | AArch64::AEK_MTE |
- AArch64::AEK_PAUTH | AArch64::AEK_I8MM |
- AArch64::AEK_BF16 | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_SSBS | AArch64::AEK_SB |
- AArch64::AEK_FP16FML,
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_MTE | AArch64::AEK_PAUTH |
+ AArch64::AEK_I8MM | AArch64::AEK_BF16 |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_SSBS |
+ AArch64::AEK_SB | AArch64::AEK_FP16FML,
  "9-A"),
 ARMCPUTestParams("cyclone", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_NONE | AArch64::AEK_CRYPTO |
Index: llvm/include/llvm/Support/AArch64TargetParser.def
===
--- llvm/include/llvm/Support/AArch64TargetParser.def
+++ llvm/include/llvm/Support/AArch64TargetParser.def
@@ -145,9 +145,10 @@
 AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
 AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
- (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
+ (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
   AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -188,8 +189,9 @@
   AArch64::AEK_SSBS))
 AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
  (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
-  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SVE2BITPERM |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
 

[clang] 26f5643 - [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-18 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-11-18T15:52:28Z
New Revision: 26f56438e3dab44cea4c8f16d4cb16e9424b02c6

URL: 
https://github.com/llvm/llvm-project/commit/26f56438e3dab44cea4c8f16d4cb16e9424b02c6
DIFF: 
https://github.com/llvm/llvm-project/commit/26f56438e3dab44cea4c8f16d4cb16e9424b02c6.diff

LOG: [Clang][SVE] Properly enable/disable dependant SVE target features based 
upon +(no)sve.* options

Co-authored-by: Graham Hunter 

Differential Revision: https://reviews.llvm.org/D113776

Added: 
clang/test/Driver/aarch64-implied-sve-features.c

Modified: 
clang/lib/Driver/ToolChains/Arch/AArch64.cpp
clang/test/Driver/aarch64-cpus.c
llvm/include/llvm/Support/AArch64TargetParser.def
llvm/unittests/Support/TargetParserTest.cpp

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp 
b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index b43edbe1b080b..0b60d097b9ca3 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -79,6 +79,25 @@ static bool DecodeAArch64Features(const Driver &D, StringRef 
text,
 else
   return false;
 
+if (Feature == "sve2")
+  Features.push_back("+sve");
+else if (Feature == "sve2-bitperm" || Feature == "sve2-sha3" ||
+ Feature == "sve2-aes" || Feature == "sve2-sm4") {
+  Features.push_back("+sve");
+  Features.push_back("+sve2");
+} else if (Feature == "nosve") {
+  Features.push_back("-sve2");
+  Features.push_back("-sve2-bitperm");
+  Features.push_back("-sve2-sha3");
+  Features.push_back("-sve2-aes");
+  Features.push_back("-sve2-sm4");
+} else if (Feature == "nosve2") {
+  Features.push_back("-sve2-bitperm");
+  Features.push_back("-sve2-sha3");
+  Features.push_back("-sve2-aes");
+  Features.push_back("-sve2-sm4");
+}
+
 // +sve implies +f32mm if the base architecture is v8.6A, v8.7A, v9.1A or
 // v9.2A. It isn't the case in general that sve implies both f64mm and 
f32mm
 if ((ArchKind == llvm::AArch64::ArchKind::ARMV8_6A ||
@@ -130,8 +149,20 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef 
March,
 
   llvm::AArch64::ArchKind ArchKind = llvm::AArch64::parseArch(Split.first);
   if (ArchKind == llvm::AArch64::ArchKind::INVALID ||
-  !llvm::AArch64::getArchFeatures(ArchKind, Features) ||
-  (Split.second.size() &&
+  !llvm::AArch64::getArchFeatures(ArchKind, Features))
+return false;
+
+  // Enable SVE2 by default on Armv9-A.
+  // It can still be disabled if +nosve2 is present.
+  // We must do this early so that DecodeAArch64Features has the correct state
+  if ((ArchKind == llvm::AArch64::ArchKind::ARMV9A ||
+   ArchKind == llvm::AArch64::ArchKind::ARMV9_1A ||
+   ArchKind == llvm::AArch64::ArchKind::ARMV9_2A)) {
+Features.push_back("+sve");
+Features.push_back("+sve2");
+  }
+
+  if ((Split.second.size() &&
!DecodeAArch64Features(D, Split.second, Features, ArchKind)))
 return false;
 
@@ -419,14 +450,6 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
   if (Pos != std::end(Features))
 Pos = Features.insert(std::next(Pos), {"+i8mm", "+bf16"});
 
-  // Enable SVE2 by default on Armv9-A.
-  // It can still be disabled if +nosve2 is present.
-  const char *SVE2Archs[] = {"+v9a", "+v9.1a", "+v9.2a"};
-  Pos = std::find_first_of(Features.begin(), Features.end(),
-   std::begin(SVE2Archs), std::end(SVE2Archs));
-  if (Pos != Features.end())
-Features.insert(++Pos, "+sve2");
-
   if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
options::OPT_munaligned_access)) {
 if (A->getOption().matches(options::OPT_mno_unaligned_access))

diff  --git a/clang/test/Driver/aarch64-cpus.c 
b/clang/test/Driver/aarch64-cpus.c
index 89cfb7e99a57e..4a377df99f925 100644
--- a/clang/test/Driver/aarch64-cpus.c
+++ b/clang/test/Driver/aarch64-cpus.c
@@ -809,7 +809,7 @@
 // RUN: %clang -target aarch64 -mlittle-endian -march=armv9-a -### -c %s 2>&1 
| FileCheck -check-prefix=GENERICV9A %s
 // RUN: %clang -target aarch64_be -mlittle-endian -march=armv9a -### -c %s 
2>&1 | FileCheck -check-prefix=GENERICV9A %s
 // RUN: %clang -target aarch64_be -mlittle-endian -march=armv9-a -### -c %s 
2>&1 | FileCheck -check-prefix=GENERICV9A %s
-// GENERICV9A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" 
"-target-feature" "+neon" "-target-feature" "+v9a" "-target-feature" "+sve2"

[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-17 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/Driver/ToolChains/Arch/AArch64.cpp:82-88
+if (Feature == "sve2")
+  Features.push_back("+sve");
+else if (Feature == "sve2-bitperm" || Feature == "sve2-sha3" ||
+ Feature == "sve2-aes" || Feature == "sve2-sm4") {
+  Features.push_back("+sve");
+  Features.push_back("+sve2");
+} else if (Feature == "nosve") {

sdesmalen wrote:
> ^^^ Are the above changes necessary? i.e. if only `+sve2-sha3` is set as 
> target feature, I thought LLVM automatically infers that it requires `+sve` 
> and `+sve2`. Is that not the case?
That is the case, yes. However, this code is there to catch combinations: for 
example, `+sve2-bitperm+nosve2` needs to keep `sve` enabled, which would not 
happen if `+sve` had never been added to the feature set.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113776/new/

https://reviews.llvm.org/D113776

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-17 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 387900.
bsmith added a comment.

- Use a more brute-force approach to ensure ordering is accounted for
  - This massively simplifies things and removes what was becoming very 
confusing logic
- Add tests for missing cases


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113776/new/

https://reviews.llvm.org/D113776

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/aarch64-implied-sve-features.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -903,11 +903,11 @@
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
  AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
- AArch64::AEK_SVE2 | AArch64::AEK_BF16 |
- AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_PAUTH | AArch64::AEK_MTE |
- AArch64::AEK_SSBS | AArch64::AEK_FP16FML |
- AArch64::AEK_SB,
+ AArch64::AEK_BF16 | AArch64::AEK_I8MM |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_PAUTH |
+ AArch64::AEK_MTE | AArch64::AEK_SSBS |
+ AArch64::AEK_FP16FML | AArch64::AEK_SB,
  "9-A"),
 ARMCPUTestParams("cortex-a57", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
@@ -1012,12 +1012,12 @@
  AArch64::AEK_CRC | AArch64::AEK_FP |
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
- AArch64::AEK_RCPC | AArch64::AEK_SVE2 |
- AArch64::AEK_DOTPROD | AArch64::AEK_MTE |
- AArch64::AEK_PAUTH | AArch64::AEK_I8MM |
- AArch64::AEK_BF16 | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_SSBS | AArch64::AEK_SB |
- AArch64::AEK_FP16FML,
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_MTE | AArch64::AEK_PAUTH |
+ AArch64::AEK_I8MM | AArch64::AEK_BF16 |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_SSBS |
+ AArch64::AEK_SB | AArch64::AEK_FP16FML,
  "9-A"),
 ARMCPUTestParams("cyclone", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_NONE | AArch64::AEK_CRYPTO |
Index: llvm/include/llvm/Support/AArch64TargetParser.def
===
--- llvm/include/llvm/Support/AArch64TargetParser.def
+++ llvm/include/llvm/Support/AArch64TargetParser.def
@@ -145,9 +145,10 @@
 AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
 AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
- (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
+ (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
   AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -184,8 +185,9 @@
   AArch64::AEK_SSBS))
 AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
  (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
-  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SVE2BITPERM |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
  

[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-16 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/Driver/ToolChains/Arch/AArch64.cpp:73
 static bool DecodeAArch64Features(const Driver &D, StringRef text,
   std::vector<StringRef> &Features,
   llvm::AArch64::ArchKind ArchKind) {

sdesmalen wrote:
> does the order of the features matter?
> ```+sve,+nosve => disables sve
> +nosve,+sve => enables sve
> +nosve,+sve2 => enables sve and sve2```
> 
> 
Yes it does, but I believe that is the desired behaviour.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113776/new/

https://reviews.llvm.org/D113776

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 387212.
bsmith edited the summary of this revision.
bsmith added a comment.
Herald added a subscriber: kristof.beyls.

- Fix duplicate arch feature in unit test
- Use enum class instead of plain enum with typedef


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113776/new/

https://reviews.llvm.org/D113776

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/aarch64-implied-sve-features.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -903,11 +903,11 @@
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
  AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
- AArch64::AEK_SVE2 | AArch64::AEK_BF16 |
- AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_PAUTH | AArch64::AEK_MTE |
- AArch64::AEK_SSBS | AArch64::AEK_FP16FML |
- AArch64::AEK_SB,
+ AArch64::AEK_BF16 | AArch64::AEK_I8MM |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_PAUTH |
+ AArch64::AEK_MTE | AArch64::AEK_SSBS |
+ AArch64::AEK_FP16FML | AArch64::AEK_SB,
  "9-A"),
 ARMCPUTestParams("cortex-a57", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
@@ -1012,12 +1012,12 @@
  AArch64::AEK_CRC | AArch64::AEK_FP |
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
- AArch64::AEK_RCPC | AArch64::AEK_SVE2 |
- AArch64::AEK_DOTPROD | AArch64::AEK_MTE |
- AArch64::AEK_PAUTH | AArch64::AEK_I8MM |
- AArch64::AEK_BF16 | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_SSBS | AArch64::AEK_SB |
- AArch64::AEK_FP16FML,
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_MTE | AArch64::AEK_PAUTH |
+ AArch64::AEK_I8MM | AArch64::AEK_BF16 |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_SSBS |
+ AArch64::AEK_SB | AArch64::AEK_FP16FML,
  "9-A"),
 ARMCPUTestParams("cyclone", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_NONE | AArch64::AEK_CRYPTO |
Index: llvm/include/llvm/Support/AArch64TargetParser.def
===
--- llvm/include/llvm/Support/AArch64TargetParser.def
+++ llvm/include/llvm/Support/AArch64TargetParser.def
@@ -145,9 +145,10 @@
 AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
 AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
- (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
+ (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
   AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -184,8 +185,9 @@
   AArch64::AEK_SSBS))
 AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
  (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
-  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SVE2BITPERM |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
 

[PATCH] D113776: [Clang][SVE] Properly enable/disable dependant SVE target features based upon +(no)sve.* options

2021-11-12 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, sdesmalen.
Herald added subscribers: psnobl, tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D113776

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/aarch64-implied-sve-features.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -904,7 +904,8 @@
  AArch64::AEK_LSE | AArch64::AEK_RDM |
  AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
  AArch64::AEK_SVE2 | AArch64::AEK_BF16 |
- AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_I8MM | AArch64::AEK_SVE |
+ AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
  AArch64::AEK_PAUTH | AArch64::AEK_MTE |
  AArch64::AEK_SSBS | AArch64::AEK_FP16FML |
  AArch64::AEK_SB,
@@ -1012,12 +1013,12 @@
  AArch64::AEK_CRC | AArch64::AEK_FP |
  AArch64::AEK_SIMD | AArch64::AEK_RAS |
  AArch64::AEK_LSE | AArch64::AEK_RDM |
- AArch64::AEK_RCPC | AArch64::AEK_SVE2 |
- AArch64::AEK_DOTPROD | AArch64::AEK_MTE |
- AArch64::AEK_PAUTH | AArch64::AEK_I8MM |
- AArch64::AEK_BF16 | AArch64::AEK_SVE2BITPERM |
- AArch64::AEK_SSBS | AArch64::AEK_SB |
- AArch64::AEK_FP16FML,
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_MTE | AArch64::AEK_PAUTH |
+ AArch64::AEK_I8MM | AArch64::AEK_BF16 |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_SSBS |
+ AArch64::AEK_SB | AArch64::AEK_FP16FML,
  "9-A"),
 ARMCPUTestParams("cyclone", "armv8-a", "crypto-neon-fp-armv8",
  AArch64::AEK_NONE | AArch64::AEK_CRYPTO |
Index: llvm/include/llvm/Support/AArch64TargetParser.def
===
--- llvm/include/llvm/Support/AArch64TargetParser.def
+++ llvm/include/llvm/Support/AArch64TargetParser.def
@@ -145,9 +145,10 @@
 AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
 AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
- (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SVE2BITPERM |
+ (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
   AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -184,8 +185,9 @@
   AArch64::AEK_SSBS))
 AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
  (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
-  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SVE2BITPERM |
-  AArch64::AEK_SB | AArch64::AEK_FP16FML))
+  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
+  AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+  AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS))
Index: clang/test/Driver/aarch64-implied-sve-features.c
===
--- /dev/null
+++ clang/test/Driver/aarch64-implied-sve-features.c
@@ -0,0 +1,65 @@
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+sve %s -### |& FileCheck %s --check-prefix=SVE-ONLY
+// SVE-ONLY: "-target-feature" "+sve"
+
+// RUN: %clang -target aarch64-linux-gnu -march=armv8-a+nosve %s 

[clang] 0ce46a1 - [AArch64][Driver][SVE] Allow -msve-vector-bits=<n>+ syntax to mean no maximum vscale

2021-10-25 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-10-25T11:10:52Z
New Revision: 0ce46a1d43c6c2e0df429a6a80848d4acc781eb6

URL: 
https://github.com/llvm/llvm-project/commit/0ce46a1d43c6c2e0df429a6a80848d4acc781eb6
DIFF: 
https://github.com/llvm/llvm-project/commit/0ce46a1d43c6c2e0df429a6a80848d4acc781eb6.diff

LOG: [AArch64][Driver][SVE] Allow -msve-vector-bits=<n>+ syntax to mean no 
maximum vscale

This patch splits the existing SveVectorBits LangOpt into VScaleMin and
VScaleMax LangOpts such that we can represent such an option. The cc1
option has also been split into -mvscale-{min,max}= options so that the
cc1 arguments better reflect the vscale_range IR attribute.

Differential Revision: https://reviews.llvm.org/D111790

Added: 


Modified: 
clang/include/clang/Basic/LangOptions.def
clang/include/clang/Driver/Options.td
clang/lib/AST/ASTContext.cpp
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Sema/SemaType.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
clang/test/Driver/aarch64-sve-vector-bits.c
clang/test/Preprocessor/aarch64-target-features.c
clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
clang/test/Sema/aarch64-sve-lax-vector-conversions.c
clang/test/Sema/attr-arm-sve-vector-bits.c
clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/LangOptions.def 
b/clang/include/clang/Basic/LangOptions.def
index 912fd0ec18961..4651f4fff6aa0 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -425,7 +425,8 @@ LANGOPT(SpeculativeLoadHardening, 1, 0, "Speculative load 
hardening enabled")
 LANGOPT(RelativeCXXABIVTables, 1, 0,
 "Use an ABI-incompatible v-table layout that uses relative references")
 
-LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits")
+LANGOPT(VScaleMin, 32, 0, "Minimum vscale value")
+LANGOPT(VScaleMax, 32, 0, "Maximum vscale value")
 
 ENUM_LANGOPT(ExtendIntArgs, ExtendArgsKind, 1, ExtendArgsKind::ExtendTo32,
  "Controls how scalar integer arguments are extended in calls "

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 25f3ddd97f12a..b4a2411fa5c5c 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3331,13 +3331,20 @@ foreach i = {8-15,18} in
   def fcall_saved_x#i : Flag<["-"], "fcall-saved-x"#i>, 
Group,
 HelpText<"Make the x"#i#" register call-saved (AArch64 only)">;
 
-def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">,
-  Group, Flags<[NoXarchOption,CC1Option]>,
+def msve_vector_bits_EQ : Joined<["-"], "msve-vector-bits=">, 
Group,
   HelpText<"Specify the size in bits of an SVE vector register. Defaults to 
the"
-   " vector length agnostic value of \"scalable\". (AArch64 only)">,
-  Values<"128,256,512,1024,2048,scalable">,
-  NormalizedValues<["128", "256", "512", "1024", "2048", "0"]>,
-  MarshallingInfoEnum, "0">;
+   " vector length agnostic value of \"scalable\". (AArch64 only)">;
+
+def mvscale_min_EQ : Joined<["-"], "mvscale-min=">,
+  Group, Flags<[NoXarchOption,CC1Option]>,
+  HelpText<"Specify the vscale minimum. Defaults to the"
+   " vector length agnostic value of \"0\". (AArch64 only)">,
+  MarshallingInfoInt>;
+def mvscale_max_EQ : Joined<["-"], "mvscale-max=">,
+  Group, Flags<[NoXarchOption,CC1Option]>,
+  HelpText<"Specify the vscale maximum. Defaults to the"
+   " vector length agnostic value of \"0\". (AArch64 only)">,
+  MarshallingInfoInt>;
 
 def msign_return

[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=<n>+ syntax to mean no maximum vscale

2021-10-25 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0ce46a1d43c6: [AArch64][Driver][SVE] Allow 
-msve-vector-bits=n+ syntax to mean no maximum… (authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
  clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
  clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c
  clang/test/Preprocessor/aarch64-target-features.c
  clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
  clang/test/Sema/aarch64-sve-lax-vector-conversions.c
  clang/test/Sema/attr-arm-sve-vector-bits.c
  clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
  clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
  clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Index: clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
===
--- clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include 
Index: clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
===
--- clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
 
 #include 
 
Index: clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
===
--- clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
+++ clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: 

[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=<n>+ syntax to mean no maximum vscale

2021-10-22 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 381511.
bsmith added a comment.

- Don't define SVE target bits macros when vscale min != max
- Add tests for above change
- Use correct (unsigned) version of getAsInteger


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
  clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
  clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c
  clang/test/Preprocessor/aarch64-target-features.c
  clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
  clang/test/Sema/aarch64-sve-lax-vector-conversions.c
  clang/test/Sema/attr-arm-sve-vector-bits.c
  clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
  clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
  clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Index: clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
===
--- clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include 
Index: clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
===
--- clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
 
 #include 
 
Index: clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
===
--- clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
+++ clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature 

[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-18 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 380330.
bsmith added a comment.

- Avoid side-effects in assertions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
  clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
  clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c
  clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
  clang/test/Sema/aarch64-sve-lax-vector-conversions.c
  clang/test/Sema/attr-arm-sve-vector-bits.c
  clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
  clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
  clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Index: clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
===
--- clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include 
Index: clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
===
--- clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
 
 #include 
 
Index: clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
===
--- clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
+++ clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 

[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 380015.
bsmith added a comment.

- Update sema checking for sve_vector_bits attribute to emit an error when the 
vscale min != max, i.e. when -msve-vector-bits=+ is used
- Add test to cover the above case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
  clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
  clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c
  clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
  clang/test/Sema/aarch64-sve-lax-vector-conversions.c
  clang/test/Sema/attr-arm-sve-vector-bits.c
  clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
  clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
  clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Index: clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
===
--- clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include 
Index: clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
===
--- clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
 
 #include 
 
Index: clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
===
--- clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
+++ clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 

[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/Sema/SemaType.cpp:7916
   // The attribute vector size must match -msve-vector-bits.
-  if (VecSize != S.getLangOpts().ArmSveVectorBits) {
+  if (VecSize != S.getLangOpts().VScaleMin * 128) {
 S.Diag(Attr.getLoc(), diag::err_attribute_bad_sve_vector_size)

bsmith wrote:
> paulwalker-arm wrote:
> > I'm thinking there should be a check for `S.getLangOpts().VScaleMin == 
> > S.getLangOpts().VScaleMax` somewhere, but I guess the current diagnostic 
> > probably doesn't account for two values, nor should it really.  What about 
> > just duplicating this block but using `VScaleMax` in place of `VScaleMin`?
> I think I would rather have a separate check for min == max, and output a 
> diagnostic stating that an exact -msve-vector-bits value must be used, does 
> that sound sensible?
> 
> This could then perhaps be combined with the previous comment to produce 
> sensible output, i.e.:
> 
> !vscale_min -> not supported
> vscale_min != vscale_max -> must be fixed value
Actually, now that I re-read the error message, and the fact that it directly 
refers to the valid values, I think instead there should just be an initial 
check of `min == 0 || min != max`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/Sema/SemaType.cpp:7916
   // The attribute vector size must match -msve-vector-bits.
-  if (VecSize != S.getLangOpts().ArmSveVectorBits) {
+  if (VecSize != S.getLangOpts().VScaleMin * 128) {
 S.Diag(Attr.getLoc(), diag::err_attribute_bad_sve_vector_size)

paulwalker-arm wrote:
> I'm thinking there should be a check for `S.getLangOpts().VScaleMin == 
> S.getLangOpts().VScaleMax` somewhere, but I guess the current diagnostic 
> probably doesn't account for two values, nor should it really.  What about 
> just duplicating this block but using `VScaleMax` in place of `VScaleMin`?
I think I would rather have a separate check for min == max, and output a 
diagnostic stating that an exact -msve-vector-bits value must be used, does 
that sound sensible?

This could then perhaps be combined with the previous comment to produce 
sensible output, i.e.:

!vscale_min -> not supported
vscale_min != vscale_max -> must be fixed value


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-14 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 379701.
bsmith added a comment.

- Remove mention of 128-bit chunks from help texts
- Allow any positive integer value for -mvscale-{min,max}, not just powers of 2


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
  clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
  clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c
  clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
  clang/test/Sema/aarch64-sve-lax-vector-conversions.c
  clang/test/Sema/attr-arm-sve-vector-bits.c
  clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
  clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
  clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Index: clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
===
--- clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include 
Index: clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
===
--- clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
 
 #include 
 
Index: clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
===
--- clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
+++ clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -flax-vector-conversions=none 

[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-14 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a comment.

In D111790#3063698, @paulwalker-arm wrote:

> Are the references to "128-bit chunks" for the vscale flags necessary?  
> That's really a nuisance of SVE that LLVM IR should not need to worry about.  
> Can we speak exclusively in terms of vscale or is the "multiples of 128" 
> required somewhere?  Perhaps we're missing a target specific convert function 
> from vscale+elt to bytes or something.  Also there's nothing stopping vscale 
> from being 3 (essentially any positive number) but you look to be restricting 
> it to a power of two.

It does feel like we should have something somewhere that specifies the vscale 
chunk as I've had to hardcode 128 in a lot of places (though they are SVE 
specific places, so I'll remove mention of it from the helptext). As for vscale 
values allowed, if we don't validate the values used in the option itself, 
where would we do it? I guess we could just blindly propagate the value down to 
the IR attribute and let the backends figure out what they want. The issue then 
is providing a nice error, though perhaps we don't need one given they are cc1 
options?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111790/new/

https://reviews.llvm.org/D111790

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D111790: [AArch64][Driver][SVE] Allow -msve-vector-bits=+ syntax to mean no maximum vscale

2021-10-14 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, sdesmalen.
Herald added subscribers: ctetreau, dexonsmith, dang, psnobl, kristof.beyls, 
tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This patch splits the existing SveVectorBits LangOpt into VScaleMin and
VScaleMax LangOpts such that we can represent such an option. The cc1
option has also been split into -mvscale-{min,max}= options so that the
cc1 arguments better reflect the vscale_range IR attribute.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D111790

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/AST/ASTContext.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaType.cpp
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
  clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
  clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
  clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
  clang/test/Driver/aarch64-sve-vector-bits.c
  clang/test/Sema/aarch64-sve-explicit-casts-fixed-size.c
  clang/test/Sema/aarch64-sve-lax-vector-conversions.c
  clang/test/Sema/attr-arm-sve-vector-bits.c
  clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
  clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
  clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp

Index: clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
===
--- clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -msve-vector-bits=512 -fallow-half-arguments-and-returns -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -fallow-half-arguments-and-returns -Wconversion %s
 // expected-no-diagnostics
 
 #include 
Index: clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
===
--- clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
+++ clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=integer -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=all -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify=lax-vector-all %s
 
 #include 
 
Index: clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
===
--- clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
+++ clang/test/SemaCXX/aarch64-sve-explicit-casts-fixed-size.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=128 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=256 -flax-vector-conversions=none -fallow-half-arguments-and-returns -ffreestanding -fsyntax-only 

[PATCH] D106860: [clang][AArch64][SVE] Avoid going through memory for fixed/scalable predicate casts

2021-08-04 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGe57e1e4e0026: [clang][AArch64][SVE] Avoid going through 
memory for fixed/scalable predicate… (authored by bsmith).

Changed prior to commit:
  https://reviews.llvm.org/D106860?vs=363093=364153#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106860/new/

https://reviews.llvm.org/D106860

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Index: clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
===
--- clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -49,20 +49,16 @@
 
 // CHECK-128-LABEL: @write_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca , align 2
-// CHECK-128-NEXT:store  [[V:%.*]], * [[SAVED_VALUE]], align 2, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_VALUE]] to <2 x i8>*
-// CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:store <2 x i8> [[TMP0]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = bitcast  [[V:%.*]] to 
+// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0)
+// CHECK-128-NEXT:store <2 x i8> [[CASTFIXEDSVE]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-128-NEXT:ret void
 //
 // CHECK-512-LABEL: @write_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:[[SAVED_VALUE:%.*]] = alloca , align 8
-// CHECK-512-NEXT:store  [[V:%.*]], * [[SAVED_VALUE]], align 8, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:store <8 x i8> [[TMP0]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:[[TMP0:%.*]] = bitcast  [[V:%.*]] to 
+// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0)
+// CHECK-512-NEXT:store <8 x i8> [[CASTFIXEDSVE]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
 // CHECK-512-NEXT:ret void
 //
 void write_global_bool(svbool_t v) { global_bool = v; }
@@ -101,20 +97,16 @@
 
 // CHECK-128-LABEL: @read_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 2
 // CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i8>, <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to *
-// CHECK-128-NEXT:[[TMP1:%.*]] = load , * [[CASTFIXEDSVE]], align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = call  @llvm.experimental.vector.insert.nxv2i8.v2i8( undef, <2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:[[TMP1:%.*]] = bitcast  [[CASTFIXEDSVE]] to 
 // CHECK-128-NEXT:ret  [[TMP1]]
 //
 // CHECK-512-LABEL: @read_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:[[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
 // CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i8>, <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 8, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to *
-// CHECK-512-NEXT:[[TMP1:%.*]] = load , * [[CASTFIXEDSVE]], align 8, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = call  @llvm.experimental.vector.insert.nxv2i8.v8i8( undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT:[[TMP1:%.*]] = bitcast  [[CASTFIXEDSVE]] to 
 // CHECK-512-NEXT:ret  [[TMP1]]
 //
 svbool_t read_global_bool() { return global_bool; }
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -18,19 +18,15 @@
 // CHECK-NEXT:[[PRED_ADDR:%.*]] = alloca , align 2
 // CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16
 // CHECK-NEXT:[[PG:%.*]] = alloca , align 2
-// CHECK-NEXT:[[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:[[SAVED_VALUE1:%.*]] = alloca <8 x i8>, align 8
 // 

[clang] e57e1e4 - [clang][AArch64][SVE] Avoid going through memory for fixed/scalable predicate casts

2021-08-04 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-08-04T16:10:37Z
New Revision: e57e1e4e00264b77b2b35ad2bf419a48aecdd6bc

URL: 
https://github.com/llvm/llvm-project/commit/e57e1e4e00264b77b2b35ad2bf419a48aecdd6bc
DIFF: 
https://github.com/llvm/llvm-project/commit/e57e1e4e00264b77b2b35ad2bf419a48aecdd6bc.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for fixed/scalable 
predicate casts

For fixed SVE types, predicates are represented using vectors of i8,
whereas for scalable types they are represented using vectors of i1. We
can avoid going through memory for casts between these by bitcasting the
i1 scalable vectors to/from a scalable i8 vector of matching size, which
can then use the existing vector insert/extract logic.

Differential Revision: https://reviews.llvm.org/D106860

Added: 


Modified: 
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 47a4ed35be85e..1296dfa18b9a5 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1271,12 +1271,26 @@ static llvm::Value *CreateCoercedLoad(Address Src, 
llvm::Type *Ty,
   // perform the conversion.
   if (auto *ScalableDst = dyn_cast(Ty)) {
 if (auto *FixedSrc = dyn_cast(SrcTy)) {
+  // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+  // vector, use a vector insert and bitcast the result.
+  bool NeedsBitcast = false;
+  auto PredType =
+  llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
+  llvm::Type *OrigType = Ty;
+  if (ScalableDst == PredType &&
+  FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
+ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 
2);
+NeedsBitcast = true;
+  }
   if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
 auto *Load = CGF.Builder.CreateLoad(Src);
 auto *UndefVec = llvm::UndefValue::get(ScalableDst);
 auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
-return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, 
Zero,
-  "castScalableSve");
+llvm::Value *Result = CGF.Builder.CreateInsertVector(
+ScalableDst, UndefVec, Load, Zero, "castScalableSve");
+if (NeedsBitcast)
+  Result = CGF.Builder.CreateBitCast(Result, OrigType);
+return Result;
   }
 }
   }
@@ -2857,9 +2871,18 @@ void CodeGenFunction::EmitFunctionProlog(const 
CGFunctionInfo ,
   // llvm.experimental.vector.extract to convert back to the original
   // VLST.
   if (auto *VecTyTo = dyn_cast(ConvertType(Ty))) {
-auto *Coerced = Fn->getArg(FirstIRArg);
+llvm::Value *Coerced = Fn->getArg(FirstIRArg);
 if (auto *VecTyFrom =
 dyn_cast(Coerced->getType())) {
+  // If we are casting a scalable 16 x i1 predicate vector to a fixed 
i8
+  // vector, bitcast the source and use a vector extract.
+  auto PredType =
+  llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+  if (VecTyFrom == PredType &&
+  VecTyTo->getElementType() == Builder.getInt8Ty()) {
+VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
+  }
   if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
 llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
 

diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 418f23bd1a97b..e47701915f2f4 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2063,11 +2063,25 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
 // perform the bitcast.
 if (const auto *FixedSrc = dyn_cast(SrcTy)) {
   if (const auto *ScalableDst = dyn_cast(DstTy)) 
{
+// If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+// vector, use a vector insert and bitcast the result.
+bool NeedsBitCast = false;
+auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
+llvm::Type *OrigType = DstTy;
+if (ScalableDst == PredType &&
+FixedSrc->getElementType() == Builder.getInt8Ty()) {
+  DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+  ScalableDst = dyn_cast(DstTy);
+ 

[PATCH] D106860: [clang][AArch64][SVE] Avoid going through memory for fixed/scalable predicate casts

2021-07-30 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 363093.
bsmith marked an inline comment as done.
bsmith added a comment.

- Update comment to reflect changes
- Add new test for lax casting via memory


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106860/new/

https://reviews.llvm.org/D106860

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Index: clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
===
--- clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -49,20 +49,16 @@
 
 // CHECK-128-LABEL: @write_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca , align 16
-// CHECK-128-NEXT:store  [[V:%.*]], * [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_VALUE]] to <2 x i8>*
-// CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:store <2 x i8> [[TMP0]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = bitcast  [[V:%.*]] to 
+// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0)
+// CHECK-128-NEXT:store <2 x i8> [[CASTFIXEDSVE]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-128-NEXT:ret void
 //
 // CHECK-512-LABEL: @write_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:[[SAVED_VALUE:%.*]] = alloca , align 16
-// CHECK-512-NEXT:store  [[V:%.*]], * [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:store <8 x i8> [[TMP0]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:[[TMP0:%.*]] = bitcast  [[V:%.*]] to 
+// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0)
+// CHECK-512-NEXT:store <8 x i8> [[CASTFIXEDSVE]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
 // CHECK-512-NEXT:ret void
 //
 void write_global_bool(svbool_t v) { global_bool = v; }
@@ -101,20 +97,16 @@
 
 // CHECK-128-LABEL: @read_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 16
 // CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i8>, <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to *
-// CHECK-128-NEXT:[[TMP1:%.*]] = load , * [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = call  @llvm.experimental.vector.insert.nxv2i8.v2i8( undef, <2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:[[TMP1:%.*]] = bitcast  [[CASTFIXEDSVE]] to 
 // CHECK-128-NEXT:ret  [[TMP1]]
 //
 // CHECK-512-LABEL: @read_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:[[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
 // CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i8>, <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to *
-// CHECK-512-NEXT:[[TMP1:%.*]] = load , * [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = call  @llvm.experimental.vector.insert.nxv2i8.v8i8( undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT:[[TMP1:%.*]] = bitcast  [[CASTFIXEDSVE]] to 
 // CHECK-512-NEXT:ret  [[TMP1]]
 //
 svbool_t read_global_bool() { return global_bool; }
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -18,19 +18,15 @@
 // CHECK-NEXT:[[PRED_ADDR:%.*]] = alloca , align 2
 // CHECK-NEXT:[[VEC_ADDR:%.*]] = alloca , align 16
 // CHECK-NEXT:[[PG:%.*]] = alloca , align 2
-// CHECK-NEXT:[[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
-// CHECK-NEXT:[[SAVED_VALUE1:%.*]] = alloca <8 x i8>, align 8
 // CHECK-NEXT:store  [[PRED:%.*]], * [[PRED_ADDR]], align 2
 // CHECK-NEXT:store  [[VEC:%.*]], * [[VEC_ADDR]], align 16
 // CHECK-NEXT:

[PATCH] D106860: [clang][AArch64][SVE] Avoid going through memory for fixed/scalable predicate casts

2021-07-29 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/CodeGen/CGExprScalar.cpp:2102
+  Src = Builder.CreateBitCast(Src, SrcTy);
+}
 if (ScalableSrc->getElementType() == FixedDst->getElementType()) {

junparser wrote:
> I think this may also work for casting between vectors with different 
> element types.
A similar argument applies here as in the other related ticket: in principle we 
could, however it's not clear that there is a good use case for writing code 
that would make use of this. So for now it's probably best to just deal with 
predicates, which are definitely a problem, and handle other cases as they arise.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106860/new/

https://reviews.llvm.org/D106860

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106860: [clang][AArch64][SVE] Avoid going through memory for fixed/scalable predicate casts

2021-07-28 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/CodeGen/CGExprScalar.cpp:2110-2129
 // Perform VLAT <-> VLST bitcast through memory.
 // TODO: since the llvm.experimental.vector.{insert,extract} intrinsics
 //   require the element types of the vectors to be the same, we
 //   need to keep this around for casting between predicates, or more
 //   generally for bitcasts between VLAT <-> VLST where the element
 //   types of the vectors are not the same, until we figure out a 
better
 //   way of doing these casts.

c-rhodes wrote:
> With the predicate casting now using the intrinsics I don't think this is 
> needed any longer. Perhaps we should add an unreachable above if the element 
> type doesn't match?
Don't we still need this for casting between vectors with different element 
types, or are these guaranteed to not hit this code path?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106860/new/

https://reviews.llvm.org/D106860

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106860: [clang][AArch64][SVE] Avoid going through memory for fixed/scalable predicate casts

2021-07-27 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, eli.friedman, 
junparser.
Herald added subscribers: psnobl, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

For fixed SVE types, predicates are represented using vectors of i8,
whereas for scalable types they are represented using vectors of i1. We
can avoid going through memory for casts between these by bitcasting the
i1 scalable vectors to/from a scalable i8 vector of matching size, which
can then use the existing vector insert/extract logic.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106860

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
  clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Index: clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
===
--- clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -49,20 +49,16 @@
 
 // CHECK-128-LABEL: @write_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca , align 16
-// CHECK-128-NEXT:store  [[V:%.*]], * [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_VALUE]] to <2 x i8>*
-// CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:store <2 x i8> [[TMP0]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[TMP0:%.*]] = bitcast  [[V:%.*]] to 
+// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = call <2 x i8> @llvm.experimental.vector.extract.v2i8.nxv2i8( [[TMP0]], i64 0)
+// CHECK-128-NEXT:store <2 x i8> [[CASTFIXEDSVE]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-128-NEXT:ret void
 //
 // CHECK-512-LABEL: @write_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:[[SAVED_VALUE:%.*]] = alloca , align 16
-// CHECK-512-NEXT:store  [[V:%.*]], * [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
-// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_VALUE]] to <8 x i8>*
-// CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:store <8 x i8> [[TMP0]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:[[TMP0:%.*]] = bitcast  [[V:%.*]] to 
+// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv2i8( [[TMP0]], i64 0)
+// CHECK-512-NEXT:store <8 x i8> [[CASTFIXEDSVE]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
 // CHECK-512-NEXT:ret void
 //
 void write_global_bool(svbool_t v) { global_bool = v; }
@@ -101,20 +97,16 @@
 
 // CHECK-128-LABEL: @read_global_bool(
 // CHECK-128-NEXT:  entry:
-// CHECK-128-NEXT:[[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 16
 // CHECK-128-NEXT:[[TMP0:%.*]] = load <2 x i8>, <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
-// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to *
-// CHECK-128-NEXT:[[TMP1:%.*]] = load , * [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-128-NEXT:[[CASTFIXEDSVE:%.*]] = call  @llvm.experimental.vector.insert.nxv2i8.v2i8( undef, <2 x i8> [[TMP0]], i64 0)
+// CHECK-128-NEXT:[[TMP1:%.*]] = bitcast  [[CASTFIXEDSVE]] to 
 // CHECK-128-NEXT:ret  [[TMP1]]
 //
 // CHECK-512-LABEL: @read_global_bool(
 // CHECK-512-NEXT:  entry:
-// CHECK-512-NEXT:[[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
 // CHECK-512-NEXT:[[TMP0:%.*]] = load <8 x i8>, <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
-// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to *
-// CHECK-512-NEXT:[[TMP1:%.*]] = load , * [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
+// CHECK-512-NEXT:[[CASTFIXEDSVE:%.*]] = call  @llvm.experimental.vector.insert.nxv2i8.v8i8( undef, <8 x i8> [[TMP0]], i64 0)
+// CHECK-512-NEXT:[[TMP1:%.*]] = bitcast  [[CASTFIXEDSVE]] to 
 // CHECK-512-NEXT:ret  [[TMP1]]
 //
 svbool_t read_global_bool() { return global_bool; }
Index: clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -18,19 +18,15 @@
 // CHECK-NEXT:[[PRED_ADDR:%.*]] = 

[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-07-19 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: clang/lib/CodeGen/CodeGenFunction.cpp:505-506
+  } else if (getContext().getTargetInfo().hasFeature("sve")) {
+CurFn->addFnAttr(
+llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), 0, 16));
   }

paulwalker-arm wrote:
> bsmith wrote:
> > Is this really what we want? Won't this enable fixed length codegen all of 
> > the time?
> Fixed length codegen is tied to the minimum `vscale` value, so using `0` 
> here means nothing is known about the minimum `vscale` and thus fixed length 
> codegen will be restricted to 128bit, as is the case when no attribute is 
> specified.
Ah right ok, I'd missed that detail. Ignore me then!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D106277: [SVE] Remove the interface for getMaxVScale in favour of the IR attributes

2021-07-19 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a reviewer: paulwalker-arm.
bsmith added inline comments.



Comment at: clang/lib/CodeGen/CodeGenFunction.cpp:505-506
+  } else if (getContext().getTargetInfo().hasFeature("sve")) {
+CurFn->addFnAttr(
+llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(), 0, 16));
   }

Is this really what we want? Won't this enable fixed length codegen all of the 
time?



Comment at: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp:119-131
-Optional RISCVTTIImpl::getMaxVScale() const {
-  // There is no assumption of the maximum vector length in V specification.
-  // We use the value specified by users as the maximum vector length.
-  // This function will use the assumed maximum vector length to get the
-  // maximum vscale for LoopVectorizer.
-  // If users do not specify the maximum vector length, we have no way to
-  // know whether the LoopVectorizer is safe to do or not.

I'm not sure that RISCV have made a commitment to use the vscale_range 
attribute yet, have they? In either case I think they should be involved in a 
change like this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106277/new/

https://reviews.llvm.org/D106277

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-21 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a comment.

Yup, just committed a fix in ed31ff9c7a9e538ead1fa4feecf09987998621b4, sorry 
for the noise.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104643: [AArch64][SVE] Add missing target require to test

2021-06-21 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGed31ff9c7a9e: [AArch64][SVE] Add missing target require to 
test (authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104643/new/

https://reviews.llvm.org/D104643

Files:
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c


Index: clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512  | 
FileCheck %s --check-prefixes=CHECK,CHECK512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | 
FileCheck %s --check-prefixes=CHECK,CHECK1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | 
FileCheck %s --check-prefixes=CHECK,CHECK2048
+// REQUIRES: aarch64-registered-target
 
 #include 
 


Index: clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512  | FileCheck %s --check-prefixes=CHECK,CHECK512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | FileCheck %s --check-prefixes=CHECK,CHECK2048
+// REQUIRES: aarch64-registered-target
 
 #include 
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] ed31ff9 - [AArch64][SVE] Add missing target require to test

2021-06-21 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-06-21T15:36:44+01:00
New Revision: ed31ff9c7a9e538ead1fa4feecf09987998621b4

URL: 
https://github.com/llvm/llvm-project/commit/ed31ff9c7a9e538ead1fa4feecf09987998621b4
DIFF: 
https://github.com/llvm/llvm-project/commit/ed31ff9c7a9e538ead1fa4feecf09987998621b4.diff

LOG: [AArch64][SVE] Add missing target require to test

Differential revision: https://reviews.llvm.org/D104643

Added: 


Modified: 
clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c

Removed: 




diff  --git a/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c 
b/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
index fbae8c261703..2ef8698d3dda 100644
--- a/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512  | 
FileCheck %s --check-prefixes=CHECK,CHECK512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | 
FileCheck %s --check-prefixes=CHECK,CHECK1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | 
FileCheck %s --check-prefixes=CHECK,CHECK2048
+// REQUIRES: aarch64-registered-target
 
 #include 
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104643: [AArch64][SVE] Add missing target require to test

2021-06-21 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added a reviewer: peterwaller-arm.
Herald added subscribers: psnobl, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104643

Files:
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c


Index: clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512  | 
FileCheck %s --check-prefixes=CHECK,CHECK512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | 
FileCheck %s --check-prefixes=CHECK,CHECK1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | 
FileCheck %s --check-prefixes=CHECK,CHECK2048
+// REQUIRES: aarch64-registered-target
 
 #include 
 


Index: clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
===
--- clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
+++ clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512  | FileCheck %s --check-prefixes=CHECK,CHECK512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | FileCheck %s --check-prefixes=CHECK,CHECK2048
+// REQUIRES: aarch64-registered-target
 
 #include 
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-21 Thread Bradley Smith via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9e7329e37ede: [AArch64][SVE] Wire up vscale_range attribute 
to SVE min/max vector queries (authored by bsmith).

Changed prior to commit:
  https://reviews.llvm.org/D103702?vs=352101=353345#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

Files:
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/test/CodeGen/AArch64/sve-vscale-attr.ll

Index: llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOARG
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ARG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @func_vscale_none(<16 x i32>* %a, <16 x i32>* %b) #0 {
+; CHECK-NOARG-LABEL: func_vscale_none:
+; CHECK-NOARG:   // %bb.0:
+; CHECK-NOARG-NEXT:ldp q0, q1, [x0]
+; CHECK-NOARG-NEXT:ldp q2, q3, [x1]
+; CHECK-NOARG-NEXT:ldp q4, q5, [x0, #32]
+; CHECK-NOARG-NEXT:ldp q7, q6, [x1, #32]
+; CHECK-NOARG-NEXT:add v1.4s, v1.4s, v3.4s
+; CHECK-NOARG-NEXT:add v0.4s, v0.4s, v2.4s
+; CHECK-NOARG-NEXT:add v2.4s, v5.4s, v6.4s
+; CHECK-NOARG-NEXT:add v3.4s, v4.4s, v7.4s
+; CHECK-NOARG-NEXT:stp q3, q2, [x0, #32]
+; CHECK-NOARG-NEXT:stp q0, q1, [x0]
+; CHECK-NOARG-NEXT:ret
+;
+; CHECK-ARG-LABEL: func_vscale_none:
+; CHECK-ARG:   // %bb.0:
+; CHECK-ARG-NEXT:ptrue p0.s, vl16
+; CHECK-ARG-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-ARG-NEXT:ld1w { z1.s }, p0/z, [x1]
+; CHECK-ARG-NEXT:add z0.s, p0/m, z0.s, z1.s
+; CHECK-ARG-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-ARG-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
+
+define void @func_vscale1_1(<16 x i32>* %a, <16 x i32>* %b) #1 {
+; CHECK-LABEL: func_vscale1_1:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ldp q0, q1, [x0]
+; CHECK-NEXT:ldp q2, q3, [x1]
+; CHECK-NEXT:ldp q4, q5, [x0, #32]
+; CHECK-NEXT:ldp q7, q6, [x1, #32]
+; CHECK-NEXT:add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:add v2.4s, v5.4s, v6.4s
+; CHECK-NEXT:add v3.4s, v4.4s, v7.4s
+; CHECK-NEXT:stp q3, q2, [x0, #32]
+; CHECK-NEXT:stp q0, q1, [x0]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #1 = { "target-features"="+sve" vscale_range(1,1) }
+
+define void @func_vscale2_2(<16 x i32>* %a, <16 x i32>* %b) #2 {
+; CHECK-LABEL: func_vscale2_2:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl8
+; CHECK-NEXT:add x8, x0, #32 // =32
+; CHECK-NEXT:add x9, x1, #32 // =32
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #2 = { "target-features"="+sve" vscale_range(2,2) }
+
+define void @func_vscale2_4(<16 x i32>* %a, <16 x i32>* %b) #3 {
+; CHECK-LABEL: func_vscale2_4:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl8
+; CHECK-NEXT:add x8, x0, #32 // =32
+; CHECK-NEXT:add x9, x1, #32 // =32
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
+
+define void @func_vscale4_4(<16 x i32>* %a, <16 x i32>* %b) #4 {
+; CHECK-LABEL: func_vscale4_4:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, 

[clang] 9e7329e - [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-21 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-06-21T13:00:36+01:00
New Revision: 9e7329e37edee0b4e6e212c90c76014a09dc6d90

URL: 
https://github.com/llvm/llvm-project/commit/9e7329e37edee0b4e6e212c90c76014a09dc6d90
DIFF: 
https://github.com/llvm/llvm-project/commit/9e7329e37edee0b4e6e212c90c76014a09dc6d90.diff

LOG: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

Differential Revision: https://reviews.llvm.org/D103702

Added: 
clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
llvm/test/CodeGen/AArch64/sve-vscale-attr.ll

Modified: 
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Removed: 




diff  --git a/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c 
b/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
new file mode 100644
index 0..fbae8c261703b
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=256  | 
FileCheck %s --check-prefixes=CHECK,CHECK256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512  | 
FileCheck %s --check-prefixes=CHECK,CHECK512
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | 
FileCheck %s --check-prefixes=CHECK,CHECK1024
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | 
FileCheck %s --check-prefixes=CHECK,CHECK2048
+
+#include <arm_sve.h>
+
+void func(int *restrict a, int *restrict b) {
+// CHECK-LABEL: func
+// CHECK256-COUNT-8: st1w
+// CHECK512-COUNT-4: st1w
+// CHECK1024-COUNT-2: st1w
+// CHECK2048-COUNT-1: st1w
+#pragma clang loop vectorize(enable)
+  for (int i = 0; i < 64; ++i)
+a[i] += b[i];
+}

diff  --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp 
b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 480600dcd9321..b22eb3b154f54 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,18 +47,6 @@ static cl::opt
cl::desc("Call nonlazybind functions via direct GOT load"),
cl::init(false), cl::Hidden);
 
-static cl::opt SVEVectorBitsMax(
-"aarch64-sve-vector-bits-max",
-cl::desc("Assume SVE vector registers are at most this big, "
- "with zero meaning no maximum size is assumed."),
-cl::init(0), cl::Hidden);
-
-static cl::opt SVEVectorBitsMin(
-"aarch64-sve-vector-bits-min",
-cl::desc("Assume SVE vector registers are at least this big, "
- "with zero meaning no minimum size is assumed."),
-cl::init(0), cl::Hidden);
-
 static cl::opt UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
 
@@ -210,14 +198,17 @@ void AArch64Subtarget::initializeProperties() {
 
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
-   const TargetMachine &TM, bool LittleEndian)
+   const TargetMachine &TM, bool LittleEndian,
+   unsigned MinSVEVectorSizeInBitsOverride,
+   unsigned MaxSVEVectorSizeInBitsOverride)
 : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
   ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
   CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
   IsLittle(LittleEndian),
-  TargetTriple(TT), FrameLowering(),
-  InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
-  TLInfo(TM, *this) {
+  MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
+  MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
+  FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)),
+  TSInfo(), TLInfo(TM, *this) {
   if (AArch64::isX18ReservedByDefault(TT))
 ReserveXRegister.set(18);
 
@@ -356,28 +347,6 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) 
const {
 MFI.computeMaxCallFrameSize(MF);
 }
 
-unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const {
-  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
-  assert(SVEVectorBitsMax % 128 == 0 &&
- "SVE requires vector length in multiples of 128!");
-  assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) &&
- "Mi

[PATCH] D104539: [Sema][SVE] Properly match builtin ID when using aux target

2021-06-21 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG325b6707942d: [Sema][SVE] Properly match builtin ID when 
using aux target (authored by bsmith).

Changed prior to commit:
  https://reviews.llvm.org/D104539?vs=353002=353340#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104539/new/

https://reviews.llvm.org/D104539

Files:
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/Sema/aarch64-sve-alias-attribute.c


Index: clang/test/Sema/aarch64-sve-alias-attribute.c
===
--- /dev/null
+++ clang/test/Sema/aarch64-sve-alias-attribute.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -aux-triple 
aarch64-none-unknown-eabi -target-feature +sve -fopenmp-is-device -fopenmp 
-verify -fsyntax-only %s
+
+static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) // 
expected-no-diagnostics
+void
+nop(void);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5163,7 +5163,10 @@
   return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
 }
 
-static bool ArmSveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID,
+ StringRef AliasName) {
+  if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
   return BuiltinID >= AArch64::FirstSVEBuiltin &&
  BuiltinID <= AArch64::LastSVEBuiltin;
 }
@@ -5180,7 +5183,7 @@
   StringRef AliasName = cast(D)->getIdentifier()->getName();
 
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
 S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
@@ -5210,7 +5213,7 @@
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
   bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
   bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
   (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||


Index: clang/test/Sema/aarch64-sve-alias-attribute.c
===
--- /dev/null
+++ clang/test/Sema/aarch64-sve-alias-attribute.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -aux-triple aarch64-none-unknown-eabi -target-feature +sve -fopenmp-is-device -fopenmp -verify -fsyntax-only %s
+
+static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) // expected-no-diagnostics
+void
+nop(void);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5163,7 +5163,10 @@
   return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
 }
 
-static bool ArmSveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID,
+ StringRef AliasName) {
+  if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
   return BuiltinID >= AArch64::FirstSVEBuiltin &&
  BuiltinID <= AArch64::LastSVEBuiltin;
 }
@@ -5180,7 +5183,7 @@
   StringRef AliasName = cast(D)->getIdentifier()->getName();
 
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
 S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
@@ -5210,7 +5213,7 @@
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
   bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
   bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
   (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||

[clang] 325b670 - [Sema][SVE] Properly match builtin ID when using aux target

2021-06-21 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-06-21T12:52:18+01:00
New Revision: 325b6707942dc295a0d7fc9bc23a8242d7a3824f

URL: 
https://github.com/llvm/llvm-project/commit/325b6707942dc295a0d7fc9bc23a8242d7a3824f
DIFF: 
https://github.com/llvm/llvm-project/commit/325b6707942dc295a0d7fc9bc23a8242d7a3824f.diff

LOG: [Sema][SVE] Properly match builtin ID when using aux target

Differential Revision: https://reviews.llvm.org/D104539

Added: 
clang/test/Sema/aarch64-sve-alias-attribute.c

Modified: 
clang/lib/Sema/SemaDeclAttr.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 17fe8c0713457..d8416c6b5769a 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5163,7 +5163,10 @@ static bool ArmCdeAliasValid(unsigned BuiltinID, 
StringRef AliasName) {
   return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
 }
 
-static bool ArmSveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID,
+ StringRef AliasName) {
+  if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
   return BuiltinID >= AArch64::FirstSVEBuiltin &&
  BuiltinID <= AArch64::LastSVEBuiltin;
 }
@@ -5180,7 +5183,7 @@ static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, 
const ParsedAttr &AL) {
   StringRef AliasName = cast(D)->getIdentifier()->getName();
 
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
 S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
@@ -5210,7 +5213,7 @@ static void handleBuiltinAliasAttr(Sema &S, Decl *D,
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
   bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
   bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
   (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||

diff  --git a/clang/test/Sema/aarch64-sve-alias-attribute.c 
b/clang/test/Sema/aarch64-sve-alias-attribute.c
new file mode 100644
index 0..306d98d27ac97
--- /dev/null
+++ b/clang/test/Sema/aarch64-sve-alias-attribute.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -aux-triple 
aarch64-none-unknown-eabi -target-feature +sve -fopenmp-is-device -fopenmp 
-verify -fsyntax-only %s
+
+static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) // 
expected-no-diagnostics
+void
+nop(void);



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104539: [Sema][SVE] Properly match builtin ID when using aux target

2021-06-18 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, joechrisellis, 
sdesmalen.
Herald added subscribers: psnobl, tschuett.
Herald added a reviewer: efriedma.
Herald added a reviewer: aaron.ballman.
bsmith requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104539

Files:
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/Sema/aarch64-sve-alias-attribute.c


Index: clang/test/Sema/aarch64-sve-alias-attribute.c
===
--- /dev/null
+++ clang/test/Sema/aarch64-sve-alias-attribute.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -aux-triple 
aarch64-none-unknown-eabi -target-feature +sve -fopenmp-is-device -fopenmp 
-verify -fsyntax-only %s
+
+static __inline__ 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) // 
expected-no-diagnostics
+void nop(void);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5163,7 +5163,9 @@
   return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
 }
 
-static bool ArmSveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID, 
StringRef AliasName) {
+  if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
   return BuiltinID >= AArch64::FirstSVEBuiltin &&
  BuiltinID <= AArch64::LastSVEBuiltin;
 }
@@ -5180,7 +5182,7 @@
   StringRef AliasName = cast(D)->getIdentifier()->getName();
 
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
 S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
@@ -5210,7 +5212,7 @@
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
   bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
   bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
   (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||


Index: clang/test/Sema/aarch64-sve-alias-attribute.c
===
--- /dev/null
+++ clang/test/Sema/aarch64-sve-alias-attribute.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -aux-triple aarch64-none-unknown-eabi -target-feature +sve -fopenmp-is-device -fopenmp -verify -fsyntax-only %s
+
+static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) // expected-no-diagnostics
+void nop(void);
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5163,7 +5163,9 @@
   return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames);
 }
 
-static bool ArmSveAliasValid(unsigned BuiltinID, StringRef AliasName) {
+static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID, StringRef AliasName) {
+  if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID))
+BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID);
   return BuiltinID >= AArch64::FirstSVEBuiltin &&
  BuiltinID <= AArch64::LastSVEBuiltin;
 }
@@ -5180,7 +5182,7 @@
   StringRef AliasName = cast(D)->getIdentifier()->getName();
 
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName))) {
 S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias);
@@ -5210,7 +5212,7 @@
   bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64();
   bool IsARM = S.Context.getTargetInfo().getTriple().isARM();
   bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV();
-  if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) ||
+  if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) ||
   (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) &&
!ArmCdeAliasValid(BuiltinID, AliasName)) ||
   (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) ||
___
cfe-commits mailing list

[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64Subtarget.h:298-299
+   bool LittleEndian,
+   unsigned MinSVEVectorSizeInBitsOverride = 0,
+   unsigned MaxSVEVectorSizeInBitsOverride = 0);
 

paulwalker-arm wrote:
> Out of interest are these defaults ever relied upon?
Only when a subtarget is constructed manually, for example in a unit test. 
In normal cases these values are always passed in, based on the attribute or 
command-line options.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 352101.
bsmith marked an inline comment as done.
bsmith added a comment.

- Ensure user input is sanitized for when asserts are not enabled
- Fix clang format issues


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

Files:
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/test/CodeGen/AArch64/sve-vscale-attr.ll

Index: llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOARG
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ARG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @func_vscale_none(<16 x i32>* %a, <16 x i32>* %b) #0 {
+; CHECK-NOARG-LABEL: func_vscale_none:
+; CHECK-NOARG:   // %bb.0:
+; CHECK-NOARG-NEXT:ldp q0, q1, [x0]
+; CHECK-NOARG-NEXT:ldp q2, q3, [x1]
+; CHECK-NOARG-NEXT:ldp q4, q5, [x0, #32]
+; CHECK-NOARG-NEXT:ldp q7, q6, [x1, #32]
+; CHECK-NOARG-NEXT:add v1.4s, v1.4s, v3.4s
+; CHECK-NOARG-NEXT:add v0.4s, v0.4s, v2.4s
+; CHECK-NOARG-NEXT:add v2.4s, v5.4s, v6.4s
+; CHECK-NOARG-NEXT:add v3.4s, v4.4s, v7.4s
+; CHECK-NOARG-NEXT:stp q3, q2, [x0, #32]
+; CHECK-NOARG-NEXT:stp q0, q1, [x0]
+; CHECK-NOARG-NEXT:ret
+;
+; CHECK-ARG-LABEL: func_vscale_none:
+; CHECK-ARG:   // %bb.0:
+; CHECK-ARG-NEXT:ptrue p0.s, vl16
+; CHECK-ARG-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-ARG-NEXT:ld1w { z1.s }, p0/z, [x1]
+; CHECK-ARG-NEXT:add z0.s, p0/m, z0.s, z1.s
+; CHECK-ARG-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-ARG-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
+
+define void @func_vscale1_1(<16 x i32>* %a, <16 x i32>* %b) #1 {
+; CHECK-LABEL: func_vscale1_1:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ldp q0, q1, [x0]
+; CHECK-NEXT:ldp q2, q3, [x1]
+; CHECK-NEXT:ldp q4, q5, [x0, #32]
+; CHECK-NEXT:ldp q7, q6, [x1, #32]
+; CHECK-NEXT:add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:add v2.4s, v5.4s, v6.4s
+; CHECK-NEXT:add v3.4s, v4.4s, v7.4s
+; CHECK-NEXT:stp q3, q2, [x0, #32]
+; CHECK-NEXT:stp q0, q1, [x0]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #1 = { "target-features"="+sve" vscale_range(1,1) }
+
+define void @func_vscale2_2(<16 x i32>* %a, <16 x i32>* %b) #2 {
+; CHECK-LABEL: func_vscale2_2:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl8
+; CHECK-NEXT:add x8, x0, #32 // =32
+; CHECK-NEXT:add x9, x1, #32 // =32
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #2 = { "target-features"="+sve" vscale_range(2,2) }
+
+define void @func_vscale2_4(<16 x i32>* %a, <16 x i32>* %b) #3 {
+; CHECK-LABEL: func_vscale2_4:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl8
+; CHECK-NEXT:add x8, x0, #32 // =32
+; CHECK-NEXT:add x9, x1, #32 // =32
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
+
+define void @func_vscale4_4(<16 x i32>* %a, <16 x i32>* %b) #4 {
+; CHECK-LABEL: func_vscale4_4:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl16
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, 

[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-14 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: llvm/lib/Target/AArch64/AArch64Subtarget.cpp:226
+ : SVEVectorBitsMaxOpt),
   TargetTriple(TT), FrameLowering(),
   InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),

sdesmalen wrote:
> nit: Does this need an assert that Min|MaxSVEVectorSizeInBits is zero when 
> SVE is not enabled in the feature string?
In principle you could; however, I'm not sure it adds any value, as the 
accessors already assert that SVE is enabled. (And in principle this is a 
generic attribute, not an SVE one.)



Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:357
+  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
+  if (VScaleRangeAttr.isValid())
+std::tie(MinSVEVectorSize, MaxSVEVectorSize) =

paulwalker-arm wrote:
> I don't know if this is possible but I feel we need a `HasSVE` like check 
> here?
I'm not sure this is really doable here without picking apart the feature 
string; I think it makes more sense to just set the values and assert when 
the accessors are used without SVE enabled.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D103702: [AArch64][SVE] Wire up vscale_range attribute to SVE min/max vector queries

2021-06-14 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 351910.
bsmith marked 7 inline comments as done.
bsmith added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Move attribute/command line logic into AArch64TargetMachine.
- Fix issue with subtarget key appending integers.
- Add end-to-end test.
- Make new subtarget options optional.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103702/new/

https://reviews.llvm.org/D103702

Files:
  clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
  llvm/test/CodeGen/AArch64/sve-vscale-attr.ll

Index: llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOARG
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ARG
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @func_vscale_none(<16 x i32>* %a, <16 x i32>* %b) #0 {
+; CHECK-NOARG-LABEL: func_vscale_none:
+; CHECK-NOARG:   // %bb.0:
+; CHECK-NOARG-NEXT:ldp q0, q1, [x0]
+; CHECK-NOARG-NEXT:ldp q2, q3, [x1]
+; CHECK-NOARG-NEXT:ldp q4, q5, [x0, #32]
+; CHECK-NOARG-NEXT:ldp q7, q6, [x1, #32]
+; CHECK-NOARG-NEXT:add v1.4s, v1.4s, v3.4s
+; CHECK-NOARG-NEXT:add v0.4s, v0.4s, v2.4s
+; CHECK-NOARG-NEXT:add v2.4s, v5.4s, v6.4s
+; CHECK-NOARG-NEXT:add v3.4s, v4.4s, v7.4s
+; CHECK-NOARG-NEXT:stp q3, q2, [x0, #32]
+; CHECK-NOARG-NEXT:stp q0, q1, [x0]
+; CHECK-NOARG-NEXT:ret
+;
+; CHECK-ARG-LABEL: func_vscale_none:
+; CHECK-ARG:   // %bb.0:
+; CHECK-ARG-NEXT:ptrue p0.s, vl16
+; CHECK-ARG-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-ARG-NEXT:ld1w { z1.s }, p0/z, [x1]
+; CHECK-ARG-NEXT:add z0.s, p0/m, z0.s, z1.s
+; CHECK-ARG-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-ARG-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }
+
+define void @func_vscale1_1(<16 x i32>* %a, <16 x i32>* %b) #1 {
+; CHECK-LABEL: func_vscale1_1:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ldp q0, q1, [x0]
+; CHECK-NEXT:ldp q2, q3, [x1]
+; CHECK-NEXT:ldp q4, q5, [x0, #32]
+; CHECK-NEXT:ldp q7, q6, [x1, #32]
+; CHECK-NEXT:add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:add v2.4s, v5.4s, v6.4s
+; CHECK-NEXT:add v3.4s, v4.4s, v7.4s
+; CHECK-NEXT:stp q3, q2, [x0, #32]
+; CHECK-NEXT:stp q0, q1, [x0]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #1 = { "target-features"="+sve" vscale_range(1,1) }
+
+define void @func_vscale2_2(<16 x i32>* %a, <16 x i32>* %b) #2 {
+; CHECK-LABEL: func_vscale2_2:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl8
+; CHECK-NEXT:add x8, x0, #32 // =32
+; CHECK-NEXT:add x9, x1, #32 // =32
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #2 = { "target-features"="+sve" vscale_range(2,2) }
+
+define void @func_vscale2_4(<16 x i32>* %a, <16 x i32>* %b) #3 {
+; CHECK-LABEL: func_vscale2_4:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:ptrue p0.s, vl8
+; CHECK-NEXT:add x8, x0, #32 // =32
+; CHECK-NEXT:add x9, x1, #32 // =32
+; CHECK-NEXT:ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:ld1w { z1.s }, p0/z, [x8]
+; CHECK-NEXT:ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT:ld1w { z3.s }, p0/z, [x9]
+; CHECK-NEXT:add z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:add z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT:st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %op2 = load <16 x i32>, <16 x i32>* %b
+  %res = add <16 x i32> %op1, %op2
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+attributes #3 = { "target-features"="+sve" vscale_range(2,4) }
+
+define void @func_vscale4_4(<16 x i32>* %a, <16 x i32>* %b) #4 {
+; CHECK-LABEL: 

[PATCH] D103082: [AArch64][SVE] Improve codegen for dupq SVE ACLE intrinsics

2021-06-07 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG60c9b5f35cae: [AArch64][SVE] Improve codegen for dupq SVE 
ACLE intrinsics (authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103082/new/

https://reviews.llvm.org/D103082

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq_const.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -0,0 +1,397 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define  @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret  %1
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+; DUPQ b16
+
+define  @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_w() 

[clang] 60c9b5f - [AArch64][SVE] Improve codegen for dupq SVE ACLE intrinsics

2021-06-07 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-06-07T12:21:38+01:00
New Revision: 60c9b5f35caeb555f66d261bf5a657ab02a35fef

URL: 
https://github.com/llvm/llvm-project/commit/60c9b5f35caeb555f66d261bf5a657ab02a35fef
DIFF: 
https://github.com/llvm/llvm-project/commit/60c9b5f35caeb555f66d261bf5a657ab02a35fef.diff

LOG: [AArch64][SVE] Improve codegen for dupq SVE ACLE intrinsics

Use llvm.experimental.vector.insert instead of storing into an alloca
when generating code for these intrinsics. This defers the codegen of
the generated vector to instruction selection, allowing existing
shufflevector style optimizations to apply.

Additionally, introduce a new target transform that can recognise fixed
predicate patterns in the svbool variants of these intrinsics.

Differential Revision: https://reviews.llvm.org/D103082

Added: 
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq_const.c
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll

Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9fdf1df9734e2..1b30d44937f3e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9061,33 +9061,32 @@ Value 
*CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
 if (IsBoolTy)
   EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
 
-Address Alloca = CreateTempAlloca(llvm::ArrayType::get(EltTy, NumOpnds),
- CharUnits::fromQuantity(16));
+SmallVector VecOps;
 for (unsigned I = 0; I < NumOpnds; ++I)
-  Builder.CreateDefaultAlignedStore(
-  IsBoolTy ? Builder.CreateZExt(Ops[I], EltTy) : Ops[I],
-  Builder.CreateGEP(Alloca.getElementType(), Alloca.getPointer(),
-{Builder.getInt64(0), Builder.getInt64(I)}));
+VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
+Value *Vec = BuildVector(VecOps);
 
 SVETypeFlags TypeFlags(Builtin->TypeModifier);
 Value *Pred = EmitSVEAllTruePred(TypeFlags);
 
 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
-Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_ld1rq, OverloadedTy);
-Value *Alloca0 = Builder.CreateGEP(
-Alloca.getElementType(), Alloca.getPointer(),
-{Builder.getInt64(0), Builder.getInt64(0)});
-Value *LD1RQ = Builder.CreateCall(F, {Pred, Alloca0});
+Value *InsertSubVec = Builder.CreateInsertVector(
+OverloadedTy, UndefValue::get(OverloadedTy), Vec, Builder.getInt64(0));
+
+Function *F =
+CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
+Value *DupQLane =
+Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
 
 if (!IsBoolTy)
-  return LD1RQ;
+  return DupQLane;
 
 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
: Intrinsic::aarch64_sve_cmpne_wide,
  OverloadedTy);
-Value *Call =
-Builder.CreateCall(F, {Pred, LD1RQ, EmitSVEDupX(Builder.getInt64(0))});
+Value *Call = Builder.CreateCall(
+F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
 return EmitSVEPredicateCast(Call, cast(Ty));
   }
 

diff  --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
index 05223d59ea1e9..086d753870ec8 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
@@ -24,16 +24,13 @@ svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, 
uint64_t index) {
 svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, 
bfloat16_t x3,
 bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, 
bfloat16_t x7) {
   // CHECK-LABEL: test_svdupq_n_bf16
-  // CHECK: %[[ALLOCA:.*]] = alloca [8 x bfloat], align 16
-  // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x bfloat], [8 x 
bfloat]* %[[ALLOCA]], i64 0, i64 0
-  // CHECK-DAG: store bfloat %x0, bfloat* %[[BASE]], align 16
-  // 
-  // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x bfloat], [8 x 
bfloat]* %[[ALLOCA]], i64 0, i64 7
-  // CHECK: store bfloat %x7, bfloat* %[[GEP]], align 2
-  // CHECK-NOT: store
-  // CHECK: call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  // CHECK: %[[LOAD:.*]] = call  
@llvm.aarch64.sve.ld1rq.nxv8bf16( %{{.*}}, bfloat* nonnull 
%[[BASE]])
-  // CHECK: ret  %[[LOAD]]
+  // CHECK: insertelemen

[PATCH] D103082: [AArch64][SVE] Improve codegen for dupq SVE ACLE intrinsics

2021-06-04 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 349827.
bsmith marked an inline comment as done.
bsmith added a comment.

- Remove unnecessary complexity when zero-extending dupq operands into a vector.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103082/new/

https://reviews.llvm.org/D103082

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq_const.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -0,0 +1,397 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define  @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret  %1
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+; DUPQ b16
+
+define  @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_w() #0 {
+; CHECK-LABEL: @dupq_h_w(
+; CHECK: %1 = call  

[PATCH] D103082: [AArch64][SVE] Improve codegen for dupq SVE ACLE intrinsics

2021-06-03 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 349525.
bsmith added a comment.

- Use !isZero() in place of getZExtValue() != 0
- Add end to end tests for ptrue transformation


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103082/new/

https://reviews.llvm.org/D103082

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq_const.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -0,0 +1,397 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define  @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret  %1
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+; DUPQ b16
+
+define  @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_w() #0 {
+; CHECK-LABEL: @dupq_h_w(
+; CHECK: %1 = call  

[PATCH] D103082: [AArch64][SVE] Improve codegen for dupq SVE ACLE intrinsics

2021-06-02 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 349241.
bsmith retitled this revision from "[AArch64][SVE] Optimize svbool dupq ACLE 
intrinsic to fixed predicate patterns" to "[AArch64][SVE] Improve codegen for 
dupq SVE ACLE intrinsics".
bsmith edited the summary of this revision.
bsmith added a comment.

- Rework approach to use llvm.experimental.vector.insert instead of introducing 
a new LLVM intrinsic
- Also apply changes to all non-svbool variants.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D103082/new/

https://reviews.llvm.org/D103082

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -0,0 +1,397 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define  @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret  %1
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv16i8.v16i8( undef,
+<16 x i8> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv16i8( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv16i8( %1,  %3,  %4)
+  ret  %5
+}
+
+; DUPQ b16
+
+define  @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  @llvm.aarch64.sve.dup.x.nxv2i64(i64 0)
+  %5 = tail call  @llvm.aarch64.sve.cmpne.wide.nxv8i16( %1,  %3,  %4)
+  ret  %5
+}
+
+define  @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call  @llvm.experimental.vector.insert.nxv8i16.v8i16( undef,
+<8 x i16> , i64 0)
+  %3 = tail call  @llvm.aarch64.sve.dupq.lane.nxv8i16( %2 , i64 0)
+  %4 = tail call  

[PATCH] D103082: [AArch64][SVE] Optimize svbool dupq ACLE intrinsic to fixed predicate patterns

2021-05-25 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, joechrisellis, 
sdesmalen.
Herald added subscribers: psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

This patch introduces a new dupq LLVM intrinsic which is emitted upon
encountering the svbool dupq ACLE intrinsics, instead of expanding them
directly.

This allows us to defer the expansion of said intrinsic until much
later when we can reasonably optimize to fixed predicates using ptrue.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D103082

Files:
  clang/include/clang/Basic/arm_sve.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
  llvm/test/CodeGen/AArch64/sve-intrinsics-dupq.ll
  llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll

Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
===
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dupq.ll
@@ -0,0 +1,195 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; DUPQ b8
+
+define  @dupq_b_0() #0 {
+; CHECK-LABEL: @dupq_b_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
+  i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_d() #0 {
+; CHECK-LABEL: @dupq_b_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false,
+  i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_w() #0 {
+; CHECK-LABEL: @dupq_b_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false,
+  i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_h() #0 {
+; CHECK-LABEL: @dupq_b_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1)
+; CHECK-NEXT: ret  %2
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false,
+  i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false)
+  ret  %1
+}
+
+define  @dupq_b_b() #0 {
+; CHECK-LABEL: @dupq_b_b(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+; CHECK-NEXT: ret  %1
+  %1 = tail call  @llvm.aarch64.sve.dupq.b8(i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
+  i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true)
+  ret  %1
+}
+
+; DUPQ b16
+
+define  @dupq_h_0() #0 {
+; CHECK-LABEL: @dupq_h_0(
+; CHECK: ret  zeroinitializer
+  %1 = tail call  @llvm.aarch64.sve.dupq.b16(i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_h_d() #0 {
+; CHECK-LABEL: @dupq_h_d(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.dupq.b16(i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false)
+  ret  %1
+}
+
+define  @dupq_h_w() #0 {
+; CHECK-LABEL: @dupq_h_w(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: %2 = call  @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1)
+; CHECK-NEXT: %3 = call  @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2)
+; CHECK-NEXT: ret  %3
+  %1 = tail call  @llvm.aarch64.sve.dupq.b16(i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false)
+  ret  %1
+}
+
+define  @dupq_h_h() #0 {
+; CHECK-LABEL: @dupq_h_h(
+; CHECK: %1 = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: ret  

[PATCH] D102623: [CodeGen][AArch64][SVE] Canonicalize intrinsic rdffr{ => _z}

2021-05-19 Thread Bradley Smith via Phabricator via cfe-commits
bsmith accepted this revision.
bsmith added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102623/new/

https://reviews.llvm.org/D102623

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 48f5a39 - [IR] Add vscale_range IR function attribute

2021-03-22 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-03-22T12:05:06Z
New Revision: 48f5a392cb73d99a58f01448926f6964ab5b0d0a

URL: 
https://github.com/llvm/llvm-project/commit/48f5a392cb73d99a58f01448926f6964ab5b0d0a
DIFF: 
https://github.com/llvm/llvm-project/commit/48f5a392cb73d99a58f01448926f6964ab5b0d0a.diff

LOG: [IR] Add vscale_range IR function attribute

This attribute represents the minimum and maximum values vscale can
take. For now this attribute is not hooked up to anything during
codegen; this will be added in the future when such codegen is
considered stable.

Additionally hook up the -msve-vector-bits= clang option to emit this
attribute.

Differential Revision: https://reviews.llvm.org/D98030

Added: 
clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
llvm/test/Verifier/vscale_range.ll

Modified: 
clang/lib/CodeGen/CodeGenFunction.cpp
llvm/docs/BitCodeFormat.rst
llvm/docs/LangRef.rst
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/include/llvm/IR/Attributes.h
llvm/include/llvm/IR/Attributes.td
llvm/lib/AsmParser/LLLexer.cpp
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/AsmParser/LLParser.h
llvm/lib/AsmParser/LLToken.h
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/IR/AttributeImpl.h
llvm/lib/IR/Attributes.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
llvm/lib/Transforms/Utils/CodeExtractor.cpp
llvm/test/Bitcode/attributes.ll

Removed: 




diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp 
b/clang/lib/CodeGen/CodeGenFunction.cpp
index fd708849e609..e3fdf54716ab 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -496,6 +496,13 @@ void CodeGenFunction::FinishFunction(SourceLocation 
EndLoc) {
   if (LargestVectorWidth)
 CurFn->addFnAttr("min-legal-vector-width", 
llvm::utostr(LargestVectorWidth));
 
+  // Add vscale attribute if appropriate.
+  if (getLangOpts().ArmSveVectorBits) {
+unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
+CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
+ VScale, VScale));
+  }
+
   // If we generated an unreachable return block, delete it now.
   if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) {
 Builder.ClearInsertionPoint();

diff  --git a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c 
b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
new file mode 100644
index 000000000000..84541f9cb12d
--- /dev/null
+++ b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s 
--check-prefix=CHECK-NONE
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S 
-emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
+
+// CHECK-LABEL: @func() #0
+// CHECK: attributes #0 = { {{.*}} 
vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
+// CHECK-NONE-NOT: vscale_range
+void func() {}

diff  --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst
index df0e195c6809..eff9d2866a8f 100644
--- a/llvm/docs/BitCodeFormat.rst
+++ b/llvm/docs/BitCodeFormat.rst
@@ -1070,6 +1070,7 @@ The integer codes are mapped to well-known attributes as 
follows.
 * code 68: ``noundef``
 * code 69: ``byref``
 * code 70: ``mustprogress``
+* code 74: ``vscale_range(<Min>[, <Max>])``
 
 .. note::
   The ``allocsize`` attribute has a special encoding for its arguments. Its two
@@ -1077,6 +1078,12 @@ The integer codes are mapped to well-known attributes as 
follows.
   (i.e. ``(EltSizeParam << 32) | NumEltsParam``), with ``NumEltsParam`` taking 
on
   the sentinel value -1 if it is not specified.
 
+.. note::
+  The ``vscale_range`` attribute has a special encoding for its arguments. Its 
two
+  arguments, which are 32-bit integers, are packed into one 64-bit integer 
value
+  (i.e. ``(Min << 32) | Max``), with ``Max`` taking on the value of ``Min`` if
+  it is not spec

[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-22 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG48f5a392cb73: [IR] Add vscale_range IR function attribute 
(authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98030/new/

https://reviews.llvm.org/D98030

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  llvm/docs/BitCodeFormat.rst
  llvm/docs/LangRef.rst
  llvm/include/llvm/Bitcode/LLVMBitCodes.h
  llvm/include/llvm/IR/Attributes.h
  llvm/include/llvm/IR/Attributes.td
  llvm/lib/AsmParser/LLLexer.cpp
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/AsmParser/LLParser.h
  llvm/lib/AsmParser/LLToken.h
  llvm/lib/Bitcode/Reader/BitcodeReader.cpp
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/IR/AttributeImpl.h
  llvm/lib/IR/Attributes.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
  llvm/lib/Transforms/Utils/CodeExtractor.cpp
  llvm/test/Bitcode/attributes.ll
  llvm/test/Verifier/vscale_range.ll

Index: llvm/test/Verifier/vscale_range.ll
===
--- /dev/null
+++ llvm/test/Verifier/vscale_range.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @b(i32*) vscale_range(8, 1)
Index: llvm/test/Bitcode/attributes.ll
===
--- llvm/test/Bitcode/attributes.ll
+++ llvm/test/Bitcode/attributes.ll
@@ -422,6 +422,31 @@
   ret void
 }
 
+; CHECK: define void @f72() #45
+define void @f72() vscale_range(8)
+{
+  ret void
+}
+
+; CHECK: define void @f73() #46
+define void @f73() vscale_range(1,8)
+{
+  ret void
+}
+
+; CHECK: define void @f74() #47
+define void @f74() vscale_range(1,0)
+{
+  ret void
+}
+
+; CHECK: define void @f75()
+; CHECK-NOT: define void @f75() #
+define void @f75() vscale_range(0,0)
+{
+  ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -467,4 +492,7 @@
 ; CHECK: attributes #42 = { nocallback }
 ; CHECK: attributes #43 = { cold }
 ; CHECK: attributes #44 = { hot }
+; CHECK: attributes #45 = { vscale_range(8,8) }
+; CHECK: attributes #46 = { vscale_range(1,8) }
+; CHECK: attributes #47 = { vscale_range(1,0) }
 ; CHECK: attributes #[[NOBUILTIN]] = { nobuiltin }
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -970,6 +970,7 @@
   case Attribute::StackProtectStrong:
   case Attribute::StrictFP:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NoCfCheck:
   case Attribute::MustProgress:
   case Attribute::NoProfile:
Index: llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
===
--- llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -73,6 +73,7 @@
   .Case("sspstrong", Attribute::StackProtectStrong)
   .Case("strictfp", Attribute::StrictFP)
   .Case("uwtable", Attribute::UWTable)
+  .Case("vscale_range", Attribute::VScaleRange)
   .Default(Attribute::None);
 }
 
Index: llvm/lib/IR/Verifier.cpp
===
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -1629,6 +1629,7 @@
   case Attribute::InlineHint:
   case Attribute::StackAlignment:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NonLazyBind:
   case Attribute::ReturnsTwice:
   case Attribute::SanitizeAddress:
@@ -1987,6 +1988,14 @@
   return;
   }
 
+  if (Attrs.hasFnAttribute(Attribute::VScaleRange)) {
+std::pair<unsigned, unsigned> Args =
+Attrs.getVScaleRangeArgs(AttributeList::FunctionIndex);
+
+if (Args.first > Args.second && Args.second != 0)
+  CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
+  }
+
   if (Attrs.hasFnAttribute("frame-pointer")) {
 StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
   "frame-pointer").getValueAsString();
Index: llvm/lib/IR/Attributes.cpp
===
--- llvm/lib/IR/Attributes.cpp
+++ llvm/lib/IR/Attributes.cpp
@@ -78,6 +78,17 @@
   return std::make_pair(ElemSizeArg, NumElemsArg);
 }
 
+static uint64_t packVScaleRangeArgs(unsigned MinValue, unsigned MaxValue) {
+  return uint64_t(MinValue) << 32 | MaxValue;
+}
+
+static std::pair<unsigned, unsigned> unpackVScaleRangeArgs(uint64_t Value) {
+  unsigned MaxValue = Value & std::numeric_limits<unsigned>::max();
+  unsigned MinValue = Value >> 32;
+
+  return std::make_pair(MinValue, MaxValue);
+}
+
 Attribute 

[clang] cf0da91 - [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-17 Thread Bradley Smith via cfe-commits

Author: Bradley Smith
Date: 2021-03-17T11:41:22Z
New Revision: cf0da91ba5e192920809e30dbb359042c2f2112a

URL: 
https://github.com/llvm/llvm-project/commit/cf0da91ba5e192920809e30dbb359042c2f2112a
DIFF: 
https://github.com/llvm/llvm-project/commit/cf0da91ba5e192920809e30dbb359042c2f2112a.diff

LOG: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed 
length SVE

Previously NEON used a target specific intrinsic for frintn, given that
the FROUNDEVEN ISD node now exists, move over to that instead and add
codegen support for that node for both NEON and fixed length SVE.

Differential Revision: https://reviews.llvm.org/D98487

Added: 
llvm/test/CodeGen/AArch64/frintn.ll

Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-neon-intrinsics.c
clang/test/CodeGen/aarch64-neon-misc.c
clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
clang/test/CodeGen/arm-neon-directed-rounding.c
clang/test/CodeGen/arm64-vrnd.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/IR/AutoUpgrade.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-vcvt.ll
llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
llvm/test/CodeGen/AArch64/f16-instructions.ll
llvm/test/CodeGen/AArch64/fp-intrinsics.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
llvm/test/CodeGen/AArch64/vec-libcalls.ll

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e5778c0c78f7..8d1d3c50870c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10620,17 +10620,23 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   }
   case NEON::BI__builtin_neon_vrndnh_f16: {
 Ops.push_back(EmitScalarExpr(E->getArg(0)));
-Int = Intrinsic::aarch64_neon_frintn;
+Int = Builder.getIsFPConstrained()
+  ? Intrinsic::experimental_constrained_roundeven
+  : Intrinsic::roundeven;
 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndn_v:
   case NEON::BI__builtin_neon_vrndnq_v: {
-Int = Intrinsic::aarch64_neon_frintn;
+Int = Builder.getIsFPConstrained()
+  ? Intrinsic::experimental_constrained_roundeven
+  : Intrinsic::roundeven;
 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndns_f32: {
 Ops.push_back(EmitScalarExpr(E->getArg(0)));
-Int = Intrinsic::aarch64_neon_frintn;
+Int = Builder.getIsFPConstrained()
+  ? Intrinsic::experimental_constrained_roundeven
+  : Intrinsic::roundeven;
 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndph_f16: {

diff  --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c 
b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index a56080bace0f..76f5cfd3aaa8 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -18155,7 +18155,7 @@ float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
 
 // CHECK-LABEL: @test_vrndn_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> 
@llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
+// CHECK:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x 
double> %a)
 // CHECK:   ret <1 x double> [[VRNDN1_I]]
 float64x1_t test_vrndn_f64(float64x1_t a) {
   return vrndn_f64(a);

diff  --git a/clang/test/CodeGen/aarch64-neon-misc.c 
b/clang/test/CodeGen/aarch64-neon-misc.c
index 4f85f67cdaec..ed9af88b56c1 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -2287,7 +2287,7 @@ float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
 
 // CHECK-LABEL: @test_vrndnq_f64(
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK:   [[VRNDN1_I:%.*]] = call <2 x double> 
@llvm.aarch64.neon.frintn.v2f64(<2 x double> %a)
+// CHECK:   [[VRNDN1_I:%.*]] = call <2 x double> @llvm.roundeven.v2f64(<2 x 
double> %a)
 // CHECK:   ret <2 x double> [[VRNDN1_I]]
 float64x2_t test_vrndnq_f64(float64x2_t a) {
   return vrndnq_f64(a);

diff  --git a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c 
b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
index 32161146ef45..01df5b0d1930 100644
--- a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -366,7 +366,7 @@ float16_t test_vrndmh_f16(float16_t a) {
 }
 
 // CHECK-LABEL: test_vrndnh_f16
-// CHECK:  

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-17 Thread Bradley Smith via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGcf0da91ba5e1: [AArch64][SVE/NEON] Add support for FROUNDEVEN 
for both NEON and fixed length… (authored by bsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/frintn.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: 

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-16 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 330951.
bsmith added a comment.

- Remove `SDTFPRoundEvenOp` as it's not a correct mirror of `SDTFPRoundOp` 
since that is not for `ISD::FROUND`.
- Fix comments in `include/llvm/Target/TargetSelectionDAG.td` for 
`SDTFPRoundOp` and `SDTFPExtendOp`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/frintn.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]

[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 330691.
bsmith marked 3 inline comments as done.
bsmith added a comment.

- Prevent vscale_range(0,0) from crashing and instead don't add the attribute
- Improve CHECK lines in arm-sve-vector-bits-vscale-range.c test
- Test vscale_range(0,0) case and move some tests around


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98030/new/

https://reviews.llvm.org/D98030

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  llvm/docs/BitCodeFormat.rst
  llvm/docs/LangRef.rst
  llvm/include/llvm/Bitcode/LLVMBitCodes.h
  llvm/include/llvm/IR/Attributes.h
  llvm/include/llvm/IR/Attributes.td
  llvm/lib/AsmParser/LLLexer.cpp
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/AsmParser/LLParser.h
  llvm/lib/AsmParser/LLToken.h
  llvm/lib/Bitcode/Reader/BitcodeReader.cpp
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/IR/AttributeImpl.h
  llvm/lib/IR/Attributes.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
  llvm/lib/Transforms/Utils/CodeExtractor.cpp
  llvm/test/Bitcode/attributes.ll
  llvm/test/Verifier/vscale_range.ll

Index: llvm/test/Verifier/vscale_range.ll
===
--- /dev/null
+++ llvm/test/Verifier/vscale_range.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @b(i32*) vscale_range(8, 1)
Index: llvm/test/Bitcode/attributes.ll
===
--- llvm/test/Bitcode/attributes.ll
+++ llvm/test/Bitcode/attributes.ll
@@ -422,6 +422,31 @@
   ret void
 }
 
+; CHECK: define void @f72() #45
+define void @f72() vscale_range(8)
+{
+  ret void
+}
+
+; CHECK: define void @f73() #46
+define void @f73() vscale_range(1,8)
+{
+  ret void
+}
+
+; CHECK: define void @f74() #47
+define void @f74() vscale_range(1,0)
+{
+  ret void
+}
+
+; CHECK: define void @f75()
+; CHECK-NOT: define void @f75() #
+define void @f75() vscale_range(0,0)
+{
+  ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -467,4 +492,7 @@
 ; CHECK: attributes #42 = { nocallback }
 ; CHECK: attributes #43 = { cold }
 ; CHECK: attributes #44 = { hot }
+; CHECK: attributes #45 = { vscale_range(8,8) }
+; CHECK: attributes #46 = { vscale_range(1,8) }
+; CHECK: attributes #47 = { vscale_range(1,0) }
 ; CHECK: attributes #[[NOBUILTIN]] = { nobuiltin }
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -970,6 +970,7 @@
   case Attribute::StackProtectStrong:
   case Attribute::StrictFP:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NoCfCheck:
   case Attribute::MustProgress:
   case Attribute::NoProfile:
Index: llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
===
--- llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -73,6 +73,7 @@
   .Case("sspstrong", Attribute::StackProtectStrong)
   .Case("strictfp", Attribute::StrictFP)
   .Case("uwtable", Attribute::UWTable)
+  .Case("vscale_range", Attribute::VScaleRange)
   .Default(Attribute::None);
 }
 
Index: llvm/lib/IR/Verifier.cpp
===
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -1629,6 +1629,7 @@
   case Attribute::InlineHint:
   case Attribute::StackAlignment:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NonLazyBind:
   case Attribute::ReturnsTwice:
   case Attribute::SanitizeAddress:
@@ -1987,6 +1988,14 @@
   return;
   }
 
+  if (Attrs.hasFnAttribute(Attribute::VScaleRange)) {
+std::pair<unsigned, unsigned> Args =
+Attrs.getVScaleRangeArgs(AttributeList::FunctionIndex);
+
+if (Args.first > Args.second && Args.second != 0)
+  CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
+  }
+
   if (Attrs.hasFnAttribute("frame-pointer")) {
 StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
   "frame-pointer").getValueAsString();
Index: llvm/lib/IR/Attributes.cpp
===
--- llvm/lib/IR/Attributes.cpp
+++ llvm/lib/IR/Attributes.cpp
@@ -78,6 +78,17 @@
   return std::make_pair(ElemSizeArg, NumElemsArg);
 }
 
+static uint64_t packVScaleRangeArgs(unsigned MinValue, unsigned MaxValue) {
+  return uint64_t(MinValue) << 32 | MaxValue;
+}
+
+static std::pair<unsigned, unsigned> unpackVScaleRangeArgs(uint64_t Value) {
+  unsigned MaxValue = Value & std::numeric_limits<unsigned>::max();
+  unsigned MinValue 

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: llvm/include/llvm/Target/TargetSelectionDAG.td:158
 ]>;
+def SDTFPRoundEvenOp  : SDTypeProfile<1, 1, [   // froundeven
+  SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, 
SDTCisSameNumEltsAs<0, 1>

dmgreen wrote:
> Is this used? The one above should maybe say `// fpround`?
No, it's not. I added it for consistency, but perhaps I shouldn't have. I think 
fround is correct for the one above, or at least it is consistent with the others 
in this file, for example fextend below.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 330629.
bsmith added a comment.
Herald added a subscriber: dexonsmith.

- Add AutoUpgrade code to convert aarch64.neon.frintn to roundeven
- Add test for above AutoUpgrade


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/IR/AutoUpgrade.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/frintn.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_1024-NEXT: ret
+  %op = load <64 x half>, <64 x half>* %a
+  %res 

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added a comment.

> Why is this patch only changing int_aarch64_neon_frintn and not 
> int_aarch64_sve_frintn?
> Is there a particular reason to do so?

Things are done slightly differently for SVE in this regard. In principle, yes, 
we could emit roundeven instead of frintn from the ACLE intrinsic; however, all 
of the other ACLE intrinsics also emit SVE-specific LLVM intrinsics rather than 
the arch-independent nodes. This patch doesn't change that in order to stay 
consistent; if we did want to change that, it should be done as a separate patch 
that changes all of them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98487/new/

https://reviews.llvm.org/D98487

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-12 Thread Bradley Smith via Phabricator via cfe-commits
bsmith added inline comments.



Comment at: llvm/lib/IR/Attributes.cpp:570
+Result += utostr(MinValue);
+Result += ',';
+Result += utostr(MaxValue);

peterwaller-arm wrote:
> Nit: The only other precedent I can see for this is `allocsize`. Grepping the 
> code I found this is always written in tests as `allocsize(x, y)`, however, 
> it prints as `allocsize(x,y)` (no space) when done with `-emit-llvm`, 
> regardless of how it was formatted as input. I figure that is an oversight 
> and this should have the space.
I'm reluctant to change `allocsize`, and given that it is the only other example 
of multiple parameters to an attribute like this, I'm inclined to stay consistent 
with it. I also think there is an argument to be made about not introducing extra 
spaces in a potentially already cluttered attributes section.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98030/new/

https://reviews.llvm.org/D98030

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-12 Thread Bradley Smith via Phabricator via cfe-commits
bsmith updated this revision to Diff 330205.
bsmith marked 3 inline comments as done.
bsmith added a comment.

- State what lack of vscale_range attribute means in LangRef
- Minor formatting change


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98030/new/

https://reviews.llvm.org/D98030

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  llvm/docs/BitCodeFormat.rst
  llvm/docs/LangRef.rst
  llvm/include/llvm/Bitcode/LLVMBitCodes.h
  llvm/include/llvm/IR/Attributes.h
  llvm/include/llvm/IR/Attributes.td
  llvm/lib/AsmParser/LLLexer.cpp
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/AsmParser/LLParser.h
  llvm/lib/AsmParser/LLToken.h
  llvm/lib/Bitcode/Reader/BitcodeReader.cpp
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/IR/AttributeImpl.h
  llvm/lib/IR/Attributes.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
  llvm/lib/Transforms/Utils/CodeExtractor.cpp
  llvm/test/Bitcode/attributes.ll
  llvm/test/Verifier/vscale_range.ll

Index: llvm/test/Verifier/vscale_range.ll
===
--- /dev/null
+++ llvm/test/Verifier/vscale_range.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK-NOT: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @a(i32) vscale_range(1, 0)
+
+; CHECK: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @b(i32*) vscale_range(8, 1)
Index: llvm/test/Bitcode/attributes.ll
===
--- llvm/test/Bitcode/attributes.ll
+++ llvm/test/Bitcode/attributes.ll
@@ -422,6 +422,18 @@
   ret void
 }
 
+; CHECK: define void @f72() #45
+define void @f72() vscale_range(8)
+{
+  ret void
+}
+
+; CHECK: define void @f73() #46
+define void @f73() vscale_range(1,8)
+{
+  ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -467,4 +479,6 @@
 ; CHECK: attributes #42 = { nocallback }
 ; CHECK: attributes #43 = { cold }
 ; CHECK: attributes #44 = { hot }
+; CHECK: attributes #45 = { vscale_range(8,8) }
+; CHECK: attributes #46 = { vscale_range(1,8) }
 ; CHECK: attributes #[[NOBUILTIN]] = { nobuiltin }
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -970,6 +970,7 @@
   case Attribute::StackProtectStrong:
   case Attribute::StrictFP:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NoCfCheck:
   case Attribute::MustProgress:
   case Attribute::NoProfile:
Index: llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
===
--- llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -73,6 +73,7 @@
   .Case("sspstrong", Attribute::StackProtectStrong)
   .Case("strictfp", Attribute::StrictFP)
   .Case("uwtable", Attribute::UWTable)
+  .Case("vscale_range", Attribute::VScaleRange)
   .Default(Attribute::None);
 }
 
Index: llvm/lib/IR/Verifier.cpp
===
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -1629,6 +1629,7 @@
   case Attribute::InlineHint:
   case Attribute::StackAlignment:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NonLazyBind:
   case Attribute::ReturnsTwice:
   case Attribute::SanitizeAddress:
@@ -1987,6 +1988,14 @@
   return;
   }
 
+  if (Attrs.hasFnAttribute(Attribute::VScaleRange)) {
+std::pair Args =
+Attrs.getVScaleRangeArgs(AttributeList::FunctionIndex);
+
+if (Args.first > Args.second && Args.second != 0)
+  CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
+  }
+
   if (Attrs.hasFnAttribute("frame-pointer")) {
 StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
   "frame-pointer").getValueAsString();
Index: llvm/lib/IR/Attributes.cpp
===
--- llvm/lib/IR/Attributes.cpp
+++ llvm/lib/IR/Attributes.cpp
@@ -78,6 +78,17 @@
   return std::make_pair(ElemSizeArg, NumElemsArg);
 }
 
+static uint64_t packVScaleRangeArgs(unsigned MinValue, unsigned MaxValue) {
+  return uint64_t(MinValue) << 32 | MaxValue;
+}
+
+static std::pair unpackVScaleRangeArgs(uint64_t Value) {
+  unsigned MaxValue = Value & std::numeric_limits::max();
+  unsigned MinValue = Value >> 32;
+
+  return std::make_pair(MinValue, MaxValue);
+}
+
 Attribute Attribute::get(LLVMContext , Attribute::AttrKind Kind,
  uint64_t Val) {
   LLVMContextImpl *pImpl = Context.pImpl;
@@ -192,6 +203,12 @@
   return get(Context, 

[PATCH] D98487: [AArch64][SVE/NEON] Add support for FROUNDEVEN for both NEON and fixed length SVE

2021-03-12 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm, joechrisellis, 
CarolineConcatto, dmgreen.
Herald added subscribers: hiraditya, kristof.beyls, tschuett.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Previously NEON used a target-specific intrinsic for frintn. Given that
the FROUNDEVEN ISD node now exists, move over to that instead and add
codegen support for that node for both NEON and fixed length SVE.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D98487

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-neon-intrinsics.c
  clang/test/CodeGen/aarch64-neon-misc.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
  clang/test/CodeGen/arm-neon-directed-rounding.c
  clang/test/CodeGen/arm64-vrnd.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/include/llvm/Target/TargetSelectionDAG.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/arm64-vcvt.ll
  llvm/test/CodeGen/AArch64/arm64-vfloatintrinsics.ll
  llvm/test/CodeGen/AArch64/f16-instructions.ll
  llvm/test/CodeGen/AArch64/fp-intrinsics.ll
  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
  llvm/test/CodeGen/AArch64/vec-libcalls.ll

Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll
===
--- llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -29,6 +29,7 @@
 declare <3 x float> @llvm.nearbyint.v3f32(<3 x float>)
 declare <3 x float> @llvm.rint.v3f32(<3 x float>)
 declare <3 x float> @llvm.round.v3f32(<3 x float>)
+declare <3 x float> @llvm.roundeven.v3f32(<3 x float>)
 declare <3 x float> @llvm.sqrt.v3f32(<3 x float>)
 declare <3 x float> @llvm.trunc.v3f32(<3 x float>)
 
@@ -478,6 +479,15 @@
   ret <3 x float> %r
 }
 
+define <3 x float> @roundeven_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: roundeven_v3f32:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:frintn v0.4s, v0.4s
+; CHECK-NEXT:ret
+  %r = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
 define <3 x float> @sqrt_v3f32(<3 x float> %x) nounwind {
 ; CHECK-LABEL: sqrt_v3f32:
 ; CHECK:   // %bb.0:
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
===
--- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll
@@ -1255,6 +1255,253 @@
   ret void
 }
 
+;
+; ROUNDEVEN -> FRINTN
+;
+
+; Don't use SVE for 64-bit vectors.
+define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
+; CHECK-LABEL: frintn_v4f16:
+; CHECK: frintn v0.4h, v0.4h
+; CHECK-NEXT: ret
+  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
+  ret <4 x half> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
+; CHECK-LABEL: frintn_v8f16:
+; CHECK: frintn v0.8h, v0.8h
+; CHECK-NEXT: ret
+  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
+  ret <8 x half> %res
+}
+
+define void @frintn_v16f16(<16 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v16f16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
+; CHECK-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK-NEXT: ret
+  %op = load <16 x half>, <16 x half>* %a
+  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
+  store <16 x half> %res, <16 x half>* %a
+  ret void
+}
+
+define void @frintn_v32f16(<32 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v32f16:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
+; VBITS_GE_512-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
+; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32
+; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]]
+; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
+; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
+; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
+; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]
+; VBITS_EQ_256-NEXT: ret
+  %op = load <32 x half>, <32 x half>* %a
+  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
+  store <32 x half> %res, <32 x half>* %a
+  ret void
+}
+
+define void @frintn_v64f16(<64 x half>* %a) #0 {
+; CHECK-LABEL: frintn_v64f16:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
+; VBITS_GE_1024-DAG: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
+; 

[PATCH] D98030: [IR] Add vscale_range IR function attribute

2021-03-05 Thread Bradley Smith via Phabricator via cfe-commits
bsmith created this revision.
bsmith added reviewers: paulwalker-arm, joechrisellis, peterwaller-arm.
Herald added subscribers: dexonsmith, jdoerfert, steven_wu, hiraditya.
bsmith requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

This attribute represents the minimum and maximum values vscale can
take. For now this attribute is not hooked up to anything during
codegen; this will be added in the future when such codegen is
considered stable.

Additionally hook up the -msve-vector-bits= clang option to emit this
attribute.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D98030

Files:
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
  llvm/docs/BitCodeFormat.rst
  llvm/docs/LangRef.rst
  llvm/include/llvm/Bitcode/LLVMBitCodes.h
  llvm/include/llvm/IR/Attributes.h
  llvm/include/llvm/IR/Attributes.td
  llvm/lib/AsmParser/LLLexer.cpp
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/AsmParser/LLParser.h
  llvm/lib/AsmParser/LLToken.h
  llvm/lib/Bitcode/Reader/BitcodeReader.cpp
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/IR/AttributeImpl.h
  llvm/lib/IR/Attributes.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
  llvm/lib/Transforms/Utils/CodeExtractor.cpp
  llvm/test/Bitcode/attributes.ll
  llvm/test/Verifier/vscale_range.ll

Index: llvm/test/Verifier/vscale_range.ll
===
--- /dev/null
+++ llvm/test/Verifier/vscale_range.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK-NOT: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @a(i32) vscale_range(1, 0)
+
+; CHECK: 'vscale_range' minimum cannot be greater than maximum
+declare i8* @b(i32*) vscale_range(8, 1)
Index: llvm/test/Bitcode/attributes.ll
===
--- llvm/test/Bitcode/attributes.ll
+++ llvm/test/Bitcode/attributes.ll
@@ -422,6 +422,18 @@
   ret void
 }
 
+; CHECK: define void @f72() #45
+define void @f72() vscale_range(8)
+{
+  ret void
+}
+
+; CHECK: define void @f73() #46
+define void @f73() vscale_range(1,8)
+{
+  ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -467,4 +479,6 @@
 ; CHECK: attributes #42 = { nocallback }
 ; CHECK: attributes #43 = { cold }
 ; CHECK: attributes #44 = { hot }
+; CHECK: attributes #45 = { vscale_range(8,8) }
+; CHECK: attributes #46 = { vscale_range(1,8) }
 ; CHECK: attributes #[[NOBUILTIN]] = { nobuiltin }
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -970,6 +970,7 @@
   case Attribute::StackProtectStrong:
   case Attribute::StrictFP:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NoCfCheck:
   case Attribute::MustProgress:
   case Attribute::NoProfile:
Index: llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
===
--- llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -73,6 +73,7 @@
   .Case("sspstrong", Attribute::StackProtectStrong)
   .Case("strictfp", Attribute::StrictFP)
   .Case("uwtable", Attribute::UWTable)
+  .Case("vscale_range", Attribute::VScaleRange)
   .Default(Attribute::None);
 }
 
Index: llvm/lib/IR/Verifier.cpp
===
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -1622,6 +1622,7 @@
   case Attribute::InlineHint:
   case Attribute::StackAlignment:
   case Attribute::UWTable:
+  case Attribute::VScaleRange:
   case Attribute::NonLazyBind:
   case Attribute::ReturnsTwice:
   case Attribute::SanitizeAddress:
@@ -1980,6 +1981,14 @@
   return;
   }
 
+  if (Attrs.hasFnAttribute(Attribute::VScaleRange)) {
+std::pair Args =
+Attrs.getVScaleRangeArgs(AttributeList::FunctionIndex);
+
+if (Args.first > Args.second && Args.second != 0)
+  CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
+  }
+
   if (Attrs.hasFnAttribute("frame-pointer")) {
 StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
   "frame-pointer").getValueAsString();
Index: llvm/lib/IR/Attributes.cpp
===
--- llvm/lib/IR/Attributes.cpp
+++ llvm/lib/IR/Attributes.cpp
@@ -78,6 +78,17 @@
   return std::make_pair(ElemSizeArg, NumElemsArg);
 }
 
+static uint64_t packVScaleRangeArgs(unsigned MinValue, unsigned MaxValue) {
+  return uint64_t(MinValue) << 32 | MaxValue;
+}
+
+static std::pair unpackVScaleRangeArgs(uint64_t Value) {
+  

[llvm-branch-commits] [llvm] e0b9c5d - [CostModel] Add costs for llvm.experimental.vector.{extract, insert} intrinsics

2020-12-16 Thread Bradley Smith via llvm-branch-commits

Author: Bradley Smith
Date: 2020-12-16T13:39:04Z
New Revision: e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7

URL: 
https://github.com/llvm/llvm-project/commit/e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7
DIFF: 
https://github.com/llvm/llvm-project/commit/e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7.diff

LOG: [CostModel] Add costs for llvm.experimental.vector.{extract,insert} 
intrinsics

Adds cost model support for the new llvm.experimental.vector.{extract,insert}
intrinsics, using the existing getExtractSubvectorOverhead and
getInsertSubvectorOverhead functions for shuffles.

Previously this case would throw an assertion.

Differential Revision: https://reviews.llvm.org/D93043

Added: 

llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll

Modified: 
llvm/include/llvm/CodeGen/BasicTTIImpl.h

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 7dca7cd291c9..02f1b73226fc 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -114,12 +114,14 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 
   /// Estimate a cost of subvector extraction as a sequence of extract and
   /// insert operations.
-  unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index,
+  unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
FixedVectorType *SubVTy) {
 assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
 int NumSubElts = SubVTy->getNumElements();
-assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
+assert((!isa(VTy) ||
+(Index + NumSubElts) <=
+(int)cast(VTy)->getNumElements()) &&
"SK_ExtractSubvector index out of range");
 
 unsigned Cost = 0;
@@ -137,12 +139,14 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 
   /// Estimate a cost of subvector insertion as a sequence of extract and
   /// insert operations.
-  unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index,
+  unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
   FixedVectorType *SubVTy) {
 assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
 int NumSubElts = SubVTy->getNumElements();
-assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
+assert((!isa(VTy) ||
+(Index + NumSubElts) <=
+(int)cast(VTy)->getNumElements()) &&
"SK_InsertSubvector index out of range");
 
 unsigned Cost = 0;
@@ -723,10 +727,10 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case TTI::SK_PermuteTwoSrc:
   return getPermuteShuffleOverhead(cast(Tp));
 case TTI::SK_ExtractSubvector:
-  return getExtractSubvectorOverhead(cast(Tp), Index,
+  return getExtractSubvectorOverhead(Tp, Index,
  cast(SubTp));
 case TTI::SK_InsertSubvector:
-  return getInsertSubvectorOverhead(cast(Tp), Index,
+  return getInsertSubvectorOverhead(Tp, Index,
 cast(SubTp));
 }
 llvm_unreachable("Unknown TTI::ShuffleKind");
@@ -1255,6 +1259,26 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
  VarMask, Alignment, CostKind, I);
 }
+case Intrinsic::experimental_vector_extract: {
+  // FIXME: Handle case where a scalable vector is extracted from a 
scalable
+  // vector
+  if (isa(RetTy))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+  unsigned Index = cast(Args[1])->getZExtValue();
+  return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
+ cast(Args[0]->getType()),
+ Index, cast(RetTy));
+}
+case Intrinsic::experimental_vector_insert: {
+  // FIXME: Handle case where a scalable vector is inserted into a scalable
+  // vector
+  if (isa(Args[1]->getType()))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+  unsigned Index = cast(Args[2])->getZExtValue();
+  return thisT()->getShuffleCost(
+  TTI::SK_InsertSubvector, cast(Args[0]->getType()), Index,
+  cast(Args[1]->getType()));
+}
 case Intrinsic::vector_reduce_add:
 case Intrinsic::vector_reduce_mul:
 case Intrinsic::vector_reduce_and:

diff  --git 
a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll
 
b/llvm/te

[PATCH] D1810: [ARM] Fix AArch32 and pre-v8 poly types to be unsigned

2020-12-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith abandoned this revision.
bsmith added a comment.
Herald added subscribers: danielkiss, kristof.beyls.

This change is very old and almost certainly out of date, therefore abandoning.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D1810/new/

https://reviews.llvm.org/D1810

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D93206: [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 intrinsics

2020-12-15 Thread Bradley Smith via Phabricator via cfe-commits
bsmith accepted this revision.
bsmith added a comment.
This revision is now accepted and ready to land.

Changes look good to me; I can also confirm these are in fact not part of the 
ACLE specification.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93206/new/

https://reviews.llvm.org/D93206

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[Bug 1493587] Re: Trash does not restore files

2018-09-01 Thread Richard Bradley Smith via ubuntu-bugs
*** This bug is a duplicate of bug 1495943 ***
https://bugs.launchpad.net/bugs/1495943

I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1493587

Title:
  Trash does not restore files

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/nautilus/+bug/1493587/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

[Bug 1495943] Re: gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

2018-09-01 Thread Richard Bradley Smith via ubuntu-bugs
I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1495943

Title:
  gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

To manage notifications about this bug go to:
https://bugs.launchpad.net/glib/+bug/1495943/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

[Desktop-packages] [Bug 1495943] Re: gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

2018-09-01 Thread Richard Bradley Smith
I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Desktop
Packages, which is subscribed to glib2.0 in Ubuntu.
https://bugs.launchpad.net/bugs/1495943

Title:
  gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

Status in GLib:
  Fix Released
Status in glib2.0 package in Ubuntu:
  Fix Released
Status in gvfs package in Ubuntu:
  Fix Released
Status in glib2.0 package in Debian:
  Fix Released

Bug description:
  [System]

  Ubuntu MATE 15.10, fully updated.

  [Steps to reproduce]

  1. Create an empty file: touch ~/test
  2. Move it to trash: gvfs-trash ~/test
  3. Launch some file manager, browse Trash folder and try to restore the file. 
I've tested it with the following file managers: Caja, Nemo, Nautilus and 
Thunar.

  [Expected result]

  The file should be restored to its original location.

  [What happens instead]

  The file manager complains that it can't determine the original
  location of the file, and so doesn't restore it.

  [Details]

  When moving file to Trash, gvfs-trash does not add trash::orig-path
  and trash::deletion-date attributes to it. Without that the file
  manager can't determine the original location of the file, and hence
  can't restore it.

  You can check the file attributes by printing the moved file's info:
  gvfs-info trash:///test

  [Regression description]

  Restoring the files has been working fine in Ubuntu 14.04 and 15.04.
  This bug appeared only in 15.10, so it's a regression.

  You can see the gvfs-info output from 15.04 and 15.10 in the
  attachments below.

To manage notifications about this bug go to:
https://bugs.launchpad.net/glib/+bug/1495943/+subscriptions

-- 
Mailing list: https://launchpad.net/~desktop-packages
Post to : desktop-packages@lists.launchpad.net
Unsubscribe : https://launchpad.net/~desktop-packages
More help   : https://help.launchpad.net/ListHelp


[Desktop-packages] [Bug 1493587] Re: Trash does not restore files

2018-09-01 Thread Richard Bradley Smith
*** This bug is a duplicate of bug 1495943 ***
https://bugs.launchpad.net/bugs/1495943

I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Desktop
Packages, which is subscribed to nautilus in Ubuntu.
https://bugs.launchpad.net/bugs/1493587

Title:
  Trash does not restore files

Status in nautilus package in Ubuntu:
  New

Bug description:
  For some reason, files inside "Trash" cannot be restored. Whenever I
  try restoring a file that was deleted or moved to trash, I get this
  error,

  "Could not determine original location of 'My file'. The item cannot
  be restored from trash".

  nautilus:
Installed: 1:3.14.2-0ubuntu10
Candidate: 1:3.14.2-0ubuntu10
Version table:
   *** 1:3.14.2-0ubuntu10 0
  500 http://us.archive.ubuntu.com/ubuntu/ wily/main i386 Packages
  100 /var/lib/dpkg/status

  Distributor ID:   Ubuntu
  Description:  Ubuntu Wily Werewolf (development branch)
  Release:  15.10
  Codename: wily

  I expected files in trash can be restored to their original location.

  Trash was unable to restore files.

  ProblemType: Bug
  DistroRelease: Ubuntu 15.10
  Package: nautilus 1:3.14.2-0ubuntu10
  ProcVersionSignature: Ubuntu 4.1.0-3.3-generic 4.1.3
  Uname: Linux 4.1.0-3-generic i686
  ApportVersion: 2.18-0ubuntu9
  Architecture: i386
  Date: Tue Sep  8 18:56:44 2015
  GsettingsChanges:
   
  InstallationDate: Installed on 2015-05-18 (113 days ago)
  InstallationMedia: Lubuntu 15.10 "Wily Werewolf" - Alpha i386 (20150517)
  ProcEnviron:
   LANGUAGE=en_US
   TERM=xterm-256color
   PATH=(custom, no user)
   LANG=en_US.UTF-8
   SHELL=/bin/bash
  SourcePackage: nautilus
  UpgradeStatus: No upgrade log present (probably fresh install)

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/nautilus/+bug/1493587/+subscriptions

-- 
Mailing list: https://launchpad.net/~desktop-packages
Post to : desktop-packages@lists.launchpad.net
Unsubscribe : https://launchpad.net/~desktop-packages
More help   : https://help.launchpad.net/ListHelp


[Touch-packages] [Bug 1495943] Re: gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

2018-09-01 Thread Richard Bradley Smith
I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Ubuntu
Touch seeded packages, which is subscribed to glib2.0 in Ubuntu.
https://bugs.launchpad.net/bugs/1495943

Title:
  gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

Status in GLib:
  Fix Released
Status in glib2.0 package in Ubuntu:
  Fix Released
Status in gvfs package in Ubuntu:
  Fix Released
Status in glib2.0 package in Debian:
  Fix Released

Bug description:
  [System]

  Ubuntu MATE 15.10, fully updated.

  [Steps to reproduce]

  1. Create an empty file: touch ~/test
  2. Move it to trash: gvfs-trash ~/test
  3. Launch some file manager, browse Trash folder and try to restore the file. 
I've tested it with the following file managers: Caja, Nemo, Nautilus and 
Thunar.

  [Expected result]

  The file should be restored to its original location.

  [What happens instead]

  The file manager complains that it can't determine the original
  location of the file, and so doesn't restore it.

  [Details]

  When moving file to Trash, gvfs-trash does not add trash::orig-path
  and trash::deletion-date attributes to it. Without that the file
  manager can't determine the original location of the file, and hence
  can't restore it.

  You can check the file attributes by printing the moved file's info:
  gvfs-info trash:///test

  [Regression description]

  Restoring the files has been working fine in Ubuntu 14.04 and 15.04.
  This bug appeared only in 15.10, so it's a regression.

  You can see the gvfs-info output from 15.04 and 15.10 in the
  attachments below.

To manage notifications about this bug go to:
https://bugs.launchpad.net/glib/+bug/1495943/+subscriptions

-- 
Mailing list: https://launchpad.net/~touch-packages
Post to : touch-packages@lists.launchpad.net
Unsubscribe : https://launchpad.net/~touch-packages
More help   : https://help.launchpad.net/ListHelp


[Bug 1493587] Re: Trash does not restore files

2018-09-01 Thread Richard Bradley Smith
*** This bug is a duplicate of bug 1495943 ***
https://bugs.launchpad.net/bugs/1495943

I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Ubuntu
Desktop Bugs, which is subscribed to nautilus in Ubuntu.
https://bugs.launchpad.net/bugs/1493587

Title:
  Trash does not restore files

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/nautilus/+bug/1493587/+subscriptions

-- 
desktop-bugs mailing list
desktop-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/desktop-bugs

[Bug 1495943] Re: gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

2018-09-01 Thread Richard Bradley Smith
I just went to Ubuntu 8 LTS (yes, The Beaver). Trash does not restore.

-- 
You received this bug notification because you are a member of Ubuntu
Desktop Bugs, which is subscribed to gvfs in Ubuntu.
https://bugs.launchpad.net/bugs/1495943

Title:
  gvfs-trash doesn't add "trash::" attrs to file which is moved to Trash

To manage notifications about this bug go to:
https://bugs.launchpad.net/glib/+bug/1495943/+subscriptions

-- 
desktop-bugs mailing list
desktop-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/desktop-bugs

[Bug 1736013] [NEW] package cryptsetup 2:1.6.6-5ubuntu2.1 failed to install/upgrade: subprocess installed post-installation script returned error exit status 1

2017-12-03 Thread Bradley Smith
*** This bug is a duplicate of bug 1717845 ***
https://bugs.launchpad.net/bugs/1717845

Public bug reported:

Was updating Ubuntu and this happened

ProblemType: Package
DistroRelease: Ubuntu 16.04
Package: cryptsetup 2:1.6.6-5ubuntu2.1
ProcVersionSignature: Ubuntu 4.10.0-28.32~16.04.2-generic 4.10.17
Uname: Linux 4.10.0-28-generic i686
ApportVersion: 2.20.1-0ubuntu2.13
Architecture: i386
CasperVersion: 1.376.2
Date: Sun Dec  3 17:22:16 2017
ErrorMessage: subprocess installed post-installation script returned error exit 
status 1
LiveMediaBuild: Ubuntu 16.04.3 LTS "Xenial Xerus" - Release i386 (20170801)
RelatedPackageVersions:
 dpkg 1.18.4ubuntu1.3
 apt  1.2.24
SourcePackage: cryptsetup
Title: package cryptsetup 2:1.6.6-5ubuntu2.1 failed to install/upgrade: 
subprocess installed post-installation script returned error exit status 1
UpgradeStatus: No upgrade log present (probably fresh install)
crypttab: #   

fstab:
 aufs / aufs rw 0 0
 tmpfs /tmp tmpfs nosuid,nodev 0 0

** Affects: cryptsetup (Ubuntu)
 Importance: Undecided
 Status: New


** Tags: apport-package i386 xenial

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1736013

Title:
  package cryptsetup 2:1.6.6-5ubuntu2.1 failed to install/upgrade:
  subprocess installed post-installation script returned error exit
  status 1

To manage notifications about this bug go to:
https://bugs.launchpad.net/ubuntu/+source/cryptsetup/+bug/1736013/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

[Desktop-packages] [Bug 1584457] Re: gsd-backlight-helper spamming the logs since upate to 16.04

2017-11-23 Thread bradley smith
This appears to be fixed for me following update to latest release.

:-)
Thanks

Artful
ASUS UX305FA

-- 
You received this bug notification because you are a member of Desktop
Packages, which is subscribed to gnome-settings-daemon in Ubuntu.
https://bugs.launchpad.net/bugs/1584457

Title:
  gsd-backlight-helper spamming the logs since upate to 16.04

Status in GNOME Settings Daemon:
  Fix Released
Status in gnome-settings-daemon package in Ubuntu:
  Fix Released
Status in gnome-settings-daemon source package in Xenial:
  Confirmed

Bug description:
  Since updating to 16.04 from 14.04, I have the syslog spammed by:

  SEC: May 22 13:26:43 samsung-romano pkexec[21248]: gdm: Error executing 
command as another user: Not authorized [USER=root] [TTY=unknown] 
[CWD=/var/lib/gdm3] 
[COMMAND=/usr/lib/gnome-settings-daemon/gsd-backlight-helper --set-brightness 
1766]
  SEC: May 22 13:26:43 samsung-romano pkexec: pam_unix(polkit-1:session): 
session opened for user root by (uid=1153)
  SEC: May 22 13:26:43 samsung-romano pkexec: pam_systemd(polkit-1:session): 
Cannot create session: Already running in a session
  SEC: May 22 13:26:43 samsung-romano pkexec: 
pam_ck_connector(polkit-1:session): cannot determine display-device
  SEC: May 22 13:26:43 samsung-romano pkexec[21249]: romano: Executing command 
[USER=root] [TTY=unknown] [CWD=/home/romano] 
[COMMAND=/usr/lib/gnome-settings-daemon/gsd-backlight-helper --set-brightness 
4322]
  SYS: May 22 13:26:43 samsung-romano gnome-session[1454]: Error executing 
command as another user: Not authorized
  SYS: May 22 13:26:43 samsung-romano gnome-session[1454]: This incident has 
been reported.

  every couple of seconds or so. There is really no need to change
  brightness... how can I stop it?

To manage notifications about this bug go to:
https://bugs.launchpad.net/gnome-settings-daemon/+bug/1584457/+subscriptions

-- 
Mailing list: https://launchpad.net/~desktop-packages
Post to : desktop-packages@lists.launchpad.net
Unsubscribe : https://launchpad.net/~desktop-packages
More help   : https://help.launchpad.net/ListHelp


[Bug 1584457] Re: gsd-backlight-helper spamming the logs since upate to 16.04

2017-11-23 Thread bradley smith
This appears to be fixed for me following update to latest release.

:-)
Thanks

Artful
ASUS UX305FA

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1584457

Title:
  gsd-backlight-helper spamming the logs since upate to 16.04

To manage notifications about this bug go to:
https://bugs.launchpad.net/gnome-settings-daemon/+bug/1584457/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

[Bug 1584457] Re: gsd-backlight-helper spamming the logs since upate to 16.04

2017-11-23 Thread bradley smith
This appears to be fixed for me following update to latest release.

:-)
Thanks

Artful
ASUS UX305FA

-- 
You received this bug notification because you are a member of Ubuntu
Desktop Bugs, which is subscribed to gnome-settings-daemon in Ubuntu.
https://bugs.launchpad.net/bugs/1584457

Title:
  gsd-backlight-helper spamming the logs since upate to 16.04

To manage notifications about this bug go to:
https://bugs.launchpad.net/gnome-settings-daemon/+bug/1584457/+subscriptions

-- 
desktop-bugs mailing list
desktop-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/desktop-bugs

[Bug 1584457] Re: gsd-backlight-helper spamming the logs since upate to 16.04

2017-11-10 Thread bradley smith
I just upgraded to Artful (17.10) and now my syslog is getting spammed
with this. On an Asus UX305FA.

-- 
You received this bug notification because you are a member of Ubuntu
Bugs, which is subscribed to Ubuntu.
https://bugs.launchpad.net/bugs/1584457

Title:
  gsd-backlight-helper spamming the logs since upate to 16.04

To manage notifications about this bug go to:
https://bugs.launchpad.net/gnome-settings-daemon/+bug/1584457/+subscriptions

-- 
ubuntu-bugs mailing list
ubuntu-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs

[Bug 1584457] Re: gsd-backlight-helper spamming the logs since upate to 16.04

2017-11-10 Thread bradley smith
I just upgraded to Artful (17.10) and now my syslog is getting spammed
with this. On an Asus UX305FA.

-- 
You received this bug notification because you are a member of Ubuntu
Desktop Bugs, which is subscribed to gnome-settings-daemon in Ubuntu.
https://bugs.launchpad.net/bugs/1584457

Title:
  gsd-backlight-helper spamming the logs since upate to 16.04

To manage notifications about this bug go to:
https://bugs.launchpad.net/gnome-settings-daemon/+bug/1584457/+subscriptions

-- 
desktop-bugs mailing list
desktop-bugs@lists.ubuntu.com
https://lists.ubuntu.com/mailman/listinfo/desktop-bugs

[Desktop-packages] [Bug 1584457] Re: gsd-backlight-helper spamming the logs since upate to 16.04

2017-11-10 Thread bradley smith
I just upgraded to Artful (17.10) and now my syslog is getting spammed
with this. On an Asus UX305FA.

-- 
You received this bug notification because you are a member of Desktop
Packages, which is subscribed to gnome-settings-daemon in Ubuntu.
https://bugs.launchpad.net/bugs/1584457

Title:
  gsd-backlight-helper spamming the logs since upate to 16.04

Status in GNOME Settings Daemon:
  Fix Released
Status in gnome-settings-daemon package in Ubuntu:
  Fix Released
Status in gnome-settings-daemon source package in Xenial:
  Confirmed

Bug description:
  Since updating to 16.04 from 14.04, I have the syslog spammed by:

  SEC: May 22 13:26:43 samsung-romano pkexec[21248]: gdm: Error executing 
command as another user: Not authorized [USER=root] [TTY=unknown] 
[CWD=/var/lib/gdm3] 
[COMMAND=/usr/lib/gnome-settings-daemon/gsd-backlight-helper --set-brightness 
1766]
  SEC: May 22 13:26:43 samsung-romano pkexec: pam_unix(polkit-1:session): 
session opened for user root by (uid=1153)
  SEC: May 22 13:26:43 samsung-romano pkexec: pam_systemd(polkit-1:session): 
Cannot create session: Already running in a session
  SEC: May 22 13:26:43 samsung-romano pkexec: 
pam_ck_connector(polkit-1:session): cannot determine display-device
  SEC: May 22 13:26:43 samsung-romano pkexec[21249]: romano: Executing command 
[USER=root] [TTY=unknown] [CWD=/home/romano] 
[COMMAND=/usr/lib/gnome-settings-daemon/gsd-backlight-helper --set-brightness 
4322]
  SYS: May 22 13:26:43 samsung-romano gnome-session[1454]: Error executing 
command as another user: Not authorized
  SYS: May 22 13:26:43 samsung-romano gnome-session[1454]: This incident has 
been reported.

  every couple of seconds or so. There is really no need to change
  brightness... how can I stop it?

To manage notifications about this bug go to:
https://bugs.launchpad.net/gnome-settings-daemon/+bug/1584457/+subscriptions

-- 
Mailing list: https://launchpad.net/~desktop-packages
Post to : desktop-packages@lists.launchpad.net
Unsubscribe : https://launchpad.net/~desktop-packages
More help   : https://help.launchpad.net/ListHelp


[issue31537] Bug in readline module documentation example

2017-10-08 Thread Bradley Smith

Bradley Smith <bra...@gmail.com> added the comment:

I ran into the same bug in the documentation recently. I've opened a pull 
request here that fixes it:

https://github.com/python/cpython/pull/3925

As I was trying to figure out why the example was broken, I wrote up a little 
more context to explain the current behavior and the fix:

https://docs.python.org/3.7/library/readline.html#example
https://docs.python.org/3.6/library/readline.html#example

In the "Example" section, the second example that "supports concurrent 
interactive sessions, by only appending the new history" will actually *never* 
write any lines to a custom history file. This is conveniently masked by the 
fact that the file path used in the example code also happens to be the default 
path that readline automatically writes history to, but if you specify *any* 
other file path, you will see that the new file is created but never has any 
content written to it.

The problem in this example is the use of `get_history_length` to get 
"previous" and "current" history lengths for determining how many lines to 
append to the file. Both calls to `get_history_length` always return `-1` here. 
Thus, when `append_history_file` is called, it always receives a first argument 
of `0` (because `-1 - -1 == 0`), resulting in zero lines written to the file.

Instead of `get_history_length`, the example code *should* call 
`get_current_history_length`. Swapping that function call makes the example 
behave as expected, appending new lines to the file.

--
nosy: +infinitewarp

___
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue31537>
___
___
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com



[issue31537] Bug in readline module documentation example

2017-10-08 Thread Bradley Smith

Change by Bradley Smith <bra...@gmail.com>:


--
keywords: +patch
pull_requests: +3898
stage:  -> patch review

___
Python tracker <rep...@bugs.python.org>
<https://bugs.python.org/issue31537>
___
___
Python-bugs-list mailing list
Unsubscribe: 
https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com



Re: [PATCH] D20089: Adding a TargetParser for AArch64

2016-05-10 Thread Bradley Smith via cfe-commits
bsmith added a comment.

In http://reviews.llvm.org/D20089#425541, @rengolin wrote:

> http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150824/296862.html
>
> One option is to add the duplication, solve all the platform problems first, 
> then move to a class based design.
>
> Another is to go directly to a class based design, but that would mean a much 
> bigger move.
>
> I'm ok with either, but I'd prefer a slow and steady change stream.


I think that made sense when we only had ARM using this, but not so much now 
since we essentially have two implementations of the same thing.


Repository:
  rL LLVM

http://reviews.llvm.org/D20089



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20088: Using AArch64TargetParser in clang

2016-05-10 Thread Bradley Smith via cfe-commits
bsmith added inline comments.


Comment at: lib/Driver/Tools.cpp:707
@@ -696,3 +706,3 @@
   std::string MArch = arm::getARMArch(ArchName, Triple);
-  if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID ||
+  if (!checkARMArchValid(MArch) || llvm::ARM::parseArch(MArch) == 
llvm::ARM::AK_INVALID ||
   (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))

Why do we need the call to checkARMArchValid here? Isn't it sufficient that 
parseArch returns a valid architecture?


Comment at: lib/Driver/Tools.cpp:2280
@@ -2276,12 +2279,3 @@
 
-  if (Split.first == "armv8-a" || Split.first == "armv8a") {
-// ok, no additional features.
-  } else if (Split.first == "armv8.1-a" || Split.first == "armv8.1a") {
-Features.push_back("+v8.1a");
-  } else if (Split.first == "armv8.2-a" || Split.first == "armv8.2a" ) {
-Features.push_back("+v8.2a");
-  } else {
-return false;
-  }
-
-  if (Split.second.size() && !DecodeAArch64Features(D, Split.second, Features))
+  if (!checkAArch64ArchValid(Split.first) || 
llvm::AArch64::parseArch(Split.first, Features) == llvm::ARM::AK_INVALID ||
+ (Split.second.size() && !DecodeAArch64Features(D, Split.second, 
Features)))

Same here, why do we need checkAArch64ArchValid?


Repository:
  rL LLVM

http://reviews.llvm.org/D20088



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20089: Adding a TargetParser for AArch64

2016-05-10 Thread Bradley Smith via cfe-commits
bsmith added a comment.

There is an awful lot of duplication/passing through to another class in this; 
it strikes me that this whole thing could benefit from some level of 
inheritance. I think it would be good to have a base class that defines the 
interface and have both ARM/AArch64 (and any other architectures that want to 
use this in the future) implement this interface. That way all of this code can 
be called generically from clang/wherever.



Comment at: lib/Support/TargetParser.cpp:441
@@ +440,3 @@
+  if (Extensions & AArch64::AEK_PROFILE)
+Features.push_back("+spe");
+

For ARM there is a table that defines these extensions and how they map to 
backend features; it would be good to do this in a similar manner.


Comment at: lib/Support/TargetParser.cpp:471
@@ +470,3 @@
+  if (ArchKind >= ARM::AK_LAST)
+return ARMBuildAttrs::CPUArch::Pre_v4;
+  return AArch64ARCHNames[ArchKind].ArchAttr;

This doesn't make sense for AArch64


Comment at: lib/Support/TargetParser.cpp:770
@@ +769,3 @@
+  if (A.ID == ARM::AK_ARMV8_2A)
+Features.push_back("+v8.2a");
+  return A.ID;

Why do we need to add these features explicitly? Can't we just pass through the 
correct triple?


Repository:
  rL LLVM

http://reviews.llvm.org/D20089



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[Callers] Challenging Contras

2016-04-08 Thread Bradley Smith via Callers
Hello all! I'll be calling a session of challenging contras at a dance
weekend next month, and find myself lacking in the aforementioned
challenging contras. I want to keep the session balanced, with a mix of
dances that are mind-bending or heavily technical as well as some dances
that are a step or so below that but that still have something interesting
or unique. Would any of you be so kind as to share some of your favorite
challenging dances? Thank you in advance!


r262619 - [ARM] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-03-03 Thread Bradley Smith via cfe-commits
Author: brasmi01
Date: Thu Mar  3 07:52:22 2016
New Revision: 262619

URL: http://llvm.org/viewvc/llvm-project?rev=262619&view=rev
Log:
[ARM] Add Clang targeting for ARMv8-M Baseline/Mainline

Modified:
cfe/trunk/lib/Basic/Targets.cpp
cfe/trunk/lib/Driver/Tools.cpp
cfe/trunk/test/Driver/arm-alignment.c
cfe/trunk/test/Driver/arm-cortex-cpus.c
cfe/trunk/test/Driver/arm-features.c
cfe/trunk/test/Preprocessor/arm-target-features.c

Modified: cfe/trunk/lib/Basic/Targets.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=262619&r1=262618&r2=262619&view=diff
==
--- cfe/trunk/lib/Basic/Targets.cpp (original)
+++ cfe/trunk/lib/Basic/Targets.cpp Thu Mar  3 07:52:22 2016
@@ -4536,7 +4536,8 @@ class ARMTargetInfo : public TargetInfo
   }
 
   bool supportsThumb2() const {
-return CPUAttr.equals("6T2") || ArchVersion >= 7;
+return CPUAttr.equals("6T2") ||
+   (ArchVersion >= 7 && !CPUAttr.equals("8M_BASE"));
   }
 
   StringRef getCPUAttr() const {
@@ -4563,6 +4564,10 @@ class ARMTargetInfo : public TargetInfo
   return "8_1A";
 case llvm::ARM::AK_ARMV8_2A:
   return "8_2A";
+case llvm::ARM::AK_ARMV8MBaseline:
+  return "8M_BASE";
+case llvm::ARM::AK_ARMV8MMainline:
+  return "8M_MAIN";
 }
   }
 
@@ -4852,13 +4857,14 @@ public:
 
 // __ARM_ARCH_ISA_ARM is defined to 1 if the core supports the ARM ISA.  It
 // is not defined for the M-profile.
-// NOTE that the deffault profile is assumed to be 'A'
-if (CPUProfile.empty() || CPUProfile != "M")
+// NOTE that the default profile is assumed to be 'A'
+if (CPUProfile.empty() || ArchProfile != llvm::ARM::PK_M)
   Builder.defineMacro("__ARM_ARCH_ISA_ARM", "1");
 
-// __ARM_ARCH_ISA_THUMB is defined to 1 if the core supporst the original
-// Thumb ISA (including v6-M).  It is set to 2 if the core supports the
-// Thumb-2 ISA as found in the v6T2 architecture and all v7 architecture.
+// __ARM_ARCH_ISA_THUMB is defined to 1 if the core supports the original
+// Thumb ISA (including v6-M and v8-M Baseline).  It is set to 2 if the
+// core supports the Thumb-2 ISA as found in the v6T2 architecture and all
+// v7 and v8 architectures excluding v8-M Baseline.
 if (supportsThumb2())
   Builder.defineMacro("__ARM_ARCH_ISA_THUMB", "2");
 else if (supportsThumb())
@@ -4978,7 +4984,7 @@ public:
 Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM",
 Opts.ShortEnums ? "1" : "4");
 
-if (ArchVersion >= 6 && CPUAttr != "6M") {
+if (ArchVersion >= 6 && CPUAttr != "6M" && CPUAttr != "8M_BASE") {
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
   Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");

Modified: cfe/trunk/lib/Driver/Tools.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=262619&r1=262618&r2=262619&view=diff
==
--- cfe/trunk/lib/Driver/Tools.cpp (original)
+++ cfe/trunk/lib/Driver/Tools.cpp Thu Mar  3 07:52:22 2016
@@ -963,6 +963,10 @@ static void getARMTargetFeatures(const T
   // No v6M core supports unaligned memory access (v6M ARM ARM A3.2).
   if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m)
 D.Diag(diag::err_target_unsupported_unaligned) << "v6m";
+  // v8M Baseline follows on from v6M, so doesn't support unaligned memory
+  // access either.
+  else if (Triple.getSubArch() == 
llvm::Triple::SubArchType::ARMSubArch_v8m_baseline)
+D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base";
 } else
   Features.push_back("+strict-align");
   } else {

Modified: cfe/trunk/test/Driver/arm-alignment.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/arm-alignment.c?rev=262619&r1=262618&r2=262619&view=diff
==
--- cfe/trunk/test/Driver/arm-alignment.c (original)
+++ cfe/trunk/test/Driver/arm-alignment.c Thu Mar  3 07:52:22 2016
@@ -83,11 +83,13 @@
 // CHECK-ALIGNED-ARM: "-target-feature" "+strict-align"
 // CHECK-ALIGNED-AARCH64: "-target-feature" "+strict-align"
 
-// Make sure that v6M cores always trigger the unsupported aligned accesses 
error
-// for all supported architecture triples.
+// Make sure that v6M cores and v8M Baseline always trigger the unsupported
+// aligned accesses error for all supported architecture triples.
 // RUN: not %clang -c -target thumbv6m-none-gnueabi -mcpu=cortex-m0 
-munaligned-access %s 2>&1 | \
 // RUN:   FileCheck --check-prefix CHECK-UNALIGN-NOT-SUPPORTED %s
 // RUN: not %clang -c -target thumb-none-gnueabi -mcpu=cortex-m0 
-munaligned-access %s 2>&1 | \
 // RUN:   FileCheck --check-prefix CHECK-UNALIGN-NOT-SUPPORTED %s
+// RUN: not %clang -c -target 

Re: [PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-03-03 Thread Bradley Smith via cfe-commits
bsmith closed this revision.
bsmith added a comment.

Committed as r262619.


Repository:
  rL LLVM

http://reviews.llvm.org/D15283



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-03-01 Thread Bradley Smith via cfe-commits
bsmith added a comment.

Ping. This change has been waiting for review for over a month now; it would be 
great to get this committed. Thanks.


Repository:
  rL LLVM

http://reviews.llvm.org/D15283



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-02-16 Thread Bradley Smith via cfe-commits
bsmith added a comment.

Ping, it would be nice to get this committed to complete ARMv8-M support.


Repository:
  rL LLVM

http://reviews.llvm.org/D15283



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-02-02 Thread Bradley Smith via cfe-commits
bsmith added a comment.

Ping.


Repository:
  rL LLVM

http://reviews.llvm.org/D15283



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-01-26 Thread Bradley Smith via cfe-commits
bsmith added a comment.

Now that the LLVM side of this is committed, it would be great to get this 
reviewed also, thanks.


Repository:
  rL LLVM

http://reviews.llvm.org/D15283



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2016-01-04 Thread Bradley Smith via cfe-commits
bsmith updated this revision to Diff 43884.
bsmith added a comment.

Rebase patch against latest changes made to ARMv8-M targeting, specifically the 
removal of 'B' as a profile.


Repository:
  rL LLVM

http://reviews.llvm.org/D15283

Files:
  lib/Basic/Targets.cpp
  lib/Driver/Tools.cpp
  test/Driver/arm-alignment.c
  test/Driver/arm-cortex-cpus.c
  test/Driver/arm-features.c
  test/Preprocessor/arm-target-features.c

Index: test/Preprocessor/arm-target-features.c
===
--- test/Preprocessor/arm-target-features.c
+++ test/Preprocessor/arm-target-features.c
@@ -95,6 +95,42 @@
 // THUMBV8A-EABI:#define __ARM_ARCH_EXT_IDIV__ 1
 // THUMBV8A-EABI: #define __ARM_FP 0xE
 
+// RUN: %clang -target armv8m.base-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=V8M_BASELINE %s
+// V8M_BASELINE: __ARM_ARCH 8
+// V8M_BASELINE: __ARM_ARCH_8M_BASE__ 1
+// V8M_BASELINE: __ARM_ARCH_EXT_IDIV__ 1
+// V8M_BASELINE-NOT: __ARM_ARCH_ISA_ARM
+// V8M_BASELINE: __ARM_ARCH_ISA_THUMB 1
+// V8M_BASELINE: __ARM_ARCH_PROFILE 'M'
+// V8M_BASELINE-NOT: __ARM_FEATURE_CRC32
+// V8M_BASELINE-NOT: __ARM_FEATURE_DSP
+// V8M_BASELINE-NOT: __ARM_FP 0x{{.*}}
+// V8M_BASELINE-NOT: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
+
+// RUN: %clang -target armv8m.main-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=V8M_MAINLINE %s
+// V8M_MAINLINE: __ARM_ARCH 8
+// V8M_MAINLINE: __ARM_ARCH_8M_MAIN__ 1
+// V8M_MAINLINE: __ARM_ARCH_EXT_IDIV__ 1
+// V8M_MAINLINE-NOT: __ARM_ARCH_ISA_ARM
+// V8M_MAINLINE: __ARM_ARCH_ISA_THUMB 2
+// V8M_MAINLINE: __ARM_ARCH_PROFILE 'M'
+// V8M_MAINLINE-NOT: __ARM_FEATURE_CRC32
+// V8M_MAINLINE-NOT: __ARM_FEATURE_DSP
+// V8M_MAINLINE: __ARM_FP 0xE
+// V8M_MAINLINE: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
+// RUN: %clang -target arm-none-linux-gnu -march=armv8-m.main+dsp -x c -E -dM %s -o - | FileCheck --check-prefix=V8M_MAINLINE_DSP %s
+// V8M_MAINLINE_DSP: __ARM_ARCH 8
+// V8M_MAINLINE_DSP: __ARM_ARCH_8M_MAIN__ 1
+// V8M_MAINLINE_DSP: __ARM_ARCH_EXT_IDIV__ 1
+// V8M_MAINLINE_DSP-NOT: __ARM_ARCH_ISA_ARM
+// V8M_MAINLINE_DSP: __ARM_ARCH_ISA_THUMB 2
+// V8M_MAINLINE_DSP: __ARM_ARCH_PROFILE 'M'
+// V8M_MAINLINE_DSP-NOT: __ARM_FEATURE_CRC32
+// V8M_MAINLINE_DSP: __ARM_FEATURE_DSP 1
+// V8M_MAINLINE_DSP: __ARM_FP 0xE
+// V8M_MAINLINE_DSP: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
 // RUN: %clang -target arm-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-DEFS %s
 // CHECK-DEFS:#define __ARM_PCS 1
 // CHECK-DEFS:#define __ARM_SIZEOF_MINIMAL_ENUM 4
Index: test/Driver/arm-features.c
===
--- test/Driver/arm-features.c
+++ test/Driver/arm-features.c
@@ -4,10 +4,16 @@
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic+crypto -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CRYPTO %s
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8a+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CRYPTO %s
 // CHECK-CRYPTO: "-cc1"{{.*}} "-triple" "armv8-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "+crypto"
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic+dsp -march=armv8m.main -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-DSP %s
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8m.main+dsp -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-DSP %s
+// CHECK-DSP: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "+dsp"
 
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic+nocrc -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRC %s
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8a+nocrc -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRC %s
 // CHECK-NOCRC: "-cc1"{{.*}} "-triple" "armv8-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "-crc"
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic+nocrypto -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRYPTO %s
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8a+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRYPTO %s
 // CHECK-NOCRYPTO: "-cc1"{{.*}} "-triple" "armv8-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "-crypto"
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic+nodsp -march=armv8m.main -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NODSP %s
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8m.main+nodsp -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NODSP %s
+// CHECK-NODSP: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "-dsp"
Index: test/Driver/arm-cortex-cpus.c
===
--- test/Driver/arm-cortex-cpus.c
+++ test/Driver/arm-cortex-cpus.c
@@ -230,6 +230,24 @@
 // RUN: %clang -target arm -march=armebv8.1-a -mbig-endian -mthumb -### -c %s 2>&1 | FileCheck 

[PATCH] D15283: [ARMv8-M] Add Clang targeting for ARMv8-M Baseline/Mainline

2015-12-07 Thread Bradley Smith via cfe-commits
bsmith created this revision.
bsmith added a reviewer: t.p.northover.
bsmith added a subscriber: cfe-commits.
bsmith set the repository for this revision to rL LLVM.
Herald added subscribers: rengolin, aemerson.

This patch forms part of the ARMv8-M Baseline/Mainline support, adding Clang 
targeting for ARMv8-M Baseline/Mainline.

ARMv8-M Mainline is a superset of ARMv7-M, containing all ARMv7-M instructions 
plus ARMv8-A semaphores and atomics and the ARMv8-M security extensions.

ARMv8-M Baseline is a superset of ARMv6-M, containing all ARMv6-M instructions 
plus ARMv8-A semaphores and atomics, ARMv7-M exclusives, various code 
optimization instructions (wide branches, CBZ, hardware divide), MOVW/MOVT and 
the ARMv8-M security extensions.

Repository:
  rL LLVM

http://reviews.llvm.org/D15283

Files:
  lib/Basic/Targets.cpp
  lib/Driver/ToolChain.cpp
  lib/Driver/Tools.cpp
  test/Driver/arm-alignment.c
  test/Driver/arm-cortex-cpus.c
  test/Driver/arm-features.c
  test/Preprocessor/arm-target-features.c

Index: test/Preprocessor/arm-target-features.c
===
--- test/Preprocessor/arm-target-features.c
+++ test/Preprocessor/arm-target-features.c
@@ -95,6 +95,43 @@
 // THUMBV8A-EABI:#define __ARM_ARCH_EXT_IDIV__ 1
 // THUMBV8A-EABI: #define __ARM_FP 0xE
 
+// RUN: %clang -target armv8m.base-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=V8M_BASELINE %s
+// V8M_BASELINE: __ARM_ARCH 8
+// V8M_BASELINE: __ARM_ARCH_8M__ 1
+// V8M_BASELINE: __ARM_ARCH_EXT_IDIV__ 1
+// V8M_BASELINE-NOT: __ARM_ARCH_ISA_ARM
+// FIXME: ABI-85 might change __ARM_ARCH_ISA_THUMB
+// V8M_BASELINE: __ARM_ARCH_ISA_THUMB 1
+// V8M_BASELINE: __ARM_ARCH_PROFILE 'B'
+// V8M_BASELINE-NOT: __ARM_FEATURE_CRC32
+// V8M_BASELINE-NOT: __ARM_FEATURE_DSP
+// V8M_BASELINE-NOT: __ARM_FP 0x{{.*}}
+// V8M_BASELINE-NOT: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
+
+// RUN: %clang -target armv8m.main-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=V8M_MAINLINE %s
+// V8M_MAINLINE: __ARM_ARCH 8
+// V8M_MAINLINE: __ARM_ARCH_8M__ 1
+// V8M_MAINLINE: __ARM_ARCH_EXT_IDIV__ 1
+// V8M_MAINLINE-NOT: __ARM_ARCH_ISA_ARM
+// V8M_MAINLINE: __ARM_ARCH_ISA_THUMB 2
+// V8M_MAINLINE: __ARM_ARCH_PROFILE 'M'
+// V8M_MAINLINE-NOT: __ARM_FEATURE_CRC32
+// V8M_MAINLINE-NOT: __ARM_FEATURE_DSP
+// V8M_MAINLINE: __ARM_FP 0xE
+// V8M_MAINLINE: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
+// RUN: %clang -target arm-none-linux-gnu -march=armv8-m.main+dsp -x c -E -dM %s -o - | FileCheck --check-prefix=V8M_MAINLINE_DSP %s
+// V8M_MAINLINE_DSP: __ARM_ARCH 8
+// V8M_MAINLINE_DSP: __ARM_ARCH_8M__ 1
+// V8M_MAINLINE_DSP: __ARM_ARCH_EXT_IDIV__ 1
+// V8M_MAINLINE_DSP-NOT: __ARM_ARCH_ISA_ARM
+// V8M_MAINLINE_DSP: __ARM_ARCH_ISA_THUMB 2
+// V8M_MAINLINE_DSP: __ARM_ARCH_PROFILE 'M'
+// V8M_MAINLINE_DSP-NOT: __ARM_FEATURE_CRC32
+// V8M_MAINLINE_DSP: __ARM_FEATURE_DSP 1
+// V8M_MAINLINE_DSP: __ARM_FP 0xE
+// V8M_MAINLINE_DSP: __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+
 // RUN: %clang -target arm-none-linux-gnu -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-DEFS %s
 // CHECK-DEFS:#define __ARM_PCS 1
 // CHECK-DEFS:#define __ARM_SIZEOF_MINIMAL_ENUM 4
Index: test/Driver/arm-features.c
===
--- test/Driver/arm-features.c
+++ test/Driver/arm-features.c
@@ -4,6 +4,9 @@
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic+crypto -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CRYPTO %s
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8a+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-CRYPTO %s
 // CHECK-CRYPTO: "-cc1"{{.*}} "-triple" "armv8-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "+crypto"
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic+dsp -march=armv8m.main -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-DSP %s
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8m.main+dsp -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-DSP %s
+// CHECK-DSP: "-cc1"{{.*}} "-triple" "thumbv8m.main-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "+dsp"
 
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic+nocrc -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRC %s
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8a+nocrc -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRC %s
@@ -11,3 +14,6 @@
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic+nocrypto -march=armv8a -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRYPTO %s
 // RUN: %clang -target arm-none-none-eabi -mcpu=generic -march=armv8a+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NOCRYPTO %s
 // CHECK-NOCRYPTO: "-cc1"{{.*}} "-triple" "armv8-{{.*}} "-target-cpu" "generic"{{.*}} "-target-feature" "-crypto"
+// RUN: %clang -target arm-none-none-eabi -mcpu=generic+nodsp -march=armv8m.main -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-NODSP %s
+// RUN: %clang -target 

Re: [PATCH] D15040: [ARM] Add command-line options for ARMv8.2-A

2015-11-27 Thread Bradley Smith via cfe-commits
bsmith added a subscriber: bsmith.


Comment at: lib/Driver/Tools.cpp:868-876
@@ -867,4 +867,11 @@
 
-  if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8_1a) {
+  switch (Triple.getSubArch()) {
+  case llvm::Triple::SubArchType::ARMSubArch_v8_1a:
 Features.insert(Features.begin(), "+v8.1a");
+break;
+  case llvm::Triple::SubArchType::ARMSubArch_v8_2a:
+Features.insert(Features.begin(), "+v8.2a");
+break;
+  default:
+break;
   }

Now that -mcpu=generic works correctly and isn't hardcoded to ARMv8.1-A, I 
don't believe we need this hardcoded logic to add these features.


Repository:
  rL LLVM

http://reviews.llvm.org/D15040



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r253471 - [ARM] Support +feature targeting in -mcpu/-march

2015-11-18 Thread Bradley Smith via cfe-commits
Author: brasmi01
Date: Wed Nov 18 10:33:48 2015
New Revision: 253471

URL: http://llvm.org/viewvc/llvm-project?rev=253471&view=rev
Log:
[ARM] Support +feature targeting in -mcpu/-march

Added:
cfe/trunk/test/Driver/arm-features.c   (with props)
Modified:
cfe/trunk/lib/Driver/Tools.cpp

Modified: cfe/trunk/lib/Driver/Tools.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Tools.cpp?rev=253471&r1=253470&r2=253471&view=diff
==
--- cfe/trunk/lib/Driver/Tools.cpp (original)
+++ cfe/trunk/lib/Driver/Tools.cpp Wed Nov 18 10:33:48 2015
@@ -589,23 +589,47 @@ static void getARMFPUFeatures(const Driv
 D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
 }
 
+// Decode ARM features from string like +[no]featureA+[no]featureB+...
+static bool DecodeARMFeatures(const Driver , StringRef text,
+  std::vector ) {
+  SmallVector Split;
+  text.split(Split, StringRef("+"), -1, false);
+
+  for (StringRef Feature : Split) {
+const char *FeatureName = llvm::ARM::getArchExtFeature(Feature);
+if (FeatureName)
+  Features.push_back(FeatureName);
+else
+  return false;
+  }
+  return true;
+}
+
 // Check if -march is valid by checking if it can be canonicalised and parsed.
 // getARMArch is used here instead of just checking the -march value in order
 // to handle -march=native correctly.
 static void checkARMArchName(const Driver , const Arg *A, const ArgList 
,
  llvm::StringRef ArchName,
+ std::vector ,
  const llvm::Triple ) {
+  std::pair Split = ArchName.split("+");
+
   std::string MArch = arm::getARMArch(ArchName, Triple);
-  if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID)
+  if (llvm::ARM::parseArch(MArch) == llvm::ARM::AK_INVALID ||
+  (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
 D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
 }
 
 // Check -mcpu=. Needs ArchName to handle -mcpu=generic.
 static void checkARMCPUName(const Driver , const Arg *A, const ArgList ,
 llvm::StringRef CPUName, llvm::StringRef ArchName,
+std::vector ,
 const llvm::Triple ) {
+  std::pair Split = CPUName.split("+");
+
   std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple);
-  if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty())
+  if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty() ||
+  (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
 D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
 }
 
@@ -771,12 +795,12 @@ static void getARMTargetFeatures(const T
   D.Diag(clang::diag::warn_drv_unused_argument)
   << ArchArg->getAsString(Args);
 ArchName = StringRef(WaArch->getValue()).substr(7);
-checkARMArchName(D, WaArch, Args, ArchName, Triple);
+checkARMArchName(D, WaArch, Args, ArchName, Features, Triple);
 // FIXME: Set Arch.
 D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args);
   } else if (ArchArg) {
 ArchName = ArchArg->getValue();
-checkARMArchName(D, ArchArg, Args, ArchName, Triple);
+checkARMArchName(D, ArchArg, Args, ArchName, Features, Triple);
   }
 
   // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=.
@@ -787,10 +811,10 @@ static void getARMTargetFeatures(const T
   D.Diag(clang::diag::warn_drv_unused_argument)
   << CPUArg->getAsString(Args);
 CPUName = StringRef(WaCPU->getValue()).substr(6);
-checkARMCPUName(D, WaCPU, Args, CPUName, ArchName, Triple);
+checkARMCPUName(D, WaCPU, Args, CPUName, ArchName, Features, Triple);
   } else if (CPUArg) {
 CPUName = CPUArg->getValue();
-checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, Triple);
+checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, Features, Triple);
   }
 
   // Add CPU features for generic CPUs
@@ -6274,7 +6298,7 @@ const std::string arm::getARMArch(String
 MArch = Arch;
   else
 MArch = Triple.getArchName();
-  MArch = StringRef(MArch).lower();
+  MArch = StringRef(MArch).split("+").first.lower();
 
   // Handle -march=native.
   if (MArch == "native") {
@@ -6313,7 +6337,7 @@ std::string arm::getARMTargetCPU(StringR
   // FIXME: Warn on inconsistent use of -mcpu and -march.
   // If we have -mcpu=, use that.
   if (!CPU.empty()) {
-std::string MCPU = StringRef(CPU).lower();
+std::string MCPU = StringRef(CPU).split("+").first.lower();
 // Handle -mcpu=native.
 if (MCPU == "native")
   return llvm::sys::getHostCPUName();

Added: cfe/trunk/test/Driver/arm-features.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/arm-features.c?rev=253471&view=auto

  1   2   3   4   5   6   7   8   9   >