[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-17 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1f057e365f1f: [X86] AMD Zen 4 Initial enablement (authored 
by GGanesh).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s

Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -17,6 +17,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
===
--- llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -15,6 +15,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
Index: llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
===
--- llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -7,6 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver1 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
 
Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
===
--- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64  | FileCheck %s --check-prefixes=X86-64
 
 define float @f32_no_daz(float %f) #0 {
Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll
===

[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.

cheers


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 483141.
GGanesh added a comment.

Fixed the tests

tools/llvm-mca/X86/cpus.s
tools/llvm-mca/X86/read-after-ld-1.s
tools/llvm-mca/X86/register-file-statistics.s
tools/llvm-mca/X86/scheduler-queue-usage.s


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s

Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -17,6 +17,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
===
--- llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -15,6 +15,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
Index: llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
===
--- llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -7,6 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver1 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
 
Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
===
--- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64  | FileCheck %s --check-prefixes=X86-64
 
 define float @f32_no_daz(float %f) #0 {
Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll

[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

In D139073#3997418 , @RKSimon wrote:

> Thanks @GGanesh  - LGTM with one minor - the orphan ZNVER4 checks from the 
> llvm-mca tests need removing

Thank you! I left them intentionally as this is a stop-gap patch. Agreed! Will 
remove them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon accepted this revision.
RKSimon added a comment.
This revision is now accepted and ready to land.

Thanks @GGanesh  - LGTM with one minor - the orphan ZNVER4 checks from the 
llvm-mca tests need removing


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 483092.
GGanesh added a comment.

Update to use 'no' processor model.
Fixed the below tests
tools/llvm-mca/X86/cpus.s
tools/llvm-mca/X86/read-after-ld-1.s
tools/llvm-mca/X86/register-file-statistics.s
tools/llvm-mca/X86/scheduler-queue-usage.s

Addressed comments from @RKSimon


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s
  llvm/test/tools/llvm-mca/X86/cpus.s
  llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
  llvm/test/tools/llvm-mca/X86/register-file-statistics.s
  llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s

Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
===
--- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -112,6 +112,12 @@
 # ZNVER3-NEXT: [3] Maximum number of used buffer entries.
 # ZNVER3-NEXT: [4] Total number of buffer entries.
 
+# ZNVER4:  Scheduler's queue usage:
+# ZNVER4-NEXT: [1] Resource name.
+# ZNVER4-NEXT: [2] Average number of used buffer entries.
+# ZNVER4-NEXT: [3] Maximum number of used buffer entries.
+# ZNVER4-NEXT: [4] Total number of buffer entries.
+
 # BARCELONA:[1][2][3][4]
 # BARCELONA-NEXT:  SBPortAny0  1  54
 
@@ -165,3 +171,9 @@
 # ZNVER3-NEXT: Zn3Int   0  1  96
 # ZNVER3-NEXT: Zn3Load  0  0  72
 # ZNVER3-NEXT: Zn3Store 0  0  64
+
+# ZNVER4:   [1][2][3][4]
+# ZNVER4-NEXT: Zn3FP0  0  64
+# ZNVER4-NEXT: Zn3Int   0  1  96
+# ZNVER4-NEXT: Zn3Load  0  0  72
+# ZNVER4-NEXT: Zn3Store 0  0  64
Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s
===
--- llvm/test/tools/llvm-mca/X86/register-file-statistics.s
+++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -54,6 +54,11 @@
 # ZNVER3-NEXT:   Total number of mappings created: 0
 # ZNVER3-NEXT:   Max number of mappings used:  0
 
+# ZNVER4: *  Register File #1 -- Zn3FpPRF:
+# ZNVER4-NEXT:   Number of physical registers: 160
+# ZNVER4-NEXT:   Total number of mappings created: 0
+# ZNVER4-NEXT:   Max number of mappings used:  0
+
 # BDVER2: *  Register File #2 -- PdIntegerPRF:
 # BDVER2-NEXT:   Number of physical registers: 96
 # BDVER2-NEXT:   Total number of mappings created: 2
@@ -78,3 +83,8 @@
 # ZNVER3-NEXT:   Number of physical registers: 192
 # ZNVER3-NEXT:   Total number of mappings created: 2
 # ZNVER3-NEXT:   Max number of mappings used:  2
+
+# ZNVER4: *  Register File #2 -- Zn3IntegerPRF:
+# ZNVER4-NEXT:   Number of physical registers: 192
+# ZNVER4-NEXT:   Total number of mappings created: 2
+# ZNVER4-NEXT:   Max number of mappings used:  2
Index: llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
===
--- llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
+++ llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
@@ -47,6 +47,9 @@
 # ZNVER3-NEXT:Total Cycles:  17
 # ZNVER3-NEXT:Total uOps:2
 
+# ZNVER4-NEXT:Total Cycles:  17
+# ZNVER4-NEXT:Total uOps:2
+
 # BARCELONA:  Dispatch Width:4
 # BARCELONA-NEXT: uOps Per Cycle:0.15
 # BARCELONA-NEXT: IPC:   0.10
@@ -97,6 +100,11 @@
 # ZNVER3-NEXT:IPC:   0.12
 # ZNVER3-NEXT:Block RThroughput: 3.0
 
+# ZNVER4: Dispatch Width:6
+# ZNVER4-NEXT:uOps Per Cycle:0.12
+# ZNVER4-NEXT:IPC:   0.12
+# ZNVER4-NEXT:Block RThroughput: 3.0
+
 # ALL:Timeline view:
 
 # BARCELONA-NEXT: 0123456789
@@ -129,6 +137,9 @@
 # ZNVER3-NEXT:0123456
 # ZNVER3-NEXT:Index 0123456789
 
+# ZNVER4-NEXT:01

[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-14 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

In D139073#3995741 , @RKSimon wrote:

> @GGanesh reverse-ping

@RKSimon Thanks a lot! We were trying to get the libpfm patch posted and then 
enable the zen4 scheduler model, probably a week or two after that. The Intel 
model isn't approved for obvious reasons. Yes, I will post a stopgap patch!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-14 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

@GGanesh reverse-ping




Comment at: llvm/lib/Target/X86/X86.td:1619
 ProcessorFeatures.ZN3Tuning>;
+def : ProcModel<"znver4", Znver3Model, ProcessorFeatures.ZN4Features,
+ProcessorFeatures.ZN4Tuning>;

RKSimon wrote:
> This might sound strange - but its probably better to use either the IceLake 
> or SkylakeServer model initially - as they have AVX512 instruction coverage, 
> the znver3 model will assert in llvm-mca etc when it encounters an 
> unsupported instruction (any of the Z sched classes).
If the use of an Intel model isn't acceptable, using the Proc<> macro (no 
model) would be the most straightforward - but if you wish to use the znver3 
model as a stopgap then I don't want to stop you, just bear in mind that it 
will cause breaks, particularly where the models are used for codegen analysis 
etc.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-01 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

Thank you @gganesh I thought you'd forgotten about us :)

Please add znver4 test coverage to:

llvm/test/CodeGen/X86/cpus-amd.ll
llvm/test/CodeGen/X86/slow-unaligned-mem.ll




Comment at: llvm/lib/Target/X86/X86.td:1619
 ProcessorFeatures.ZN3Tuning>;
+def : ProcModel<"znver4", Znver3Model, ProcessorFeatures.ZN4Features,
+ProcessorFeatures.ZN4Tuning>;

This might sound strange - but it's probably better to use either the IceLake or 
SkylakeServer model initially, as they have AVX512 instruction coverage; the 
znver3 model will assert in llvm-mca etc. when it encounters an unsupported 
instruction (any of the Z sched classes).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-11-30 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh created this revision.
GGanesh added reviewers: RKSimon, craig.topper, andreadb.
Herald added subscribers: Enna1, StephenFan, pengfei, gbedwell, javed.absar, 
hiraditya.
Herald added a project: All.
GGanesh requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

This patch has the initial skeleton that enables AMD znver4!

AMD znver4 belongs to Family 19h, with the following model numbers:
Models 0x10 to 0x1f
Models 0x60 to 0x74
Models 0x78 to 0x7b
Models 0xa0 to 0xaf

The patch

1. Adds the ISAs that already have target descriptions.
2. Uses the znver3 scheduler model for now. (We have to update this later.)
3. Updates a few tests as per the initial enablement.
4. ISAs that are added are

avx512f, 
avx512dq, 
avx512ifma, 
avx512cd, 
avx512bw, 
avx512vl, 
avx512_bf16, 
avx512vbmi, 
avx512vbmi2,
avx512vl,
avx512_vnni, 
avx512_bitalg, 
avx512_vpopcntdq/vl


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s
  llvm/test/tools/llvm-mca/X86/cpus.s
  llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
  llvm/test/tools/llvm-mca/X86/register-file-statistics.s
  llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s

Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
===
--- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -5,6 +5,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER3 %s
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER4 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SNB %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s
@@ -112,6 +113,12 @@
 # ZNVER3-NEXT: [3] Maximum number of used buffer entries.
 # ZNVER3-NEXT: [4] Total number of buffer entries.
 
+# ZNVER4:  Scheduler's queue usage:
+# ZNVER4-NEXT: [1] Resource name.
+# ZNVER4-NEXT: [2] Average number of used buffer entries.
+# ZNVER4-NEXT: [3] Maximum number of used buffer entries.
+# ZNVER4-NEXT: [4] Total number of buffer entries.
+
 # BARCELONA:[1][2][3][4]
 # BARCELONA-NEXT:  SBPortAny0  1  54
 
@@ -165,3 +172,9 @@
 # ZNVER3-NEXT: Zn3Int   0  1  96
 # ZNVER3-NEXT: Zn3Load  0  0  72
 # ZNVER3-NEXT: Zn3Store 0  0  64
+
+# ZNVER4:   [1][2][3][4]
+# ZNVER4-NEXT: Zn3FP0  0  64
+# ZNVER4-NEXT: Zn3Int   0  1  96
+# ZNVER4-NEXT: Zn3Load  0  0  72
+# ZNVER4-NEXT: Zn3Store 0  0  64
Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s
===
--- llvm/test/tools/llvm-mca/X86/register-file-statistics.s
+++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -6,6 +6,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=