Re: [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb

2023-08-09 Thread Nicholas Piggin
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> When a direct branch is out of range, we can load the destination for
> the indirect branch using PLA (for 16GB worth of buffer) and PLD from
> the TranslationBlock for everything larger.
>
> This means the patch affects exactly one instruction: B (plus filler),
> PLA or PLD.  Which means we can update and execute the patch atomically.
>

Aside from the changelog issue that Jordan pointed out,

Reviewed-by: Nicholas Piggin 

> Signed-off-by: Richard Henderson 
> ---
>  tcg/ppc/tcg-target.c.inc | 31 +++
>  1 file changed, 19 insertions(+), 12 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 63fe4ef995..b686a68247 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -2646,31 +2646,38 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
>  uintptr_t ptr = get_jmp_target_addr(s, which);
>  
>  if (USE_REG_TB) {
> +/*
> + * With REG_TB, we must always use indirect branching,
> + * so that the branch destination and TCG_REG_TB match.
> + */
>  ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
>  tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
> -
> -/* TODO: Use direct branches when possible. */
> -set_jmp_insn_offset(s, which);
>  tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
> -
>  tcg_out32(s, BCCTR | BO_ALWAYS);
>  
>  /* For the unlinked case, need to reset TCG_REG_TB.  */
>  set_jmp_reset_offset(s, which);
>  tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
>   -tcg_current_code_size(s));
> -} else {
> -/* Direct branch will be patched by tb_target_set_jmp_target. */
> -set_jmp_insn_offset(s, which);
> -tcg_out32(s, NOP);
> +return;
> +}
>  
> -/* When branch is out of range, fall through to indirect. */
> +/* Direct branch will be patched by tb_target_set_jmp_target. */
> +set_jmp_insn_offset(s, which);
> +tcg_out32(s, NOP);
> +
> +/* When branch is out of range, fall through to indirect. */
> +if (have_isa_3_10) {
> +ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
> +tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
> +} else {
>  tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
>  tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
> -tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> -tcg_out32(s, BCCTR | BO_ALWAYS);
> -set_jmp_reset_offset(s, which);
>  }
> +
> +tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> +tcg_out32(s, BCCTR | BO_ALWAYS);
> +set_jmp_reset_offset(s, which);
>  }
>  
>  void tb_target_set_jmp_target(const TranslationBlock *tb, int n,




Re: [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb

2023-08-08 Thread Richard Henderson

On 8/8/23 19:56, Jordan Niethe wrote:

> On Tue, Aug 8, 2023 at 1:02 PM Richard Henderson
>  wrote:
>>
>> When a direct branch is out of range, we can load the destination for
>> the indirect branch using PLA (for 16GB worth of buffer) and PLD from
>> the TranslationBlock for everything larger.
>>
>> This means the patch affects exactly one instruction: B (plus filler),
>> PLA or PLD.  Which means we can update and execute the patch atomically.
>
> I think the commit message needs to be updated for Nick's changes.


Whoops, yes.


r~



Re: [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb

2023-08-08 Thread Jordan Niethe
On Tue, Aug 8, 2023 at 1:02 PM Richard Henderson
 wrote:
>
> When a direct branch is out of range, we can load the destination for
> the indirect branch using PLA (for 16GB worth of buffer) and PLD from
> the TranslationBlock for everything larger.
>
> This means the patch affects exactly one instruction: B (plus filler),
> PLA or PLD.  Which means we can update and execute the patch atomically.

I think the commit message needs to be updated for Nick's changes.

>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/ppc/tcg-target.c.inc | 31 +++
>  1 file changed, 19 insertions(+), 12 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 63fe4ef995..b686a68247 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -2646,31 +2646,38 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
>  uintptr_t ptr = get_jmp_target_addr(s, which);
>
>  if (USE_REG_TB) {
> +/*
> + * With REG_TB, we must always use indirect branching,
> + * so that the branch destination and TCG_REG_TB match.
> + */
>  ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
>  tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
> -
> -/* TODO: Use direct branches when possible. */
> -set_jmp_insn_offset(s, which);
>  tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
> -
>  tcg_out32(s, BCCTR | BO_ALWAYS);
>
>  /* For the unlinked case, need to reset TCG_REG_TB.  */
>  set_jmp_reset_offset(s, which);
>  tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
>   -tcg_current_code_size(s));
> -} else {
> -/* Direct branch will be patched by tb_target_set_jmp_target. */
> -set_jmp_insn_offset(s, which);
> -tcg_out32(s, NOP);
> +return;
> +}
>
> -/* When branch is out of range, fall through to indirect. */
> +/* Direct branch will be patched by tb_target_set_jmp_target. */
> +set_jmp_insn_offset(s, which);
> +tcg_out32(s, NOP);
> +
> +/* When branch is out of range, fall through to indirect. */
> +if (have_isa_3_10) {
> +ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
> +tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
> +} else {
>  tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
>  tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
> -tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> -tcg_out32(s, BCCTR | BO_ALWAYS);
> -set_jmp_reset_offset(s, which);
>  }
> +
> +tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> +tcg_out32(s, BCCTR | BO_ALWAYS);
> +set_jmp_reset_offset(s, which);
>  }
>
>  void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
> --
> 2.34.1
>

Thank you for implementing this, Richard.

I was able to boot mttcg guests on P9 and P10 hosts.

Tested-by: Jordan Niethe 
Reviewed-by: Jordan Niethe 



[PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb

2023-08-07 Thread Richard Henderson
When a direct branch is out of range, we can load the destination for
the indirect branch using PLA (for 16GB worth of buffer) and PLD from
the TranslationBlock for everything larger.

This means the patch affects exactly one instruction: B (plus filler),
PLA or PLD.  Which means we can update and execute the patch atomically.

Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 63fe4ef995..b686a68247 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2646,31 +2646,38 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
 uintptr_t ptr = get_jmp_target_addr(s, which);
 
 if (USE_REG_TB) {
+/*
+ * With REG_TB, we must always use indirect branching,
+ * so that the branch destination and TCG_REG_TB match.
+ */
 ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
 tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
-
-/* TODO: Use direct branches when possible. */
-set_jmp_insn_offset(s, which);
 tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
-
 tcg_out32(s, BCCTR | BO_ALWAYS);
 
 /* For the unlinked case, need to reset TCG_REG_TB.  */
 set_jmp_reset_offset(s, which);
 tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
  -tcg_current_code_size(s));
-} else {
-/* Direct branch will be patched by tb_target_set_jmp_target. */
-set_jmp_insn_offset(s, which);
-tcg_out32(s, NOP);
+return;
+}
 
-/* When branch is out of range, fall through to indirect. */
+/* Direct branch will be patched by tb_target_set_jmp_target. */
+set_jmp_insn_offset(s, which);
+tcg_out32(s, NOP);
+
+/* When branch is out of range, fall through to indirect. */
+if (have_isa_3_10) {
+ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
+tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
+} else {
 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
 tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
-tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
-tcg_out32(s, BCCTR | BO_ALWAYS);
-set_jmp_reset_offset(s, which);
 }
+
+tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+tcg_out32(s, BCCTR | BO_ALWAYS);
+set_jmp_reset_offset(s, which);
 }
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
-- 
2.34.1