aarch64: implement new TCG target for aarch64

Laurent Desnogues Mon, 27 May 2013 03:28:51 -0700

On Mon, May 27, 2013 at 12:13 PM, Claudio Fontana
<claudio.font...@huawei.com> wrote:
> Hello,
>
> On 27.05.2013 11:47, Laurent Desnogues wrote:
>> Hi,
>>
>> basically pointing out what I pointed for v1.
>>
>> On Thu, May 23, 2013 at 10:18 AM, Claudio Fontana
>> <claudio.font...@huawei.com> wrote:
>>>
>>> add preliminary support for TCG target aarch64.
>>>
>>> Signed-off-by: Claudio Fontana <claudio.font...@huawei.com>
>>> ---
>>>  include/exec/exec-all.h  |    5 +-
>>>  tcg/aarch64/tcg-target.c | 1185 
>>> ++++++++++++++++++++++++++++++++++++++++++++++
>>>  tcg/aarch64/tcg-target.h |   99 ++++
>>>  translate-all.c          |    2 +
>>>  4 files changed, 1290 insertions(+), 1 deletion(-)
>>>  create mode 100644 tcg/aarch64/tcg-target.c
>>>  create mode 100644 tcg/aarch64/tcg-target.h
>>>
>> [...]
>>> +++ b/tcg/aarch64/tcg-target.c
>> [...]
>>> +static void tcg_target_qemu_prologue(TCGContext *s)
>>> +{
>>> +    /* NB: frame sizes are in 16 byte stack units! */
>>> +    int frame_size_callee_saved, frame_size_tcg_locals;
>>> +    int r;
>>> +
>>> +    /* save pairs             (FP, LR) and (X19, X20) .. (X27, X28) */
>>> +    frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
>>
>> Please add a comment about this computation.
>
> Frame sizes are calculated in 16 byte units, which is the first comment.
> What this computation does is commented on the line above the computation.
> Each unit represents a pair, (FP, LR), (X19, X20) .. (X27, X28).
> The calculation just counts the number of units to save, there seems to be 
> little else to comment about as far as I can see.


You really should read my original message, where I explained
why your computation is odd (pun intended) ;-)

>>> +
>>> +    /* frame size requirement for TCG local variables */
>>> +    frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
>>> +        + CPU_TEMP_BUF_NLONGS * sizeof(long)
>>> +        + (TCG_TARGET_STACK_ALIGN - 1);
>>> +    frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
>>> +    frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
>>> +
>>> +    /* push (FP, LR) and update sp */
>>> +    tcg_out_push_pair(s, TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
>>> +
>>> +    /* FP -> callee_saved */
>>> +    tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);
>>> +
>>> +    /* store callee-preserved regs x19..x28 using FP -> callee_saved */
>>> +    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
>>
>> TCG_REG_X27 -> TCG_REG_X28.
>
> That would be a mistake: we are using X19 as the first element of couple 
> (X19,X20),
> so the last element is identified by X27 as the first element of couple 
> (X27,X28).
> Note the r += 2, and the fact that we are storing r and r+1, as you can see...

Sorry but using TCG_REG_X28 wouldn't be a mistake, try it by
hand.

Now imagine the PCS would have said X19-X29 should be saved.
If you had used X28 as end of loop test, the code would have
been wrong.

My comments aren't pointing at real bugs, but just try to improve
readability. You can safely ignore them as the PCS isn't likely to
change :)


Laurent


>>> +        int idx; idx = (r - TCG_REG_X19) / 2 + 1;
>>> +        tcg_out_store_pair(s, r, r + 1, idx);
>
> ... here.
>
>>> +    }
>>> +
>>> +    /* make stack space for TCG locals */
>>> +    tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
>>> +                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
>>> +    /* inform TCG about how to find TCG locals with register, offset, size 
>>> */
>>> +    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
>>> +                  CPU_TEMP_BUF_NLONGS * sizeof(long));
>>> +
>>> +    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
>>> +    tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);
>>> +
>>> +    tb_ret_addr = s->code_ptr;
>>> +
>>> +    /* remove TCG locals stack space */
>>> +    tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
>>> +                 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
>>> +
>>> +    /* restore registers x19..x28.
>>> +       FP must be preserved, so it still points to callee_saved area */
>>> +    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
>>
>> TCG_REG_X27 -> TCG_REG_X28.
>> Thanks,
>>
>> Laurent
>>
>>> +        int idx; idx = (r - TCG_REG_X19) / 2 + 1;
>>> +        tcg_out_load_pair(s, r, r + 1, idx);
>>> +    }
>>> +
>>> +    /* pop (FP, LR), restore SP to previous frame, return */
>>> +    tcg_out_pop_pair(s, TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
>>> +    tcg_out_ret(s);
>>> +}
>>> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
>>> new file mode 100644
>>> index 0000000..075ab2a
>>> --- /dev/null
>>> +++ b/tcg/aarch64/tcg-target.h
>>> @@ -0,0 +1,99 @@
>>> +/*
>>> + * Initial TCG Implementation for aarch64
>>> + *
>>> + * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
>>> + * Written by Claudio Fontana
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>> + * (at your option) any later version.
>>> + *
>>> + * See the COPYING file in the top-level directory for details.
>>> + */
>>> +
>>> +#ifndef TCG_TARGET_AARCH64
>>> +#define TCG_TARGET_AARCH64 1
>>> +
>>> +#undef TCG_TARGET_WORDS_BIGENDIAN
>>> +#undef TCG_TARGET_STACK_GROWSUP
>>> +
>>> +typedef enum {
>>> +    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, TCG_REG_X4,
>>> +    TCG_REG_X5, TCG_REG_X6, TCG_REG_X7, TCG_REG_X8, TCG_REG_X9,
>>> +    TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, TCG_REG_X13, TCG_REG_X14,
>>> +    TCG_REG_X15, TCG_REG_X16, TCG_REG_X17, TCG_REG_X18, TCG_REG_X19,
>>> +    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24,
>>> +    TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, TCG_REG_X28,
>>> +    TCG_REG_FP,  /* frame pointer */
>>> +    TCG_REG_LR, /* link register */
>>> +    TCG_REG_SP,  /* stack pointer or zero register */
>>> +    TCG_REG_XZR = TCG_REG_SP /* same register number */
>>> +    /* program counter is not directly accessible! */
>>> +} TCGReg;
>>> +
>>> +#define TCG_TARGET_NB_REGS 32
>>> +
>>> +/* used for function call generation */
>>> +#define TCG_REG_CALL_STACK              TCG_REG_SP
>>> +#define TCG_TARGET_STACK_ALIGN          16
>>> +#define TCG_TARGET_CALL_ALIGN_ARGS      1
>>> +#define TCG_TARGET_CALL_STACK_OFFSET    0
>>> +
>>> +/* optional instructions */
>>> +#define TCG_TARGET_HAS_div_i32          0
>>> +#define TCG_TARGET_HAS_ext8s_i32        0
>>> +#define TCG_TARGET_HAS_ext16s_i32       0
>>> +#define TCG_TARGET_HAS_ext8u_i32        0
>>> +#define TCG_TARGET_HAS_ext16u_i32       0
>>> +#define TCG_TARGET_HAS_bswap16_i32      0
>>> +#define TCG_TARGET_HAS_bswap32_i32      0
>>> +#define TCG_TARGET_HAS_not_i32          0
>>> +#define TCG_TARGET_HAS_neg_i32          0
>>> +#define TCG_TARGET_HAS_rot_i32          1
>>> +#define TCG_TARGET_HAS_andc_i32         0
>>> +#define TCG_TARGET_HAS_orc_i32          0
>>> +#define TCG_TARGET_HAS_eqv_i32          0
>>> +#define TCG_TARGET_HAS_nand_i32         0
>>> +#define TCG_TARGET_HAS_nor_i32          0
>>> +#define TCG_TARGET_HAS_deposit_i32      0
>>> +#define TCG_TARGET_HAS_movcond_i32      0
>>> +#define TCG_TARGET_HAS_add2_i32         0
>>> +#define TCG_TARGET_HAS_sub2_i32         0
>>> +#define TCG_TARGET_HAS_mulu2_i32        0
>>> +#define TCG_TARGET_HAS_muls2_i32        0
>>> +
>>> +#define TCG_TARGET_HAS_div_i64          0
>>> +#define TCG_TARGET_HAS_ext8s_i64        0
>>> +#define TCG_TARGET_HAS_ext16s_i64       0
>>> +#define TCG_TARGET_HAS_ext32s_i64       0
>>> +#define TCG_TARGET_HAS_ext8u_i64        0
>>> +#define TCG_TARGET_HAS_ext16u_i64       0
>>> +#define TCG_TARGET_HAS_ext32u_i64       0
>>> +#define TCG_TARGET_HAS_bswap16_i64      0
>>> +#define TCG_TARGET_HAS_bswap32_i64      0
>>> +#define TCG_TARGET_HAS_bswap64_i64      0
>>> +#define TCG_TARGET_HAS_not_i64          0
>>> +#define TCG_TARGET_HAS_neg_i64          0
>>> +#define TCG_TARGET_HAS_rot_i64          1
>>> +#define TCG_TARGET_HAS_andc_i64         0
>>> +#define TCG_TARGET_HAS_orc_i64          0
>>> +#define TCG_TARGET_HAS_eqv_i64          0
>>> +#define TCG_TARGET_HAS_nand_i64         0
>>> +#define TCG_TARGET_HAS_nor_i64          0
>>> +#define TCG_TARGET_HAS_deposit_i64      0
>>> +#define TCG_TARGET_HAS_movcond_i64      0
>>> +#define TCG_TARGET_HAS_add2_i64         0
>>> +#define TCG_TARGET_HAS_sub2_i64         0
>>> +#define TCG_TARGET_HAS_mulu2_i64        0
>>> +#define TCG_TARGET_HAS_muls2_i64        0
>>> +
>>> +enum {
>>> +    TCG_AREG0 = TCG_REG_X19,
>>> +};
>>> +
>>> +static inline void flush_icache_range(tcg_target_ulong start,
>>> +                                      tcg_target_ulong stop)
>>> +{
>>> +    __builtin___clear_cache((char *)start, (char *)stop);
>>> +}
>>> +
>>> +#endif /* TCG_TARGET_AARCH64 */
>>> diff --git a/translate-all.c b/translate-all.c
>>> index da93608..9d265bf 100644
>>> --- a/translate-all.c
>>> +++ b/translate-all.c
>>> @@ -461,6 +461,8 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>>>  # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
>>>  #elif defined(__sparc__)
>>>  # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
>>> +#elif defined(__aarch64__)
>>> +# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
>>>  #elif defined(__arm__)
>>>  # define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
>>>  #elif defined(__s390x__)
>>> --
>>> 1.8.1
>
>

Re: [Qemu-devel] [PATCH 2/4] tcg/aarch64: implement new TCG target for aarch64

Reply via email to