In addition to Segher's comments:

jema...@gnu.org (Jose E. Marchesi) writes:
> [...]
> +/* This file contains the definition of the kernel helpers that are
> +   available to eBPF programs.
> +
> +   The primary source for information on kernel helpers is the
> +   linux/include/uapi/linux/bpf.h file in the Linux source tree.
> +   Please keep this database in sync.
> +
> +   The first column is the first kernel version featuring the helper
> +   function.  This should be an enumerate from bpf_kernel_version,
> +   defined in bpf-opts.h.  Note that the backend assumes that helpers
> +   never get deprecated in the kernel.  If that eventually happens,
> +   then we will need to use a bitmask here instead of an enumerate.
> +
> +   The second column is the constant-name for the helper.
> +   The third column is the program-name of the helper.
> +
> +   The fourth column is a list of names describing the types of the
> +   values returned and accepted by the helper, in one of these forms:
> +
> +     TYPES (type1, type2, ..., 0)
> +     VTYPES (type1, type2, ..., 0)
> +
> +   VTYPES should be used should the helper accept a variable number of
> +   arguments, TYPES otherwise.  The valid type names are:
> +
> +     `vt' for void.
> +     `it' for signed int.
> +     `ut' for unsigned int.
> +     `pt' for *void.
> +     `cpt' for const *void.

"*" after "void" in both cases.

> +     `st' for short int.
> +     `ust' for unsigned short int.
> +     `cst' for const char *.

Very minor, but it might be less confusing to pick something other than "s"
for "cst" given the above.

> [...]
> +/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions.  We
> +   provide the "standard" names as synonyms of the corresponding GCC
> +   builtins.  Note how the SKB argument is ignored.  */
> +
> +static inline long long
> +load_byte (void *skb, unsigned long long off)
> +{
> +  return __builtin_bpf_load_byte (off);
> +}
> [etc]

It might be worth adding __attribute__((unused)) to them, in case
anyone compiles with -Wsystem-headers.

> [...]
> +/* Supported versions of the Linux kernel.  */
> +enum bpf_kernel_version
> +{
> + /* Linux 4.x */
> + LINUX_V4_0,
> [etc.]

The contents should be indented by two spaces.

> [...]
> +enum bpf_builtins
> +{
> + BPF_BUILTIN_UNUSED = 0,
> + /* Built-ins for kernel helpers.  */
> +#define DEF_HELPER(V,D,N,T) BPF_BUILTIN_HELPER_##D,
> +#  include "bpf-helpers.def"
> +#undef DEF_HELPER
> + BPF_BUILTIN_HELPER_MAX,
> + /* Built-ins for non-generic loads and stores.  */
> + BPF_BUILTIN_LOAD_BYTE = BPF_BUILTIN_HELPER_MAX,
> + BPF_BUILTIN_LOAD_HALF,
> + BPF_BUILTIN_LOAD_WORD,
> + BPF_BUILTIN_MAX,
> +};
> +
> +/* This table is indexed by an enum bpf_builtin.  */
> +static const char *bpf_helper_names[] =
> +{
> + NULL,
> +#define DEF_HELPER(V,D,N,T) #N,
> +#  include "bpf-helpers.def"
> +#undef DEF_HELPER
> + NULL,
> + NULL,
> + NULL,
> + NULL
> +};

Same for these two.

> [...]
> +#define INCLUDE_STRING

You didn't seem to rely on this (i.e. std::string).

> [...]
> +/* Override options and do some other initialization.  */
> +
> +static void
> +bpf_option_override (void)
> +{
> +  /* Set the default target kernel if no -mkernel was specified.  */
> +  if (!global_options_set.x_bpf_kernel)
> +    bpf_kernel = LINUX_LATEST;

LINUX_LATEST is the default in the .opt file, so when is this needed?

> [...]
> +  /* Define BPF_KERNEL_VERSION_CODE */
> +  {
> +    const char *version_code;
> +    char *kernel_version_code;
> +
> +    switch (bpf_kernel)
> +      {
> +      case LINUX_V4_0: version_code = "0x40000"; break;
> +      case LINUX_V4_1: version_code = "0x40100"; break;
> +      case LINUX_V4_2: version_code = "0x40200"; break;
> +      case LINUX_V4_3: version_code = "0x40300"; break;
> +      case LINUX_V4_4: version_code = "0x40400"; break;
> +      case LINUX_V4_5: version_code = "0x40500"; break;
> +      case LINUX_V4_6: version_code = "0x40600"; break;
> +      case LINUX_V4_7: version_code = "0x40700"; break;
> +      case LINUX_V4_8: version_code = "0x40800"; break;
> +      case LINUX_V4_9: version_code = "0x40900"; break;
> +      case LINUX_V4_10: version_code = "0x40a00"; break;
> +      case LINUX_V4_11: version_code = "0x40b00"; break;
> +      case LINUX_V4_12: version_code = "0x40c00"; break;
> +      case LINUX_V4_13: version_code = "0x40d00"; break;
> +      case LINUX_V4_14: version_code = "0x40e00"; break;
> +      case LINUX_V4_15: version_code = "0x40f00"; break;
> +      case LINUX_V4_16: version_code = "0x41000"; break;
> +      case LINUX_V4_17: version_code = "0x42000"; break;
> +      case LINUX_V4_18: version_code = "0x43000"; break;
> +      case LINUX_V4_19: version_code = "0x44000"; break;
> +      case LINUX_V4_20: version_code = "0x45000"; break;
> +      case LINUX_V5_0: version_code = "0x50000"; break;
> +      case LINUX_V5_1: version_code = "0x50100"; break;
> +      case LINUX_V5_2: version_code = "0x50200"; break;
> +      default:
> +     gcc_unreachable ();      
> +      }
> +
> +#define KERNEL_VERSION_CODE "__BPF_KERNEL_VERSION_CODE__="    
> +    kernel_version_code
> +      = (char *) alloca (strlen (KERNEL_VERSION_CODE) + 7 + 1);
> +    strcpy (kernel_version_code, KERNEL_VERSION_CODE);
> +#undef KERNEL_VERSION_CODE
> +    strcat (kernel_version_code, version_code);
> +    builtin_define (kernel_version_code);

FWIW, a slightly easier way of writing this is:

    kernel_version_code = ACONCAT (("__BPF_KERNEL_VERSION_CODE__=",
                                    version_code, NULL));

> [...]
> +/* Determine whether the port is prepared to handle insns involving
> +   scalar mode MODE.  For a scalar mode to be considered supported,
> +   all the basic arithmetic and comparisons must work.  */
> +
> +static bool
> +bpf_scalar_mode_supported_p (scalar_mode mode)
> +{
> +  switch (mode)
> +    {
> +    case E_QImode:
> +    case E_HImode:
> +    case E_SImode:
> +    case E_DImode:
> +    case E_TImode:
> +      return true;
> +
> +    default:
> +      return false;
> +    }
> +
> +  return false;
> +}

Are you overriding this just to exclude floating-point modes?
If so, what specifically doesn't work?

Would be worth a comment.

> [...]
> +/* Return true if REGNO is th enumber of a hard register in which the

typo: "th enumber".

> [...]
> +/* Compute the size of the function's stack frame, including the local
> +   area and the register-save area.  */
> +
> +static void
> +bpf_compute_frame (void)
> +{
> +  int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
> +  int padding_locals, regno;
> +
> +  /* Set the space used in the stack by local variables.  This is
> +     rounded up to respect the minimum stack alignment.  */
> +  cfun->machine->local_vars_size = get_frame_size ();
> +
> +  padding_locals = cfun->machine->local_vars_size % stack_alignment;
> +  if (padding_locals)
> +    padding_locals = stack_alignment - padding_locals;
> +
> +  cfun->machine->local_vars_size += padding_locals;
> +
> +  /* Set the space used in the stack by callee-saved used registers in
> +     the current function.  There is no need to round up, since the
> +     registers are all 8 bytes wide.  */
> +  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +    if ((!fixed_regs[regno]
> +      && df_regs_ever_live_p (regno)
> +      && !call_used_regs[regno])
> +     || (cfun->calls_alloca
> +         && regno == STACK_POINTER_REGNUM))
> +      cfun->machine->callee_saved_reg_size += 8;
> +
> +  /* Check that the total size of the frame doesn't exceed the limit
> +     imposed by eBPF: currently 512 bytes.  */
> +  if ((cfun->machine->local_vars_size
> +       + cfun->machine->callee_saved_reg_size) > 512)
> +    {
> +      static int stack_limit_exceeded = 0;
> +
> +      if (!stack_limit_exceeded)
> +     error ("eBPF stack limit of 512 bytes exceeded");
> +      stack_limit_exceeded = 1;
> +    }
> +}

I think this does what TARGET_COMPUTE_FRAME_LAYOUT expects.
It'd be good to define the hook to bpf_compute_frame and avoid calling
it explicitly in the prologue, epilogue and elimination routines.

(The documentation says the hook's optional, but when it's such a
natural fit...)

> [...]
> +  /* Save callee-saved hard registes.  The register-save-area starts
> +     right after the local variables.  */
> +  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> +    {
> +      if ((!fixed_regs[regno]
> +        && df_regs_ever_live_p (regno)
> +        && !call_used_regs[regno])
> +       || (cfun->calls_alloca
> +           && regno == STACK_POINTER_REGNUM))
> +     {
> +       rtx mem;
> +
> +       if (!IN_RANGE (fp_offset, -1 - 0x7fff, 0x7fff))
> +         /* This has been already reported as an error in
> +            bpf_compute_frame. */
> +         break;
> +       else
> +         {
> +           mem = gen_frame_mem (DImode,
> +                                plus_constant (DImode,
> +                                               gen_rtx_REG (DImode, 
> FRAME_POINTER_REGNUM),

hard_frame_pointer_rtx here and elsewhere.

> [...]
> +/* Return true if a value of mode MODE1 is accessible in mode MODE2
> +   without copying.  */
> +
> +static bool
> +bpf_modes_tieable_p (enum machine_mode mode1,
> +                  enum machine_mode mode2)
> +{
> +  return (mode1 == mode2
> +       || GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2));
> +}

The second part makes the first part redundant.  But why do you
need to restrict it based on classes?  It seems like eBPF is an
example of an architecture where any modes are tieable, so using
the default would be better if possible.

If the restriction is needed, it would be worth having a comment
explaining which case you're excluding and why.

> [...]
> +    case PLUS:
> +      {
> +     /* The valid patterns here are:
> +        
> +        (PLUS ADDR_BASE CONST_INT)
> +        (PLUS CONST_INT ADDR_BASE)

The second one isn't canonical rtl, so you shouldn't (need to) handle it.
Please raise a bug if you find a case where it's being generated. :-)

> [...]
> +/* Split an out-of-range address displacement into hi and lo parts.
> +   The hi part will have to be loaded into a register separately, but
> +   the low part will be folded into the memory operand.  */
> +
> +static bool
> +bpf_legitimize_address_displacement (rtx *off1, rtx *off2,
> +                                  poly_int64 poly_offset, machine_mode)
> +{
> +  HOST_WIDE_INT orig_offset = poly_offset;
> +
> +  /* Our case is very easy: the REG part of an indirect address is
> +     64-bit wide, so it can hold any address.  This always leads to
> +     REG+0 */
> +
> +  *off1 = GEN_INT (orig_offset);
> +  *off2 = GEN_INT (0);
> +  return true;
> +}
> +
> +#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
> +#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT 
> bpf_legitimize_address_displacement

But then do you need to define the hook at all?  I think this is what
LRA does by default.  The hook is only really there to get better spill
code (via common anchor points) on targets with limited offset ranges.

> [...]
> +/* Return true if memory address ADDR in address space AS can have
> +   different meanings depending on the machine mode of the memory
> +   reference it is used for or if the address is valid for some modes
> +   but not others.  */
> +
> +static bool
> +bpf_mode_dependent_address_p (const_rtx addr ATTRIBUTE_UNUSED,
> +                              addr_space_t as ATTRIBUTE_UNUSED)
> +{
> +  return true;
> +}
> +
> +#undef TARGET_MODE_DEPENDENT_ADDRESS_P
> +#define TARGET_MODE_DEPENDENT_ADDRESS_P bpf_mode_dependent_address_p

Why does this need to be true?  False is the better answer if you can
give it. :-)  And it looks like the set of legitimate addresses doesn't
really care about modes.

> [...]
> +/* Return true if X is a legitimate constant for a MODE-mode immediate
> +   operand on the target machine.  */
> +
> +static bool
> +bpf_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
> +                        rtx x ATTRIBUTE_UNUSED)
> +{
> +  return true;
> +}
> +
> +#undef TARGET_LEGITIMATE_CONSTANT_P
> +#define TARGET_LEGITIMATE_CONSTANT_P bpf_legitimate_constant_p

This is the default, no real need to define it.

> [...]
> +/* Return a RTX indicating whether a function argument is passed in a
> +   register and if so, which register.  */
> +
> +static rtx
> +bpf_function_arg (cumulative_args_t ca, enum machine_mode mode 
> ATTRIBUTE_UNUSED,
> +                  const_tree type ATTRIBUTE_UNUSED, bool named 
> ATTRIBUTE_UNUSED)
> +{
> +  CUMULATIVE_ARGS *cum = get_cumulative_args (ca);
> +
> +  if (*cum < 5)
> +    return gen_rtx_REG (mode, *cum + 1);
> +  else
> +    /* An error have been emitted for this in
> +       bpf_function_arg_advance.  */
> +    return NULL_RTX;

This hook is called first, so "will be" rather than "has been".

(BTW, I just submitted a series of patches to change this interface,
but it should be a trivial change for whichever of us gets to make it.)

> [...]
> +/* Update the summarizer variable pointed by CA to advance past an
> +   argument in the argument list.  */
> +
> +static void
> +bpf_function_arg_advance (cumulative_args_t ca, enum machine_mode mode 
> ATTRIBUTE_UNUSED,
> +                          const_tree type ATTRIBUTE_UNUSED, bool named 
> ATTRIBUTE_UNUSED)
> +{
> +  CUMULATIVE_ARGS *cum = get_cumulative_args (ca);
> +
> +  if (*cum > 4)
> +    error ("eBPF doesn't support functions with more than 5 arguments");
> +  (*cum)++;
> +}

You allow TImode (int128_t) support, and arguments might be aggregates,
so shouldn't this be incrementing by the number of words rather than 1?

I guess the error logic also needs to be tweaked to report an error for
(say) a TImode argument passed after 4 DImode arguments, which would
need 6 registers in total.  It would also be good to avoid multiple
errors for the same argument list.

> [...]
> +/* Return true if an argument at the position indicated by CUM should
> +   be passed by reference.  If the hook returns true, a copy of that
> +   argument is made in memory and a pointer to the argument is passed
> +   instead of the argument itself.  */
> +
> +static bool
> +bpf_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
> +                    enum machine_mode mode, const_tree type,
> +                    bool named ATTRIBUTE_UNUSED)
> +{
> +  unsigned HOST_WIDE_INT size;
> +
> +  if (type)
> +    {
> +      if (AGGREGATE_TYPE_P (type))
> +     return true;
> +      size = int_size_in_bytes (type);
> +    }
> +  else
> +    size = GET_MODE_SIZE (mode);
> +
> +  return (size > 8*5);
> +}
> +
> +#undef TARGET_PASS_BY_REFERENCE
> +#define TARGET_PASS_BY_REFERENCE bpf_pass_by_reference

I might have misunderstood, but I thought from an earlier (IRC?)
message, it wasn't possible for the callee to access the caller's
frame, which was why you had the error about running out of argument
registers.  If so, won't passing by reference make the argument
inaccessible in practice?  I don't see what you gain by defining
the hook, since I'd have assumed (at least after the fix above)
that it would be better to pass by value and get an error about
having no argument registers left.

> [...]
> +/* Diagnostics on function contents.  */
> +
> +static void
> +bpf_set_current_function (tree decl)
> +{
> +  if (decl == NULL_TREE
> +      || current_function_decl == NULL_TREE
> +      || current_function_decl == error_mark_node
> +      || !cfun->machine
> +      || cfun->machine->diagnostics_checked_p)
> +    return;
> +
> +  /* Currently we don't do anything meaningful here.  To be
> +     changed.  */
> +
> +  /* Don't print the above diagnostics more than once.  */
> +  cfun->machine->diagnostics_checked_p = 1;
> +}
> +
> +#undef TARGET_SET_CURRENT_FUNCTION
> +#define TARGET_SET_CURRENT_FUNCTION bpf_set_current_function

IMO it'd be better to leave this undefined until it needs to do
something.  (Same for diagnostics_checked_p itself.)

> [...]
> +/* Output the assembly code for a constructor.  Since eBPF doesn't
> +   support indirect calls, constructors are not supported.  */
> +
> +static void
> +bpf_output_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
> +{
> +  fatal_insn ("no support for constructors sorry", symbol);

Would be better to use:

  sorry ("no support for constructors");

Even better, when SYMBOL_REF_DECL is nonnull, use it to provide an
alternative, more helpful message :-)

> [...]
> +/* Output the assembly code for a destructor.  Since eBPF doesn't
> +   support indirect calls, destructors are not supported.  */
> +
> +static void
> +bpf_output_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
> +{
> +  fatal_insn ("no support for destructors sorry", symbol);

Same idea here.

> [...]
> +/* Return the appropriate instruction to CALL to a function.  TARGET
> +   is a `mem' RTX denoting the address of the called function.
> +
> +   The main purposes of this function are:
> +   - To reject indirect CALL instructions, which are not supported by
> +     eBPf.

typo: eBPF

> +   - To recognize calls to kernel helper functions and emit the
> +     corresponding CALL N instruction.
> +
> +   This function is called from the expansion of the 'call' pattern in
> +   bpf.md.  */
> +
> +const char *
> +bpf_output_call (rtx target)
> +{
> +  static char *insn;
> +  rtx op;
> +
> +  op = XEXP (target, 0);
> +  switch (GET_CODE (op))
> +    {
> +    case CONST_INT:
> +      insn = (char *) xmalloc (5 + 6 + 1);
> +      sprintf (insn, "call\t%ld", INTVAL (op));

Would be good to avoid the memory leak.  Two ways of doing that are:

(1) output the instructions via output_asm_insn here and return "".

(2) strip the MEM wrapper from operands[0] before printing.  You can
    then return "call %0" for the above.  For...

> +      break;
> +    case SYMBOL_REF:
> +      {
> +     const char *function_name = XSTR (op, 0);
> +     int code;
> +      
> +     if (strncmp (function_name, "__builtin_bpf_helper_", 21) == 0
> +         && ((code = bpf_helper_code (function_name + 21)) != 0))
> +       {
> +         insn = (char *) xmalloc (5 + 6 + 1);
> +         sprintf (insn, "call\t%d", code);
> +       }
> +     else
> +       {       
> +         insn = (char *) xmalloc (strlen (function_name) + 5 + 1);
> +         sprintf (insn, "call\t%s", function_name);
> +       }
> +     break;

...this you could define a new prefix letter for printing the SYMBOL_REF
operand appropriately ("f" say) and return "call %f0".

But (1) is easiest. :-)

> +      }
> +    default:
> +      error ("indirect call in function, which are not supported by eBPF");
> +      insn = xstrdup ("call 0");

Can just return "call 0" without the xstrdup.

> [...]
> +/* Print an instruction operand.  This function is called in the macro
> +   PRINT_OPERAND defined in bpf.h */
> +
> +void
> +bpf_print_operand (FILE *file, rtx op, int code ATTRIBUTE_UNUSED)
> +{
> +  switch (GET_CODE (op))
> +    {
> +    case REG:
> +      fprintf (file, "%s", reg_names[REGNO (op)]);
> +      break;
> +    case MEM:
> +      output_address (GET_MODE (op), XEXP (op, 0));
> +      break;
> +    case CONST_DOUBLE:
> +      if (CONST_DOUBLE_HIGH (op))
> +     fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
> +              CONST_DOUBLE_HIGH (op), CONST_DOUBLE_LOW (op));
> +      else if (CONST_DOUBLE_LOW (op) < 0)
> +     fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (op));
> +      else
> +     fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (op));
> +      break;
> +    case LABEL_REF:
> +      /* This is for label values.  */
> +      /* Fallthrough. */
> +    default:
> +      output_addr_const (file, op);
> +    }
> +}

The three lines above the "default:" look redundant.

> [...]
> +/* Print an operand which is an address.  This function should handle
> +   any legit address, as accepted by bpf_legitimate_address_p.
> +
> +   This function is called in the PRINT_OPERAND_ADDRESS macro defined
> +   in bpf.h */
> +
> +void
> +bpf_print_operand_address (FILE *file, rtx addr)
> +{
> +  switch (GET_CODE (addr))
> +    {
> +    case REG:
> +      fprintf (file, "[%s+0]", reg_names[REGNO (addr)]);
> +      break;
> +    case PLUS:
> +      {
> +     rtx op0 = XEXP (addr, 0);
> +     rtx op1 = XEXP (addr, 1);
> +
> +     if (GET_CODE (op0) == REG && CONSTANT_ADDRESS_P (op1))
> +       {
> +         fprintf (file, "[%s+", reg_names[REGNO (op0)]);
> +         output_addr_const (file, op1);
> +         fputs ("]", file);
> +       }
> +     else if (GET_CODE (op1) == REG && CONSTANT_ADDRESS_P (op0))
> +       {
> +         fprintf (file, "[%s+", reg_names[REGNO (op1)]);
> +         output_addr_const (file, op0);
> +         fputs ("]", file);
> +       }

As above, you shouldn't (need to) handle the case in which the constant
comes before the register.

> [...]
> +/* Add a BPF builtin function with NAME, CODE and TYPE.  Return
> +   the function decl or NULL_TREE if the builtin was not added.  */
> +
> +static tree
> +def_builtin (const char *name, enum bpf_builtins code, tree type)
> +{
> +  tree t
> +    = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
> +
> +  if (t)
> +    bpf_builtins[code] = t;

I don't think add_builtin_function is allowed to return null,
and assigning null would just reassert the initial value anyway.

> [...]
> +      tree offset_arg = CALL_EXPR_ARG (exp, 0);
> +      struct expand_operand ops[2];
> +
> +      create_input_operand (&ops[0], expand_normal (offset_arg),
> +                         TYPE_MODE (TREE_TYPE (offset_arg)));
> +      create_input_operand (&ops[1], gen_rtx_CONST_INT (SImode, 0),
> +                         SImode);

const0_rtx (or use create_integer_operand).

> [...]
> +/* Always promote arguments and return values in function calls.  */
> +
> +#undef TARGET_PROMOTE_FUNCTION_MODE
> +#define TARGET_PROMOTE_FUNCTION_MODE 
> default_promote_function_mode_always_promote

Not a review comment, just curious: why did you go for this choice?
Is it already enshrined in the ABI?

One of the historical problems with caller promotion from a security
perspective is that the callee then trusts the caller to do the right
thing.  See e.g. the code in combine.c that optimises away "redundant"
extensions that the caller is assumed to have done for us.

E.g. a rogue caller could induce an out-of-bounds access for:

  unsigned int a[256];
  unsigned int f (unsigned char c) { return a[c]; }

because the zero-extension of "c" in the address calculation might be
optimised away.  This might not matter in an eBPF context though...

> [...]
> +/* This should not be needed, because ptr_mode, Pmode and word_mode
> +   are all the same width.  */
> +#define POINTERS_EXTEND_UNSIGNED 1

Yeah, IMO it would be better not to define it.

> [...]
> +/* The widest floating-point format supported by the hardware is
> +   64-bit.  */
> +#define WIDEST_HARDWARE_FP_SIZE 64

Normally soft-fp targets don't need to define this.  Is this related
to the special conversion libcalls you install for eBPF?

> [...]
> +/*** Order of Allocation of Registers.  */
> +
> +/* We generally want to put call-clobbered registers ahead of
> +   call-saved ones.  (IRA expects this.)  */
> +#define REG_ALLOC_ORDER                                      \
> +  {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}

Do you gain much by defining this?  I would have expected better
code without, given the architecture is so regular.

> [...]
> +/**** Register Classes.  */
> +
> +enum reg_class
> +{
> +  NO_REGS,           /* no registers in set.  */
> +  GR_REGS,           /* general-purpose integer registers.  */
> +  ALL_REGS,          /* all registers.  */
> +  LIM_REG_CLASSES    /* max value + 1.  */
> +};
> +
> +#define N_REG_CLASSES (int) LIM_REG_CLASSES
> +#define GENERAL_REGS GR_REGS
> +
> +/* An initializer containing the names of the register classes as C
> +   string constants.  These names are used in writing some of the
> +   debugging dumps.  */
> +#define REG_CLASS_NAMES                              \
> +{                                            \
> +  "NO_REGS",                                 \
> +  "GR_REGS",                                 \
> +  "ALL_REGS"                                 \
> +}
> +
> +/* An initializer containing the contents of the register classes, as
> +   integers which are bit masks.  The Nth integer specifies the
> +   contents of class N.  The way the integer MASK is interpreted is
> +   that register R is in the class if `MASK & (1 << R)' is 1.  */
> +#define REG_CLASS_CONTENTS                   \
> +{                                            \
> +   0x00000000, /* NO_REGS */                 \
> +   0x000007ff, /* GR_REGS */                 \
> +   0x000007ff, /* ALL_REGS */                        \
> +}
> +
> +/* A C expression whose value is a register class containing hard
> +   register REGNO.  In general there is more that one such class;
> +   choose a class which is "minimal", meaning that no smaller class
> +   also contains the register.  */
> +#define REGNO_REG_CLASS(REGNO) ((REGNO) < 11 ? GR_REGS : ALL_REGS)

Did you mean to include register 11 in ALL_REGS in REG_CLASS_CONTENTS?
If not, then there doesn't seem to be any distinction between ALL_REGS
and GR_REGS, and it'd be better to make one the alias of the other
(and make REGNO_REG_CLASS return NO_REGS for 11).

> [...]
> +/* A macro whose definition is the name of the class to which a
> +   valid index register must belong.  An index register is one used
> +   in an address where its value is either multiplied by a scale
> +   factor or added to another register (as well as added to a
> +   displacement).  */
> +#define INDEX_REG_CLASS GR_REGS

It looked like you didn't support register-indexed addressing,
so NO_REGS would be better.

> [...]
> +/* C expression which is nonzero if register number REGNO is suitable
> +   for use as a base register in operand addresses.  In eBPF every
> +   hard register can be used for this purpose.  */
> +#define REGNO_OK_FOR_BASE_P(REGNO)                   \
> +  ((REGNO) < FIRST_PSEUDO_REGISTER                   \
> +   || (unsigned)reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER)

The reg_regnumber stuff isn't needed for modern (LRA) targets.

> [...]
> +/* C expression which is nonzero if register number REGNO is suitable
> +   for use as an index register in operand addresses.  */
> +#define REGNO_OK_FOR_INDEX_P(REGNO)          \
> +  REGNO_OK_FOR_BASE_P(REGNO)

As above, this should be false if you don't support register-indexed
addressing.

> [...]
> +/* It is safe to return CLASS here.  No more restrictive class is
> +   needed.  */
> +#define PREFERRED_RELOAD_CLASS(X,CLASS) CLASS

This is a legacy macro.  The associated target hook is
TARGET_PREFERRED_RELOAD_CLASS, which defaults to the above,
so I think you can just delete this.

> [...]
> +/* Maximum number of consecutive registers of class CLASS needed to
> +   hold a value of mode MODE.  */
> +#define CLASS_MAX_NREGS(CLASS, MODE) \
> +  (((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))

Same here for TARGET_CLASS_MAX_NREGS.

> [...]
> +/* We cannot support DWARF2 because of the limitations of eBPF.  */
> +#define DBX_DEBUGGING_INFO

(I shed tears at this point, but still I continued...)

> [...]
> +/* Cost of moving data of mode MODE from a register in class FROM to a
> +   register in class TO.  Note that 2 is the default.  */
> +#define REGISTER_MOVE_COST(MODE,FROM,TO) 2
> +
> +/* Cost of moving data of mode MODE between a register of class CLASS
> +   and memory. IN is zero if the value is to be written to memory,
> +   nonzero if it is to be read in.  */
> +#define MEMORY_MOVE_COST(MODE,CLASS,IN) 4

These two are now target hooks.

> [...]
> +;;; Subtraction
> +(define_insn "sub<AM:mode>3"
> +  [(set (match_operand:AM          0 "register_operand"   "=r,r")
> +        (plus:AM (match_operand:AM 1 "register_operand"   " 0,0")
> +                 (match_operand:AM 2 "reg_or_imm_operand" " r,I")))]
> +  "1"
> +  "sub<msuffix>\t%0,%2"
> +  [(set_attr "type" "<mtype>")])

This should only (need to) handle subtractions of registers.
Subtractions of constants become additions.

> [...]
> +(define_insn "*mulsi3_extended"
> +  [(set (match_operand:DI       0 "register_operand" "=r,r")
> +        (sign_extend:DI
> +         (mult:SI (match_operand:SI 1 "register_operand" "0,0")
> +                  (match_operand:SI 2 "reg_or_imm_operand" "r,I"))))]
> +  ""
> +  "mul32\t%0,%2"
> +  [(set_attr "type" "alu32")])

There's a named pattern for this: mulsidi3.  You might get better
code by using that name instead.

> [...]
> +;; Division
> +(define_insn "div<AM:mode>3"
> +  [(set (match_operand:AM 0 "register_operand" "=r,r")
> +        (div:AM (match_operand:AM 1 "register_operand" " 0,0")
> +                (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
> +  ""
> +  "div<msuffix>\t%0,%2"
> +  [(set_attr "type" "<mtype>")])
> +
> +(define_insn "udiv<AM:mode>3"
> +  [(set (match_operand:AM 0 "register_operand" "=r,r")
> +        (div:AM (match_operand:AM 1 "register_operand" " 0,0")
> +                (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
> +  ""
> +  "div<msuffix>\t%0,%2"
> +  [(set_attr "type" "<mtype>")])

div and udiv are two different operations.  I don't see how we can
use the same eBPF instruction for both.  The rtl for udiv should also
use the udiv rtx code.

> +;;; Modulus
> +(define_insn "mod<AM:mode>3"
> +  [(set (match_operand:AM 0 "register_operand" "=r,r")
> +        (mod:AM (match_operand:AM 1 "register_operand" " 0,0")
> +                (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
> +  ""
> +  "mod<msuffix>\t%0,%2"
> +  [(set_attr "type" "<mtype>")])
> +
> +(define_insn "umod<AM:mode>3"
> +  [(set (match_operand:AM 0 "register_operand" "=r,r")
> +        (mod:AM (match_operand:AM 1 "register_operand" " 0,0")
> +                (match_operand:AM 2 "reg_or_imm_operand" "r,I")))]
> +  ""
> +  "mod<msuffix>\t%0,%2"
> +  [(set_attr "type" "<mtype>")])

Same here, with umod for the rtx code.

> [...]
> +;;; Sign-extension
> +
> +;; Sign-extending a 32-bit value into a 64-bit value is achieved using
> +;; shifting, with instructions generated by the expand below.
> +
> +(define_expand "extendsidi2"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> +     (sign_extend:DI (match_operand:SI 1 "register_operand" "r")))]

define_expands shouldn't have constraints.  (Same for the rest of the file.)

> [...]
> +(define_expand "mov<AMM:mode>"
> +  [(set (match_operand:AMM 0 "general_operand" "")
> +        (match_operand:AMM 1 "general_operand" ""))]
> +        ""
> +        "
> +{
> +    if (!register_operand(operands[0], <AMM:MODE>mode)
> +        && !register_operand(operands[1], <AMM:MODE>mode))
> +         operands[1] = force_reg (<AMM:MODE>mode, operands[1]); 

Some odd indentation here.  The code should be indented in the same
way as for .c files.

> +    /* In cases where the moved entity is a constant address, we
> +       need to emit an extra mov and modify the second operand to
> +       obtain something like:
> +
> +         lddw %T, %1
> +         ldxw %0, [%T+0]
> +
> +       Ditto for stores.  */
> +
> +    if (MEM_P (operands[1])
> +        && CONSTANT_ADDRESS_P (XEXP (operands[1], 0)))
> +      {
> +         rtx tmp = gen_reg_rtx (DImode);
> +
> +         emit_move_insn (tmp, XEXP (operands[1], 0));
> +         operands[1] = gen_rtx_MEM (<AMM:MODE>mode, tmp);
> +      }
> +
> +    if (MEM_P (operands[0])
> +        && CONSTANT_ADDRESS_P (XEXP (operands[0], 0)))
> +      {
> +         rtx tmp = gen_reg_rtx (DImode);
> +  
> +         emit_move_insn (tmp, XEXP (operands[0], 0));
> +         operands[0] = gen_rtx_MEM (<AMM:MODE>mode, tmp);
> +      }

But in that case, why not just say that constant addresses aren't
legitimate?  That will make it easier for the optimisers to do the
right thing and (hopefully) generate better code.

> +(define_constraint "B"
> +  "A constant argument for LDDW."
> +  (ior (match_code "const,symbol_ref,label_ref,const_double")
> +       (and (match_code "const_int")
> +            (match_test "IN_RANGE (ival, -1 - 0x7fffffffffffffff, 
> 0x7fffffffffffffff)"))))

Like Segher says, the IN_RANGE check is always true.  So just:

  (match_code "const,symbol_ref,label_ref,const_double,const_int")

should be enough.

> +(define_predicate "imm32_operand"
> +  (ior (and (match_code "const_int")
> +            (match_test "IN_RANGE (INTVAL (op), 0, 0xffffffff)"))
> +       (match_code "symbol_ref,label_ref,const")))

This is only used with SImode.  const_ints are represented in
sign-extended form, so the INTVAL will have the range of int32_t
rather than uint32_t.  I.e.:

   IN_RANGE (INTVAL (op), 0x80000000, 0xffffffff)

is always false for const_ints interpreted as SImode.

> +(define_predicate "lddw_operand"
> +  (ior (and (match_code "const_int")
> +            (match_test "IN_RANGE (INTVAL (op), 0, 0xffffffffffffffff)"))
> +       (match_code "symbol_ref,label_ref,const,const_double")))

Same point here: the INTVAL will have the range of int64_t rather
than uint64_t.

> [...]
> +(define_predicate "call_operand"
> +  (match_code "mem")
> +{
> +  if (GET_CODE (op) != MEM)
> +    return 0;
> +
> +  op = XEXP (op, 0);
> +
> +  if (GET_MODE (op) != mode
> +      && GET_MODE (op) != VOIDmode
> +      && mode != VOIDmode)
> +    return 0;

No need for the check against "mode".  That logically applies to "op" rather
than the address, and is generated automatically.

> +
> +  switch (GET_CODE (op))
> +  {
> +  case REG:
> +  case CONST_INT:
> +  case SYMBOL_REF:
> +  case LABEL_REF:
> +    return 1;
> +    break;
> +  case CONST:
> +    {
> +      switch (GET_CODE (XEXP (op, 0)))
> +     {
> +     case SYMBOL_REF:
> +     case LABEL_REF:
> +     case CONST_INT:
> +       return 1;
> +     default:
> +       break;
> +     }
> +      break;
> +    }
> +  default:
> +    break;
> +  }

Stripping CONST via:

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

would be simpler.  Please also use true/false these days (realise a lot
of existing code doesn't).

It really would be nice to avoid the imports of elfos.h stuff,
but you know that already. :-)

Generally looks really good, thanks.

Richard

Reply via email to