https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106598

            Bug ID: 106598
           Summary: s390: Inefficient branchless conditionals for int
           Product: gcc
           Version: 11.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jens.seifert at de dot ibm.com
  Target Milestone: ---

// Baseline reproducer: yields 1 when a is less than b and 0 otherwise,
// using the plain relational operator.
int lt(int a, int b)
{
    return a < b;
}

generates:
        cr      %r2,%r3
        lhi     %r1,1
        lhi     %r2,0
        locrnl  %r1,%r2
        lgfr    %r2,%r1
        br      %r14

// Hand-written alternative to lt(): derives a < b from the sign of a
// subtraction instead of a comparison.
// Assumes long long is wider than int (TODO confirm for the target), so the
// widened difference cannot overflow and is negative exactly when a < b.
int ltOpt(int a, int b)
{
    long long x = a;
    long long y = b;
    // Shifting the difference as an unsigned value brings its top bit down to
    // bit 0, giving 1 or 0. Assumes long long is 64 bits wide.
    return ((unsigned long long)(x - y)) >> 63;
}

better:
        sgr     %r2,%r3
        srlg    %r2,%r2,63
        br      %r14

// Baseline reproducer for the mask form: negates the 0/1 comparison result,
// so it yields -1 when a is less than b and 0 otherwise.
int ltMask(int a, int b)
{
    return -(a < b);
}

generates:
        cr      %r2,%r3
        lhi     %r1,1
        lhi     %r2,0
        locrnl  %r1,%r2
        sllg    %r1,%r1,63
        srag    %r2,%r1,63
        br      %r14


// Hand-written alternative to ltMask(): builds the mask from the sign of a
// subtraction instead of a comparison.
// Assumes long long is wider than int (TODO confirm for the target), so the
// widened difference cannot overflow and is negative exactly when a < b.
int ltMaskOpt(int a, int b)
{
    long long x = a;
    long long y = b;
    // NOTE(review): right-shifting a negative signed value is
    // implementation-defined in ISO C. This relies on the compiler performing
    // an arithmetic shift, which replicates the sign bit and gives -1 or 0;
    // confirm for the compiler in use. Also assumes long long is 64 bits wide.
    return (x - y) >> 63;
}

better:
        sgr     %r2,%r3
        srag    %r2,%r2,63
        br      %r14

// Baseline reproducer for the mask form of "less than or equal": negates the
// 0/1 comparison result, so it yields -1 when a <= b and 0 otherwise.
int leMask(int a, int b)
{
    return -(a <= b);
}

generates:
        cr      %r2,%r3
        lhi     %r1,1
        lhi     %r2,0
        locrnle %r1,%r2
        sllg    %r1,%r1,63
        srag    %r2,%r1,63
        br      %r14

// Hand-written alternative to leMask() using s390 inline assembly.
// The asm first executes "cr" on a and b, then "slbgr" with c as both
// operands, and stores the result in c. The "cc" clobber tells the compiler
// that the condition code is modified.
// Presumably cr sets the condition code from comparing a with b and slbgr
// computes c - c minus a borrow taken from that condition code, giving -1
// when a <= b and 0 otherwise -- confirm against the z/Architecture
// Principles of Operation.
int leMaskOpt(int a, int b)
{
   int c;
   // NOTE(review): c is declared write-only ("=r") although slbgr names it as
   // a source operand too; its prior value presumably cancels out in c - c.
   __asm__("cr %1,%2\n\tslbgr %0,%0":"=r"(c):"r"(a),"r"(b):"cc");
   // slbgr creates a 64-bit mask, so the lgfr sign extension would not be
   // required.
   return c;
}

better:
        cr      %r2,%r3
        slbgr   %r2,%r2
        lgfr    %r2,%r2         <= this sign extension is not necessary
        br      %r14


// Baseline reproducer: yields 1 when a is less than or equal to b and 0
// otherwise, using the plain relational operator.
int le(int a, int b)
{
    return a <= b;
}

generates:
        cr      %r2,%r3
        lhi     %r1,1
        lhi     %r2,0
        locrnle %r1,%r2
        lgfr    %r2,%r1
        br      %r14

// Hand-written alternative to le() using s390 inline assembly.
// The asm first executes "cr" on a and b, then "slbgr" with c as both
// operands, and stores the result in c. The "cc" clobber tells the compiler
// that the condition code is modified.
// Presumably the asm leaves an all-ones value in c when a <= b and zero
// otherwise -- confirm against the z/Architecture Principles of Operation.
int leOpt(int a, int b)
{
   unsigned long long c;
   // NOTE(review): c is declared write-only ("=r") although slbgr names it as
   // a source operand too; its prior value presumably cancels out in c - c.
   __asm__("cr %1,%2\n\tslbgr %0,%0":"=r"(c):"r"(a),"r"(b):"cc");
   // Logical shift of the unsigned value brings the top bit down to bit 0,
   // turning the mask into 1 or 0. Assumes unsigned long long is 64 bits wide.
   return (c >> 63);
}

better:
        cr %r2,%r3
        slbgr %r2,%r2
        srlg    %r2,%r2,63
        br      %r14

Reply via email to