s390x: Implement DIVIDE TO INTEGER

Ilya Leoshkevich Wed, 21 Jan 2026 14:22:40 -0800

DIVIDE TO INTEGER computes floating point remainder and is used by
LuaJIT, so add it to QEMU.


The instruction comes in two flavors: for floats and doubles, which are
very similar. Since it's also quite complex, copy-pasting the
implementation would result in barely maintainable code. Mitigate that
using macros. An alternative would be an .inc file, but that looks like
overkill.

Signed-off-by: Ilya Leoshkevich <[email protected]>
---
 target/s390x/helper.h            |   2 +
 target/s390x/tcg/fpu_helper.c    | 199 +++++++++++++++++++++++++++++++
 target/s390x/tcg/insn-data.h.inc |   5 +-
 target/s390x/tcg/translate.c     |  26 ++++
 4 files changed, 231 insertions(+), 1 deletion(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 1a8a76abb98..f2b24c65a88 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -46,6 +46,8 @@ DEF_HELPER_FLAGS_3(sxb, TCG_CALL_NO_WG, i128, env, i128, i128)
 DEF_HELPER_FLAGS_3(deb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(ddb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(dxb, TCG_CALL_NO_WG, i128, env, i128, i128)
+DEF_HELPER_5(didb, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(dieb, void, env, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index 1ba43715ac1..f524c4257fb 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -286,6 +286,241 @@ Int128 HELPER(dxb)(CPUS390XState *env, Int128 a, Int128 b)
     return RET128(ret);
 }
 
+/*
+ * Truncate (no rounding) the fraction of x to its 23 most significant bits:
+ * the 25 low fraction bits in x.high and all of x.low are cleared.
+ */
+static float128 float128_precision_round_to_float32(float128 x)
+{
+    x.low = 0;
+    x.high = deposit64(x.high, 0, 25, 0);
+    return x;
+}
+
+/*
+ * Truncate (no rounding) the fraction of x to its 52 most significant bits:
+ * only the 60 low bits of x.low are cleared.
+ */
+static float128 float128_precision_round_to_float64(float128 x)
+{
+    x.low = deposit64(x.low, 0, 60, 0);
+    return x;
+}
+
+/* Return the exponent field of x (x.high bits 48-62) minus the 16383 bias. */
+static int float128_get_exp(float128 x)
+{
+    return extract64(x.high, 48, 15) - 16383;
+}
+
+/* Return x with its exponent field set to exp + 16383; exp is not checked. */
+static float128 float128_set_exp(float128 x, int exp)
+{
+    x.high = deposit64(x.high, 48, 15, exp + 16383);
+    return x;
+}
+
+/* Return x with delta added to its exponent; the fraction is unchanged. */
+static float128 float128_adjust_exp(float128 x, int delta)
+{
+    return float128_set_exp(x, float128_get_exp(x) + delta);
+}
+
+/*
+ * Return true if all 112 fraction bits of x are zero.
+ * NOTE(review): for a normal number this means the significand is exactly
+ * 1.0, i.e. |x| is a power of two - confirm this is the intended test.
+ */
+static bool float128_is_int(float128 x)
+{
+    return extract64(x.high, 0, 48) == 0 && x.low == 0;
+}
+
+/* Read a float32 from the upper 32 bits of env->vregs[r][0]. */
+static float32 extract_float32(CPUS390XState *env, uint32_t r)
+{
+    return env->vregs[r][0] >> 32;
+}
+
+/* Store x into the upper 32 bits of env->vregs[r][0], keeping the lower 32. */
+static void deposit_float32(CPUS390XState *env, uint32_t r, float32 x)
+{
+    env->vregs[r][0] = deposit64(env->vregs[r][0], 32, 32, x);
+}
+
+/* Read a float64 from env->vregs[r][0]. */
+static float64 extract_float64(CPUS390XState *env, uint32_t r)
+{
+    return env->vregs[r][0];
+}
+
+/* Store x into env->vregs[r][0]. */
+static void deposit_float64(CPUS390XState *env, uint32_t r, float64 x)
+{
+    env->vregs[r][0] = x;
+}
+
+/*
+ * Define HELPER(name), the DIVIDE TO INTEGER helper for type floatN.
+ *
+ * Macro parameters:
+ *   name     - helper name (dieb or didb)
+ *   floatN   - softfloat type, pasted into the floatN_*() function names
+ *   p        - a quotient with an exponent below p is treated as final
+ *   exp_max  - exponent limit used for the underflow and overflow checks
+ *   exp_bias - exponent adjustment applied to a trapped-underflow remainder
+ *              and to an overflowing quotient
+ *
+ * The helper divides register r1 by register r2, stores the remainder in r1
+ * and the integer quotient in r3, and sets the condition code in env->cc_op.
+ * m4 selects the rounding mode of a final quotient. Nothing is written back
+ * when an invalid-operation exception is enabled in the FPC and occurs.
+ */
+#define DIVIDE_TO_INTEGER(name, floatN, p, exp_max, exp_bias) \
+void HELPER(name)(CPUS390XState *env, uint32_t r1, uint32_t r2, \
+                  uint32_t r3, uint32_t m4) \
+{ \
+    int float_exception_flags = 0; \
+    floatN a, b, n, r; \
+    int dxc = -1; \
+    uint32_t cc; \
+\
+    a = extract_ ## floatN(env, r1); \
+    b = extract_ ## floatN(env, r2); \
+\
+    /* POp table "Results: DIVIDE TO INTEGER (Part 1 of 2)" */ \
+    if (floatN ## _is_signaling_nan(a, &env->fpu_status)) { \
+        r = n = floatN ## _silence_nan(a, &env->fpu_status); \
+        cc = 1; \
+        float_exception_flags |= float_flag_invalid; \
+    } else if (floatN ## _is_signaling_nan(b, &env->fpu_status)) { \
+        r = n = floatN ## _silence_nan(b, &env->fpu_status); \
+        cc = 1; \
+        float_exception_flags |= float_flag_invalid; \
+    } else if (floatN ## _is_quiet_nan(a, &env->fpu_status)) { \
+        r = n = a; \
+        cc = 1; \
+    } else if (floatN ## _is_quiet_nan(b, &env->fpu_status)) { \
+        r = n = b; \
+        cc = 1; \
+    } else if (floatN ## _is_infinity(a) || floatN ## _is_zero(b)) { \
+        r = n = floatN ## _default_nan(&env->fpu_status); \
+        cc = 1; \
+        float_exception_flags |= float_flag_invalid; \
+    } else if (floatN ## _is_infinity(b)) { \
+        r = a; \
+        n = floatN ## _set_sign(floatN ## _zero, \
+                                floatN ## _is_neg(a) != floatN ## _is_neg(b)); \
+        cc = 0; \
+    } else { \
+        float128 a128, b128, m128, n128, q128, r128; \
+        bool is_final, is_q128_smallish; \
+        int old_mode, r128_exp; \
+        uint32_t r_flags; \
+\
+        /* Compute precise quotient */ \
+        a128 = floatN ## _to_float128(a, &env->fpu_status); \
+        b128 = floatN ## _to_float128(b, &env->fpu_status); \
+        q128 = float128_div(a128, b128, &env->fpu_status); \
+\
+        /* Final or partial case? */ \
+        is_q128_smallish = float128_get_exp(q128) < p; \
+        is_final = is_q128_smallish || float128_is_int(q128); \
+\
+        /* \
+         * Final quotient is rounded using M4, \
+         * partial quotient is rounded toward zero. \
+         */ \
+        old_mode = s390_swap_bfp_rounding_mode(env, is_final ? m4 : 5); \
+        n128 = float128_round_to_int(q128, &env->fpu_status); \
+        s390_restore_bfp_rounding_mode(env, old_mode); \
+\
+        /* \
+         * Intermediate values are precision-rounded, \
+         * see "Intermediate Values" in POp. \
+         */ \
+        n128 = float128_precision_round_to_ ## floatN(n128); \
+\
+        /* Compute remainder */ \
+        m128 = float128_mul(b128, n128, &env->fpu_status); \
+        env->fpu_status.float_exception_flags = 0; \
+        r128 = float128_sub(a128, m128, &env->fpu_status); \
+        r128_exp = float128_get_exp(r128); \
+        r = float128_to_## floatN(r128, &env->fpu_status); \
+        r_flags = env->fpu_status.float_exception_flags; \
+\
+        /* POp table "Results: DIVIDE TO INTEGER (Part 2 of 2)" */ \
+        if (is_q128_smallish) { \
+            cc = 0; \
+            if (!floatN ## _is_zero(r)) { \
+                if (r128_exp < -(exp_max - 1)) { \
+                    if ((env->fpc >> 24) & S390_IEEE_MASK_UNDERFLOW) { \
+                        float_exception_flags |= float_flag_underflow; \
+                        dxc = 0x10; \
+                        r128 = float128_adjust_exp(r128, exp_bias); \
+                        r = float128_to_## floatN(r128, &env->fpu_status); \
+                    } \
+                } else if (r_flags & float_flag_inexact) { \
+                    float_exception_flags |= float_flag_inexact; \
+                    if ((env->fpc >> 24) & S390_IEEE_MASK_INEXACT) { \
+                        /* \
+                         * Check whether remainder was truncated (rounded \
+                         * toward zero) or incremented. \
+                         */ \
+                        if (float128_lt( \
+                                floatN ## _to_float128(floatN ## _abs(r), \
+                                                       &env->fpu_status), \
+                                float128_abs(r128), &env->fpu_status)) { \
+                            dxc = 0x8; \
+                        } else { \
+                            dxc = 0xc; \
+                        } \
+                    } \
+                } \
+            } \
+        } else if (float128_get_exp(n128) > exp_max) { \
+            n128 = float128_adjust_exp(n128, -exp_bias); \
+            cc = floatN ## _is_zero(r) ? 1 : 3; \
+        } else { \
+            cc = floatN ## _is_zero(r) ? 0 : 2; \
+        } \
+\
+        /* Adjust sign of zero */ \
+        if (floatN ## _is_zero(r)) { \
+            r = floatN ## _set_sign(r, float128_is_neg(a128)); \
+        } \
+        n = float128_to_ ## floatN(n128, &env->fpu_status); \
+        if (floatN ## _is_zero(n)) { \
+            n = floatN ## _set_sign(n, \
+                                    float128_is_neg(a128) != \
+                                        float128_is_neg(b128)); \
+        } \
+    } \
+\
+    /* Flush the results if needed */ \
+    if ((float_exception_flags & float_flag_invalid) && \
+        ((env->fpc >> 24) & S390_IEEE_MASK_INVALID)) { \
+        /* The action for invalid operation is "Suppress" */ \
+    } else { \
+        /* The action for other exceptions is "Complete" */ \
+        deposit_ ## floatN(env, r1, r); \
+        deposit_ ## floatN(env, r3, n); \
+        env->cc_op = cc; \
+    } \
+\
+    /* Raise an exception if needed */ \
+    if (dxc == -1) { \
+        env->fpu_status.float_exception_flags = float_exception_flags; \
+        handle_exceptions(env, false, GETPC()); \
+    } else { \
+        env->fpu_status.float_exception_flags = 0; \
+        tcg_s390_data_exception(env, dxc, GETPC()); \
+    } \
+}
+
+DIVIDE_TO_INTEGER(dieb, float32, 24, 127, 192)
+DIVIDE_TO_INTEGER(didb, float64, 53, 1023, 1536)
+
 /* 32-bit FP multiplication */
 uint64_t HELPER(meeb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index baaafe922e9..0d5392eac54 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -9,7 +9,7 @@
  *  OPC  = (op << 8) | op2 where op is the major, op2 the minor opcode
  *  NAME = name of the opcode, used internally
  *  FMT  = format of the opcode (defined in insn-format.h.inc)
- *  FAC  = facility the opcode is available in (defined in DisasFacility)
+ *  FAC  = facility the opcode is available in (defined in translate.c)
  *  I1   = func in1_xx fills o->in1
  *  I2   = func in2_xx fills o->in2
  *  P    = func prep_xx initializes o->*out*
@@ -361,6 +361,9 @@
     C(0xb91d, DSGFR,   RRE,   Z,   r1p1, r2_32s, r1_P, 0, divs64, 0)
     C(0xe30d, DSG,     RXY_a, Z,   r1p1, m2_64, r1_P, 0, divs64, 0)
     C(0xe31d, DSGF,    RXY_a, Z,   r1p1, m2_32s, r1_P, 0, divs64, 0)
+/* DIVIDE TO INTEGER */
+    D(0xb35b, DIDBR,   RRF_b, Z,   0, 0, 0, 0, dib, 0, 64)
+    D(0xb353, DIEBR,   RRF_b, Z,   0, 0, 0, 0, dib, 0, 32)
 
 /* EXCLUSIVE OR */
     C(0x1700, XR,      RR_a,  Z,   r1, r2, new, r1_32, xor, nz32)
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 540c5a569c0..a3b753bc829 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2283,6 +2283,32 @@ static DisasJumpType op_dxb(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+/* DIVIDE TO INTEGER: validate the fields, then call the 32/64-bit helper. */
+static DisasJumpType op_dib(DisasContext *s, DisasOps *o)
+{
+    const int r1 = get_field(s, r1);
+    const int r2 = get_field(s, r2);
+    const int r3 = get_field(s, r3);
+    const uint8_t m4 = get_field(s, m4);
+    /* M4 may not be 2 or above 7; 3 needs the floating-point extension. */
+    const bool bad_m4 = m4 == 2 || m4 > 7 ||
+        (m4 == 3 && !s390_has_feat(S390_FEAT_FLOATING_POINT_EXT));
+
+    /* R1, R2 and R3 must also be pairwise distinct. */
+    if (bad_m4 || r1 == r2 || r1 == r3 || r2 == r3) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    /* insn->data is 32 for the float32 helper, anything else uses float64. */
+    (s->insn->data == 32 ? gen_helper_dieb : gen_helper_didb)(
+        tcg_env, tcg_constant_i32(r1), tcg_constant_i32(r2),
+        tcg_constant_i32(r3), tcg_constant_i32(m4));
+    set_cc_static(s);
+
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_ear(DisasContext *s, DisasOps *o)
 {
     int r2 = get_field(s, r2);
-- 
2.52.0

[PATCH 2/3] target/s390x: Implement DIVIDE TO INTEGER

Reply via email to