On 3/27/23 20:06, Song Gao wrote:
+static uint64_t do_vpcnt(uint64_t u1) +{ + u1 = (u1 & 0x5555555555555555ULL) + ((u1 >> 1) & 0x5555555555555555ULL); + u1 = (u1 & 0x3333333333333333ULL) + ((u1 >> 2) & 0x3333333333333333ULL); + u1 = (u1 & 0x0F0F0F0F0F0F0F0FULL) + ((u1 >> 4) & 0x0F0F0F0F0F0F0F0FULL); + u1 = (u1 & 0x00FF00FF00FF00FFULL) + ((u1 >> 8) & 0x00FF00FF00FF00FFULL); + u1 = (u1 & 0x0000FFFF0000FFFFULL) + ((u1 >> 16) & 0x0000FFFF0000FFFFULL); + u1 = (u1 & 0x00000000FFFFFFFFULL) + ((u1 >> 32)); + + return u1; +} + +#define VPCNT(NAME, BIT, E, T) \ +void HELPER(NAME)(CPULoongArchState *env, uint32_t vd, uint32_t vj) \ +{ \ + int i; \ + VReg *Vd = &(env->fpr[vd].vreg); \ + VReg *Vj = &(env->fpr[vj].vreg); \ + \ + for (i = 0; i < LSX_LEN/BIT; i++) \ + { \ + Vd->E(i) = do_vpcnt((T)Vj->E(i)); \ + } \ +} + +VPCNT(vpcnt_b, 8, B, uint8_t) +VPCNT(vpcnt_h, 16, H, uint16_t) +VPCNT(vpcnt_w, 32, W, uint32_t) +VPCNT(vpcnt_d, 64, D, uint64_t)
host-utils.h has ctpop{8,16,32,64}. r~