On 3/12/20 7:58 AM, LIU Zhiwei wrote:
> +target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
> +                              uint32_t desc)
> +{
> +    target_ulong cnt = 0;
> +    uint32_t mlen = vext_mlen(desc);
> +    uint32_t vm = vext_vm(desc);
> +    uint32_t vl = env->vl;
> +    int i;
> +
> +    for (i = 0; i < vl; i++) {
> +        if (vm || vext_elem_mask(v0, mlen, i)) {
> +            if (vext_elem_mask(vs2, mlen, i)) {
> +                cnt++;
> +            }
> +        }
> +    }
> +    return cnt;
> +}
This is ok as-is, so

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

But you can do better.  You create an array, similar to arm's
pred_esz_masks[], indexed by log2(mlen).

    mask = pred_mlen_masks[log2_mlen];
    n = vl >> (6 - log2_mlen);
    r = extract32(vl, 0, 6 - log2_mlen);
    if (r) {
        rmask = extract64(mask, 0, r << log2_mlen);
    } else {
        rmask = 0;
    }

    if (vm) {
        for (i = 0; i < n; i++) {
            uint64_t j = ((uint64_t *)vs2)[i];
            cnt += ctpop64(j & mask);
        }
        if (rmask) {
            uint64_t j = ((uint64_t *)vs2)[i];
            cnt += ctpop64(j & rmask);
        }
    } else {
        for (i = 0; i < n; i++) {
            uint64_t j = ((uint64_t *)vs2)[i];
            uint64_t k = ((uint64_t *)v0)[i];
            cnt += ctpop64(j & k & mask);
        }
        if (rmask) {
            uint64_t j = ((uint64_t *)vs2)[i];
            uint64_t k = ((uint64_t *)v0)[i];
            cnt += ctpop64(j & k & rmask);
        }
    }

r~