On Wed, 2019-04-17 at 13:17 -0700, Francisco Jerez wrote: > "Juan A. Suarez Romero" <jasua...@igalia.com> writes: > > > From: Iago Toral Quiroga <ito...@igalia.com> > > > > v2: > > - Adapted unit tests to make them consistent with the changes done > > to the validation of half-float conversions. > > > > v3 (Curro): > > - Check all the accumulators > > - Constify declarations > > - Do not check src1 type in single-source instructions. > > - Check for all instructions that read accumulator (either implicitly or > > explicitly) > > - Check restrictions in src1 too. > > - Merge conditional block > > - Add invalid test case. > > > > v4 (Curro): > > - Assert on 3-src instructions, as they are not validated. > > - Get rid of types_are_mixed_float(), as we know instruction is mixed > > float at that point. > > - Remove conditions from not verified case. > > - Fix brackets on conditional. > > --- > > src/intel/compiler/brw_eu_validate.c | 268 ++++++++++ > > src/intel/compiler/test_eu_validate.cpp | 630 ++++++++++++++++++++++++ > > 2 files changed, 898 insertions(+) > > > > diff --git a/src/intel/compiler/brw_eu_validate.c > > b/src/intel/compiler/brw_eu_validate.c > > index cfaf126e2f5..9530d4da209 100644 > > --- a/src/intel/compiler/brw_eu_validate.c > > +++ b/src/intel/compiler/brw_eu_validate.c > > @@ -170,6 +170,20 @@ src1_is_null(const struct gen_device_info *devinfo, > > const brw_inst *inst) > > brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; > > } > > > > +static bool > > +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) > > +{ > > + return brw_inst_src0_reg_file(devinfo, inst) == > > BRW_ARCHITECTURE_REGISTER_FILE && > > + (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == > > BRW_ARF_ACCUMULATOR; > > +} > > + > > +static bool > > +src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) > > +{ > > + return brw_inst_src1_reg_file(devinfo, inst) == > > BRW_ARCHITECTURE_REGISTER_FILE && > > + 
(brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == > > BRW_ARF_ACCUMULATOR; > > +} > > + > > static bool > > src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst) > > { > > @@ -275,6 +289,24 @@ sources_not_null(const struct gen_device_info *devinfo, > > return error_msg; > > } > > > > +static bool > > +inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst > > *inst) > > +{ > > + /* Check instructions that use implicit accumulator sources */ > > + switch (brw_inst_opcode(devinfo, inst)) { > > + case BRW_OPCODE_MAC: > > + case BRW_OPCODE_MACH: > > + case BRW_OPCODE_SADA2: > > + return true; > > + } > > + > > + /* FIXME: support 3-src instructions */ > > + unsigned num_sources = num_sources_from_inst(devinfo, inst); > > + assert(num_sources < 3); > > + > > + return src0_is_acc(devinfo, inst) || (num_sources > 1 && > > src1_is_acc(devinfo, inst)); > > +} > > + > > static struct string > > send_restrictions(const struct gen_device_info *devinfo, > > const brw_inst *inst) > > @@ -938,6 +970,241 @@ general_restrictions_on_region_parameters(const > > struct gen_device_info *devinfo, > > return error_msg; > > } > > > > +static struct string > > +special_restrictions_for_mixed_float_mode(const struct gen_device_info > > *devinfo, > > + const brw_inst *inst) > > +{ > > + struct string error_msg = { .str = NULL, .len = 0 }; > > + > > + const unsigned opcode = brw_inst_opcode(devinfo, inst); > > + const unsigned num_sources = num_sources_from_inst(devinfo, inst); > > + if (num_sources >= 3) > > + return error_msg; > > + > > + if (!is_mixed_float(devinfo, inst)) > > + return error_msg; > > + > > + unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); > > + bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; > > + > > + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); > > + enum brw_reg_type src1_type = num_sources > 1 ? 
> > + brw_inst_src1_type(devinfo, inst) : 0; > > + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); > > + > > + unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); > > + bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, > > dst_stride); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "Indirect addressing on source is not supported when source and > > + * destination data types are mixed float." > > + */ > > + ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != > > BRW_ADDRESS_DIRECT || > > + (num_sources > 1 && > > + brw_inst_src1_address_mode(devinfo, inst) != > > BRW_ADDRESS_DIRECT), > > + "Indirect addressing on source is not supported when source > > and " > > + "destination data types are mixed float"); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "No SIMD16 in mixed mode when destination is f32. Instruction > > + * execution size must be no more than 8." > > + */ > > + ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, > > + "Mixed float mode with 32-bit float destination is limited " > > + "to SIMD8"); > > + > > + if (is_align16) { > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "In Align16 mode, when half float and float data types are mixed > > + * between source operands OR between source and destination > > operands, > > + * the register content are assumed to be packed." > > + * > > + * Since Align16 doesn't have a concept of horizontal stride (or > > width), > > + * it means that vertical stride must always be 4, since 0 and 2 > > would > > + * lead to replicated data, and any other value is disallowed in > > Align16. 
> > + * However, the PRM also says: > > + * > > + * "In Align16, vertical stride can never be zero for f16" > > + * > > + * Which is oddly redundant and specific considering the more general > > + * assumption that all operands are assumed to be packed, so we > > + * understand that this might be hinting that there may be an > > exception > > + * for f32 operands with a vstride of 0, so we don't validate this > > for > > + * them while we don't have empirical evidence that it is forbidden. > > + * > > + * "Math operations for mixed mode: > > + * - In Align16, only packed format is supported" > > + * > > + * It is not clear what this is restricting since as stated in > > previous > > + * spec quotes, Align16 always assumes packed data. However, since > > + * we are allowing vstride of 0 on f32, we check again here without > > that > > + * exception. > > + > > The comment text from "However, the PRM also says" till here seems > obsolete by your last changes. Please remove it. > > With that fixed: > > Reviewed-by: Francisco Jerez <curroje...@riseup.net> > > I'm guessing that's all the reviews you needed on this series?
Correct. Thanks very much! I'll do a last test and push. J.A. > > + */ > > + ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != > > BRW_VERTICAL_STRIDE_4, > > + "Align16 mixed float mode assumes packed data (vstride must > > be 4"); > > + > > + ERROR_IF(num_sources >= 2 && > > + brw_inst_src1_vstride(devinfo, inst) != > > BRW_VERTICAL_STRIDE_4, > > + "Align16 mixed float mode assumes packed data (vstride must > > be 4"); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "For Align16 mixed mode, both input and output packed f16 data > > + * must be oword aligned, no oword crossing in packed f16." > > + * > > + * The previous rule requires that Align16 operands are always > > packed, > > + * and since there is only one bit for Align16 subnr, which > > represents > > + * offsets 0B and 16B, this rule is always enforced and we don't > > need to > > + * validate it. > > + */ > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "No SIMD16 in mixed mode when destination is packed f16 for > > both > > + * Align1 and Align16." > > + * > > + * And: > > + * > > + * "In Align16 mode, when half float and float data types are mixed > > + * between source operands OR between source and destination > > operands, > > + * the register content are assumed to be packed." > > + * > > + * Which implies that SIMD16 is not available in Align16. This is > > further > > + * confirmed by: > > + * > > + * "For Align16 mixed mode, both input and output packed f16 data > > + * must be oword aligned, no oword crossing in packed f16" > > + * > > + * Since oword-aligned packed f16 data would cross oword boundaries > > when > > + * the execution size is larger than 8. 
> > + */ > > + ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to > > SIMD8"); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "No accumulator read access for Align16 mixed float." > > + */ > > + ERROR_IF(inst_uses_src_acc(devinfo, inst), > > + "No accumulator read access for Align16 mixed float"); > > + } else { > > + assert(!is_align16); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "No SIMD16 in mixed mode when destination is packed f16 for > > both > > + * Align1 and Align16." > > + */ > > + ERROR_IF(exec_size > 8 && dst_is_packed && > > + dst_type == BRW_REGISTER_TYPE_HF, > > + "Align1 mixed float mode is limited to SIMD8 when > > destination " > > + "is packed half-float"); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "Math operations for mixed mode: > > + * - In Align1, f16 inputs need to be strided" > > + */ > > + if (opcode == BRW_OPCODE_MATH) { > > + if (src0_type == BRW_REGISTER_TYPE_HF) { > > + ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, > > + "Align1 mixed mode math needs strided half-float > > inputs"); > > + } > > + > > + if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { > > + ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, > > + "Align1 mixed mode math needs strided half-float > > inputs"); > > + } > > + } > > + > > + if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "In Align1, destination stride can be smaller than execution > > + * type. When destination is stride of 1, 16 bit packed data > > is > > + * updated on the destination. However, output packed f16 data > > + * must be oword aligned, no oword crossing in packed f16." 
> > + * > > + * The requirement of not crossing oword boundaries for 16-bit > > oword > > + * aligned data means that execution size is limited to 8. > > + */ > > + unsigned subreg; > > + if (brw_inst_dst_address_mode(devinfo, inst) == > > BRW_ADDRESS_DIRECT) > > + subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); > > + else > > + subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); > > + ERROR_IF(subreg % 16 != 0, > > + "Align1 mixed mode packed half-float output must be " > > + "oword aligned"); > > + ERROR_IF(exec_size > 8, > > + "Align1 mixed mode packed half-float output must not " > > + "cross oword boundaries (max exec size is 8)"); > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "When source is float or half float from accumulator > > register and > > + * destination is half float with a stride of 1, the source > > must > > + * register aligned. i.e., source must have offset zero." > > + * > > + * Align16 mixed float mode doesn't allow accumulator access on > > sources, > > + * so we only need to check this for Align1. 
> > + */ > > + if (src0_is_acc(devinfo, inst) && > > + (src0_type == BRW_REGISTER_TYPE_F || > > + src0_type == BRW_REGISTER_TYPE_HF)) { > > + ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, > > + "Mixed float mode requires register-aligned > > accumulator " > > + "source reads when destination is packed half-float"); > > + > > + } > > + > > + if (num_sources > 1 && > > + src1_is_acc(devinfo, inst) && > > + (src1_type == BRW_REGISTER_TYPE_F || > > + src1_type == BRW_REGISTER_TYPE_HF)) { > > + ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, > > + "Mixed float mode requires register-aligned > > accumulator " > > + "source reads when destination is packed half-float"); > > + } > > + } > > + > > + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode > > + * Float Operations: > > + * > > + * "No swizzle is allowed when an accumulator is used as an > > implicit > > + * source or an explicit source in an instruction. i.e. when > > + * destination is half float with an implicit accumulator source, > > + * destination stride needs to be 2." > > + * > > + * FIXME: it is not quite clear what the first sentence actually > > means > > + * or its link to the implication described after it, so we > > only > > + * validate the explicit implication, which is clearly > > described. 
> > + */ > > + if (dst_type == BRW_REGISTER_TYPE_HF && > > + inst_uses_src_acc(devinfo, inst)) { > > + ERROR_IF(dst_stride != 2, > > + "Mixed float mode with implicit/explicit accumulator " > > + "source and half-float destination requires a stride " > > + "of 2 on the destination"); > > + } > > + } > > + > > + return error_msg; > > +} > > + > > /** > > * Creates an \p access_mask for an \p exec_size, \p element_size, and a > > region > > * > > @@ -1576,6 +1843,7 @@ brw_validate_instructions(const struct > > gen_device_info *devinfo, > > CHECK(send_restrictions); > > CHECK(general_restrictions_based_on_operand_types); > > CHECK(general_restrictions_on_region_parameters); > > + CHECK(special_restrictions_for_mixed_float_mode); > > CHECK(region_alignment_rules); > > CHECK(vector_immediate_restrictions); > > > > CHECK(special_requirements_for_handling_double_precision_data_types); > > diff --git a/src/intel/compiler/test_eu_validate.cpp > > b/src/intel/compiler/test_eu_validate.cpp > > index 2e06da2f5b4..65326416064 100644 > > --- a/src/intel/compiler/test_eu_validate.cpp > > +++ b/src/intel/compiler/test_eu_validate.cpp > > @@ -1019,6 +1019,636 @@ TEST_P(validation_test, half_float_conversion) > > } > > } > > > > +TEST_P(validation_test, mixed_float_source_indirect_addressing) > > +{ > > + static const struct { > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned dst_stride; > > + bool dst_indirect; > > + bool src0_indirect; > > + bool expected_result; > > + } inst[] = { > > +#define INST(dst_type, src0_type, src1_type, \ > > + dst_stride, dst_indirect, src0_indirect, expected_result) \ > > + { \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + BRW_HORIZONTAL_STRIDE_##dst_stride, \ > > + dst_indirect, \ > > + src0_indirect, \ > > + expected_result, \ > > + } > > + > > + /* Source and dest are mixed float: indirect src addressing 
not > > allowed */ > > + INST(HF, F, F, 2, false, false, true), > > + INST(HF, F, F, 2, true, false, true), > > + INST(HF, F, F, 2, false, true, false), > > + INST(HF, F, F, 2, true, true, false), > > + INST( F, HF, F, 1, false, false, true), > > + INST( F, HF, F, 1, true, false, true), > > + INST( F, HF, F, 1, false, true, false), > > + INST( F, HF, F, 1, true, true, false), > > + > > + INST(HF, HF, F, 2, false, false, true), > > + INST(HF, HF, F, 2, true, false, true), > > + INST(HF, HF, F, 2, false, true, false), > > + INST(HF, HF, F, 2, true, true, false), > > + INST( F, F, HF, 1, false, false, true), > > + INST( F, F, HF, 1, true, false, true), > > + INST( F, F, HF, 1, false, true, false), > > + INST( F, F, HF, 1, true, true, false), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8) > > + return; > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_dst_address_mode(&devinfo, last_inst, > > inst[i].dst_indirect); > > + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); > > + brw_inst_set_src0_address_mode(&devinfo, last_inst, > > inst[i].src0_indirect); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align1_simd16) > > +{ > > + static const struct { > > + unsigned exec_size; > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned dst_stride; > > + bool expected_result; > > + } inst[] = { > > +#define INST(exec_size, dst_type, src0_type, src1_type, \ > > + dst_stride, expected_result) \ > > + { \ > > + BRW_EXECUTE_##exec_size, \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + BRW_HORIZONTAL_STRIDE_##dst_stride, \ > > + 
expected_result, \ > > + } > > + > > + /* No SIMD16 in mixed mode when destination is packed f16 */ > > + INST( 8, HF, F, HF, 2, true), > > + INST(16, HF, HF, F, 2, true), > > + INST(16, HF, HF, F, 1, false), > > + INST(16, HF, F, HF, 1, false), > > + > > + /* No SIMD16 in mixed mode when destination is f32 */ > > + INST( 8, F, HF, F, 1, true), > > + INST( 8, F, F, HF, 1, true), > > + INST(16, F, HF, F, 1, false), > > + INST(16, F, F, HF, 1, false), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8) > > + return; > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); > > + > > + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, > > mixed_float_align1_packed_fp16_dst_acc_read_offset_0) > > +{ > > + static const struct { > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned dst_stride; > > + bool read_acc; > > + unsigned subnr; > > + bool expected_result_bdw; > > + bool expected_result_chv_skl; > > + } inst[] = { > > +#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr, > > \ > > + expected_result_bdw, expected_result_chv_skl) > > \ > > + { > > \ > > + BRW_REGISTER_TYPE_##dst_type, > > \ > > + BRW_REGISTER_TYPE_##src0_type, > > \ > > + BRW_REGISTER_TYPE_##src1_type, > > \ > > + BRW_HORIZONTAL_STRIDE_##dst_stride, > > \ > > + read_acc, > > \ > > + subnr, > > \ > > + expected_result_bdw, > > \ > > + expected_result_chv_skl, > > \ > > + } > > + > > + /* Destination is not packed */ > > + INST(HF, HF, F, 2, true, 0, true, true), > > + INST(HF, HF, F, 2, true, 2, true, true), > > + INST(HF, HF, F, 
2, true, 4, true, true), > > + INST(HF, HF, F, 2, true, 8, true, true), > > + INST(HF, HF, F, 2, true, 16, true, true), > > + > > + /* Destination is packed, we don't read acc */ > > + INST(HF, HF, F, 1, false, 0, false, true), > > + INST(HF, HF, F, 1, false, 2, false, true), > > + INST(HF, HF, F, 1, false, 4, false, true), > > + INST(HF, HF, F, 1, false, 8, false, true), > > + INST(HF, HF, F, 1, false, 16, false, true), > > + > > + /* Destination is packed, we read acc */ > > + INST(HF, HF, F, 1, true, 0, false, false), > > + INST(HF, HF, F, 1, true, 2, false, false), > > + INST(HF, HF, F, 1, true, 4, false, false), > > + INST(HF, HF, F, 1, true, 8, false, false), > > + INST(HF, HF, F, 1, true, 16, false, false), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8) > > + return; > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); > > + > > + brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr); > > + > > + if (devinfo.is_cherryview || devinfo.gen >= 9) > > + EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); > > + else > > + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_fp16_dest_with_acc) > > +{ > > + static const struct { > > + unsigned exec_size; > > + unsigned opcode; > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned dst_stride; > > + bool read_acc; > > + bool expected_result_bdw; > > + bool expected_result_chv_skl; > > + } inst[] = { > > +#define INST(exec_size, opcode, dst_type, src0_type, src1_type, \ > > + dst_stride, read_acc,expected_result_bdw, \ > > + expected_result_chv_skl) \ > > + { \ > > + 
BRW_EXECUTE_##exec_size, \ > > + BRW_OPCODE_##opcode, \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + BRW_HORIZONTAL_STRIDE_##dst_stride, \ > > + read_acc, \ > > + expected_result_bdw, \ > > + expected_result_chv_skl, \ > > + } > > + > > + /* Packed fp16 dest with implicit acc needs hstride=2 */ > > + INST(8, MAC, HF, HF, F, 1, false, false, false), > > + INST(8, MAC, HF, HF, F, 2, false, true, true), > > + INST(8, MAC, HF, F, HF, 1, false, false, false), > > + INST(8, MAC, HF, F, HF, 2, false, true, true), > > + > > + /* Packed fp16 dest with explicit acc needs hstride=2 */ > > + INST(8, ADD, HF, HF, F, 1, true, false, false), > > + INST(8, ADD, HF, HF, F, 2, true, true, true), > > + INST(8, ADD, HF, F, HF, 1, true, false, false), > > + INST(8, ADD, HF, F, HF, 2, true, true, true), > > + > > + /* If destination is not fp16, restriction doesn't apply */ > > + INST(8, MAC, F, HF, F, 1, false, true, true), > > + INST(8, MAC, F, HF, F, 2, false, true, true), > > + > > + /* If there is no implicit/explicit acc, restriction doesn't apply */ > > + INST(8, ADD, HF, HF, F, 1, false, false, true), > > + INST(8, ADD, HF, HF, F, 2, false, true, true), > > + INST(8, ADD, HF, F, HF, 1, false, false, true), > > + INST(8, ADD, HF, F, HF, 2, false, true, true), > > + INST(8, ADD, F, HF, F, 1, false, true, true), > > + INST(8, ADD, F, HF, F, 2, false, true, true), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8) > > + return; > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + if (inst[i].opcode == BRW_OPCODE_MAC) { > > + brw_MAC(p, retype(g0, inst[i].dst_type), > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + } else { > > + assert(inst[i].opcode == BRW_OPCODE_ADD); > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(inst[i].read_acc ? 
acc0: g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + } > > + > > + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); > > + > > + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); > > + > > + if (devinfo.is_cherryview || devinfo.gen >= 9) > > + EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); > > + else > > + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs) > > +{ > > + static const struct { > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned dst_stride; > > + unsigned src0_stride; > > + unsigned src1_stride; > > + bool expected_result; > > + } inst[] = { > > +#define INST(dst_type, src0_type, src1_type, \ > > + dst_stride, src0_stride, src1_stride, expected_result) \ > > + { \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + BRW_HORIZONTAL_STRIDE_##dst_stride, \ > > + BRW_HORIZONTAL_STRIDE_##src0_stride, \ > > + BRW_HORIZONTAL_STRIDE_##src1_stride, \ > > + expected_result, \ > > + } > > + > > + INST(HF, HF, F, 2, 2, 1, true), > > + INST(HF, F, HF, 2, 1, 2, true), > > + INST(HF, F, HF, 1, 1, 2, true), > > + INST(HF, F, HF, 2, 1, 1, false), > > + INST(HF, HF, F, 2, 1, 1, false), > > + INST(HF, HF, F, 1, 1, 1, false), > > + INST(HF, HF, F, 2, 1, 1, false), > > + INST( F, HF, F, 1, 1, 1, false), > > + INST( F, F, HF, 1, 1, 2, true), > > + INST( F, HF, HF, 1, 2, 1, false), > > + INST( F, HF, HF, 1, 2, 2, true), > > + > > +#undef INST > > + }; > > + > > + /* No half-float math in gen8 */ > > + if (devinfo.gen < 9) > > + return; > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + gen6_math(p, retype(g0, inst[i].dst_type), > > + BRW_MATH_FUNCTION_POW, > > + retype(g0, inst[i].src0_type), > > + retype(g0, 
inst[i].src1_type)); > > + > > + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); > > + > > + brw_inst_set_src0_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); > > + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride); > > + > > + brw_inst_set_src1_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); > > + brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst) > > +{ > > + static const struct { > > + unsigned exec_size; > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned dst_stride; > > + unsigned dst_subnr; > > + bool expected_result_bdw; > > + bool expected_result_chv_skl; > > + } inst[] = { > > +#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, > > dst_subnr, \ > > + expected_result_bdw, expected_result_chv_skl) > > \ > > + { > > \ > > + BRW_EXECUTE_##exec_size, > > \ > > + BRW_REGISTER_TYPE_##dst_type, > > \ > > + BRW_REGISTER_TYPE_##src0_type, > > \ > > + BRW_REGISTER_TYPE_##src1_type, > > \ > > + BRW_HORIZONTAL_STRIDE_##dst_stride, > > \ > > + dst_subnr, > > \ > > + expected_result_bdw, > > \ > > + expected_result_chv_skl > > \ > > + } > > + > > + /* SIMD8 packed fp16 dst won't cross oword boundaries if region is > > + * oword-aligned > > + */ > > + INST( 8, HF, HF, F, 1, 0, false, true), > > + INST( 8, HF, HF, F, 1, 2, false, false), > > + INST( 8, HF, HF, F, 1, 4, false, false), > > + INST( 8, HF, HF, F, 1, 8, false, false), > > + INST( 8, HF, HF, F, 1, 16, false, true), > > + > > + /* SIMD16 packed fp16 always crosses oword boundaries */ > > + INST(16, HF, HF, F, 1, 0, false, false), > > + INST(16, HF, HF, 
F, 1, 2, false, false), > > + INST(16, HF, HF, F, 1, 4, false, false), > > + INST(16, HF, HF, F, 1, 8, false, false), > > + INST(16, HF, HF, F, 1, 16, false, false), > > + > > + /* If destination is not packed (or not fp16) we can cross oword > > + * boundaries > > + */ > > + INST( 8, HF, HF, F, 2, 0, true, true), > > + INST( 8, F, HF, F, 1, 0, true, true), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8) > > + return; > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); > > + brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, > > inst[i].dst_subnr); > > + > > + brw_inst_set_src0_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); > > + brw_inst_set_src0_hstride(&devinfo, last_inst, > > BRW_HORIZONTAL_STRIDE_1); > > + > > + brw_inst_set_src1_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); > > + brw_inst_set_src1_hstride(&devinfo, last_inst, > > BRW_HORIZONTAL_STRIDE_1); > > + > > + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); > > + > > + if (devinfo.is_cherryview || devinfo.gen >= 9) > > + EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); > > + else > > + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align16_packed_data) > > +{ > > + static const struct { > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned src0_vstride; > > + unsigned src1_vstride; > > + bool expected_result; > > + } inst[] = { > > +#define INST(dst_type, src0_type, src1_type, \ > > + src0_vstride, src1_vstride, 
expected_result) \ > > + { \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + BRW_VERTICAL_STRIDE_##src0_vstride, \ > > + BRW_VERTICAL_STRIDE_##src1_vstride, \ > > + expected_result, \ > > + } > > + > > + /* We only test with F destination because there is a restriction > > + * by which F->HF conversions need to be DWord aligned but Align16 > > also > > + * requires that destination horizontal stride is 1. > > + */ > > + INST(F, F, HF, 4, 4, true), > > + INST(F, F, HF, 2, 4, false), > > + INST(F, F, HF, 4, 2, false), > > + INST(F, F, HF, 0, 4, false), > > + INST(F, F, HF, 4, 0, false), > > + INST(F, HF, F, 4, 4, true), > > + INST(F, HF, F, 4, 2, false), > > + INST(F, HF, F, 2, 4, false), > > + INST(F, HF, F, 0, 4, false), > > + INST(F, HF, F, 4, 0, false), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8 || devinfo.gen >= 11) > > + return; > > + > > + brw_set_default_access_mode(p, BRW_ALIGN_16); > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride); > > + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align16_no_simd16) > > +{ > > + static const struct { > > + unsigned exec_size; > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + bool expected_result; > > + } inst[] = { > > +#define INST(exec_size, dst_type, src0_type, src1_type, expected_result) \ > > + { \ > > + BRW_EXECUTE_##exec_size, \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + 
expected_result, \ > > + } > > + > > + /* We only test with F destination because there is a restriction > > + * by which F->HF conversions need to be DWord aligned but Align16 > > also > > + * requires that destination horizontal stride is 1. > > + */ > > + INST( 8, F, F, HF, true), > > + INST( 8, F, HF, F, true), > > + INST( 8, F, F, HF, true), > > + INST(16, F, F, HF, false), > > + INST(16, F, HF, F, false), > > + INST(16, F, F, HF, false), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8 || devinfo.gen >= 11) > > + return; > > + > > + brw_set_default_access_mode(p, BRW_ALIGN_16); > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); > > + > > + brw_inst_set_src0_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + brw_inst_set_src1_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align16_no_acc_read) > > +{ > > + static const struct { > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + bool read_acc; > > + bool expected_result; > > + } inst[] = { > > +#define INST(dst_type, src0_type, src1_type, read_acc, expected_result) \ > > + { \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + read_acc, \ > > + expected_result, \ > > + } > > + > > + /* We only test with F destination because there is a restriction > > + * by which F->HF conversions need to be DWord aligned but Align16 > > also > > + * requires that destination horizontal stride is 1. 
> > + */ > > + INST( F, F, HF, false, true), > > + INST( F, F, HF, true, false), > > + INST( F, HF, F, false, true), > > + INST( F, HF, F, true, false), > > + > > +#undef INST > > + }; > > + > > + if (devinfo.gen < 8 || devinfo.gen >= 11) > > + return; > > + > > + brw_set_default_access_mode(p, BRW_ALIGN_16); > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + brw_ADD(p, retype(g0, inst[i].dst_type), > > + retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_src0_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + brw_inst_set_src1_vstride(&devinfo, last_inst, > > BRW_VERTICAL_STRIDE_4); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > +TEST_P(validation_test, mixed_float_align16_math_packed_format) > > +{ > > + static const struct { > > + enum brw_reg_type dst_type; > > + enum brw_reg_type src0_type; > > + enum brw_reg_type src1_type; > > + unsigned src0_vstride; > > + unsigned src1_vstride; > > + bool expected_result; > > + } inst[] = { > > +#define INST(dst_type, src0_type, src1_type, \ > > + src0_vstride, src1_vstride, expected_result) \ > > + { \ > > + BRW_REGISTER_TYPE_##dst_type, \ > > + BRW_REGISTER_TYPE_##src0_type, \ > > + BRW_REGISTER_TYPE_##src1_type, \ > > + BRW_VERTICAL_STRIDE_##src0_vstride, \ > > + BRW_VERTICAL_STRIDE_##src1_vstride, \ > > + expected_result, \ > > + } > > + > > + /* We only test with F destination because there is a restriction > > + * by which F->HF conversions need to be DWord aligned but Align16 > > also > > + * requires that destination horizontal stride is 1. 
> > + */ > > + INST( F, HF, F, 4, 0, false), > > + INST( F, HF, HF, 4, 4, true), > > + INST( F, F, HF, 4, 0, false), > > + INST( F, F, HF, 2, 4, false), > > + INST( F, F, HF, 4, 2, false), > > + INST( F, HF, HF, 0, 4, false), > > + > > +#undef INST > > + }; > > + > > + /* Align16 Math for mixed float mode is not supported in gen8 */ > > + if (devinfo.gen < 9 || devinfo.gen >= 11) > > + return; > > + > > + brw_set_default_access_mode(p, BRW_ALIGN_16); > > + > > + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { > > + gen6_math(p, retype(g0, inst[i].dst_type), > > + BRW_MATH_FUNCTION_POW, > > + retype(g0, inst[i].src0_type), > > + retype(g0, inst[i].src1_type)); > > + > > + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride); > > + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride); > > + > > + EXPECT_EQ(inst[i].expected_result, validate(p)); > > + > > + clear_instructions(p); > > + } > > +} > > + > > TEST_P(validation_test, vector_immediate_destination_alignment) > > { > > static const struct { > > -- > > 2.20.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev