[Mesa-dev] [PATCH] st/mesa: add st fp64 support (v7)
From: Dave Airlie v2 : add double to int/unsigned conversion v3: handle fp64 consts better v4: use DRSQ v4.1: add d2b v4.2: drop DDIV v5: split out some prep patches. v5.1: add some comments. v5.2: more comments v6: simplify down the double instruction generation loop. v7: Merge Ilia's two cleanup patches. Signed-off-by: Dave Airlie --- src/mesa/state_tracker/st_extensions.c | 6 + src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 578 ++--- 2 files changed, 458 insertions(+), 126 deletions(-) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 9137a50..ce29d07 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -900,4 +900,10 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_VIDEO_CAP_SUPPORTS_INTERLACED)) { extensions->NV_vdpau_interop = GL_TRUE; } + + if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX, +PIPE_SHADER_CAP_DOUBLES) && + screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT, +PIPE_SHADER_CAP_DOUBLES)) + extensions->ARB_gpu_shader_fp64 = GL_TRUE; } diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 56502fb..003d280 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -229,7 +229,7 @@ public: DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction) unsigned op; - st_dst_reg dst[1]; + st_dst_reg dst[2]; st_src_reg src[4]; /** Pointer to the ir source this tree came from for debugging */ ir_instruction *ir; @@ -262,16 +262,17 @@ public: class immediate_storage : public exec_node { public: - immediate_storage(gl_constant_value *values, int size, int type) + immediate_storage(gl_constant_value *values, int size32, int type) { - memcpy(this->values, values, size * sizeof(gl_constant_value)); - this->size = size; + memcpy(this->values, values, size32 * sizeof(gl_constant_value)); + this->size32 = size32; this->type = type; } + /* doubles are stored 
across 2 gl_constant_values */ gl_constant_value values[4]; - int size; /**< Number of components (1-4) */ - int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ + int size32; /**< Number of 32-bit components (1-4) */ + int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ }; class function_entry : public exec_node { @@ -334,7 +335,7 @@ public: variable_storage *find_variable_storage(ir_variable *var); - int add_constant(gl_register_file file, gl_constant_value values[4], + int add_constant(gl_register_file file, gl_constant_value values[8], int size, int datatype, GLuint *swizzle_out); function_entry *get_function_signature(ir_function_signature *sig); @@ -342,6 +343,7 @@ public: st_src_reg get_temp(const glsl_type *type); void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); + st_src_reg st_src_reg_for_double(double val); st_src_reg st_src_reg_for_float(float val); st_src_reg st_src_reg_for_int(int val); st_src_reg st_src_reg_for_type(int type, int val); @@ -397,6 +399,10 @@ public: st_dst_reg dst, st_src_reg src0); glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0); + + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, @@ -408,6 +414,11 @@ public: st_src_reg src0, st_src_reg src1, st_src_reg src2, st_src_reg src3); + glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op, + st_dst_reg dst, st_dst_reg dst1, + st_src_reg src0, st_src_reg src1, + st_src_reg src2, st_src_reg src3); + unsigned get_opcode(ir_instruction *ir, unsigned op, st_dst_reg dst, st_src_reg src0, st_src_reg src1); @@ -432,6 +443,7 @@ public: void emit_scs(ir_instruction *ir, unsigned op, st_dst_reg dst, const st_src_reg &src); + bool try_emit_mad(ir_expression *ir, int mul_operand); bool try_emit_mad_for_and_not(ir_expression *ir, @@ -451,6 
+463,8 @@ public: void copy_propagate(void); int eliminate_dead_code(void); + + void merge_two_dsts(void); void merge_registers(void); void renumber_registers(void); @@ -464,7 +478,6 @@ public: static st_src_reg undef_src = st_src_reg(PROGRA
Re: [Mesa-dev] [PATCH] st/mesa: add st fp64 support (v7)
On Thu, Feb 19, 2015 at 6:09 PM, Dave Airlie wrote: > From: Dave Airlie > > v2 : add double to int/unsigned conversion > v3: handle fp64 consts better > v4: use DRSQ > v4.1: add d2b > v4.2: drop DDIV > > v5: split out some prep patches. > v5.1: add some comments. > v5.2: more comments > > v6: simplify down the double instruction > generation loop. > > v7: Merge Ilia's two cleanup patches. > > Signed-off-by: Dave Airlie > --- > src/mesa/state_tracker/st_extensions.c | 6 + > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 578 > ++--- > 2 files changed, 458 insertions(+), 126 deletions(-) > > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 56502fb..003d280 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -464,7 +478,6 @@ public: > static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, > GLSL_TYPE_ERROR); > > static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, > GLSL_TYPE_ERROR); > - > static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, > GLSL_TYPE_FLOAT, 0); > static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, > GLSL_TYPE_FLOAT, 1); > static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, > GLSL_TYPE_FLOAT, 2); Drop this hunk. > @@ -597,22 +616,129 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, > unsigned op, > > this->instructions.push_tail(inst); > > + /* > +* This section contains the double processing. > +* GLSL just represents doubles as single channel values, > +* however most HW and TGSI represent doubles as pairs of register > channels. > +* > +* so we have to fixup destination writemask/index and src > swizzle/indexes. > +* dest writemasks need to translate from single channel write mask > +* to a dual-channel writemask, but also need to modify the index, > +* if we are touching the Z,W fields in the pre-translated writemask. 
> +* > +* src channels have similar index modifications along with swizzle > +* changes so we pick the XY, ZW pairs from the correct index. > +* > +* GLSL [0].x -> TGSI [0].xy > +* GLSL [0].y -> TGSI [0].zw > +* GLSL [0].z -> TGSI [1].xy > +* GLSL [0].w -> TGSI [1].zw > +*/ > + if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == > GLSL_TYPE_DOUBLE || > + inst->src[0].type == GLSL_TYPE_DOUBLE) { > + glsl_to_tgsi_instruction *dinst = NULL; > + int initial_src_swz[4], initial_src_idx[4]; > + int initial_dst_idx[2], initial_dst_writemask[2]; > + /* select the writemask for dst0 or dst1 */ > + unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? > inst->dst[1].writemask : inst->dst[0].writemask; > + > + /* copy out the writemask, index and swizzles for all src/dsts. */ > + for (j = 0; j < 2; j++) { > + initial_dst_writemask[j] = inst->dst[j].writemask; > + initial_dst_idx[j] = inst->dst[j].index; > + } > + > + for (j = 0; j < 4; j++) { > + initial_src_swz[j] = inst->src[j].swizzle; > + initial_src_idx[j] = inst->src[j].index; > + } > + > + /* > + * scan all the components in the dst writemask > + * generate an instruction for each of them if required. > + */ > + while (writemask) { > + > + int i = u_bit_scan(&writemask); > + > + /* first time use previous instruction */ > + if (dinst == NULL) { > +dinst = inst; > + } else { > +/* create a new instruction for subsequent attempts */ > +dinst = new(mem_ctx) glsl_to_tgsi_instruction(); > +*dinst = *inst; > +dinst->next = NULL; > +dinst->prev = NULL; > +this->instructions.push_tail(dinst); > + } > + > + /* modify the destination if we are splitting */ > + for (j = 0; j < 2; j++) { > +if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) { > + dinst->dst[j].writemask = (i & 1) ? 
WRITEMASK_ZW : > WRITEMASK_XY; > + dinst->dst[j].index = initial_dst_idx[j]; > + if (i > 1) > + dinst->dst[j].index++; > +} else { > + /* if we aren't writing to a double, just get the bit of the > initial writemask > + for this channel */ > + dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i); > +} > + } > + > + /* modify the src registers */ > + for (j = 0; j < 4; j++) { > +int swz = GET_SWZ(initial_src_swz[j], i); > + > +if (dinst->src[j].type == GLSL_TYPE_DOUBLE) { > + dinst->src[j].index = initial_src_idx[j]; > + if (swz > 1) > + dinst->src[j].index++; > + > + if (swz & 1) > +