Jerome Glisse wrote:

Okay, I finally got past a stupid bug (as all bugs are...).
So I committed the table to r300, and here is what swizzle
and the modified emit_arith look like (there is some debug
code to test swizzling)...

Note that I changed pfs_reg_t, so swizzling is now done
in emit_arith and not in t_src. This way we can have
multiple constants as args for emit_arith, and swizzling
then allocates & copies the consts for us (I have to add
7 native cases to the table for that).
The reason I was doing swizzling in t_src is that some ARB_f_p opcodes aren't native on r300 and we need to emit multiple instructions to emulate them (see LRP). If one of the sources used a non-native swizzle, we'd waste ALU instructions re-doing the swizzle at each emit. A case where this may be very important is the SIN/COS instructions: a document in the Radeon SDK says that COS is 11 instructions.

Also, TEX sources can be swizzled. So putting swizzling/negation into t_src made sense
in my mind.

If you think that I removed an important field in
pfs_reg, tell me. I am wondering if we can drop
the valid field?
The most important thing missing is the v_cross/s_cross fields. These are used to say that the source swizzle depends on the result of the other instruction stream. ie. WZYW (v_cross=1), colour instruction depends on result of alpha instruction, XYZX (s_cross=1), alpha insn depends on result of colour instruction. WZYX (v_cross=1,
s_cross=1), both depend on opposite stream.

This allows for an extremely primitive form of instruction reordering so that we make use of the split xyz/w units, instead of leaving a whole load of NOPS when an ARB_f_p
instruction only writes xyz or w.

The valid field comes in useful occasionally when testing some things. The has_w field was only used by my swizzling code to say whether or not the W coord had to be copied over to the resulting swizzle, so you could probably drop that if you don't need it for
your code.

I haven't yet done individual or global neg, but as I said,
I think that the best solution is to first swizzle and then
do a MAD t, -t, 1, 0 with the appropriate write mask.

Anyway, once Keith has committed your patch and you have
committed your change in r300, I will commit the change
to use the table with individual neg support...
Cool. I'll have a closer look at your code when I get home again in 12 or so hours.

Cheers,
Ben Skeggs.

Jerome Glisse

/*
 * Compact description of one fragment-program register operand, packed
 * into bit-fields (2 + 6 + 12 + 4 + 1 + 1 = 26 bits).
 *
 * NOTE(review): using an enum type for a bit-field is implementation-defined
 * in ISO C; the 2-bit `type` field must read back as 0..3 for the switch
 * statements on it to match REG_TYPE_CONST -- confirm for every compiler
 * this is built with.
 */
typedef struct _pfs_reg_t {
        /* Which register file the operand lives in (four values, 2 bits). */
        enum {
                REG_TYPE_INPUT,
                REG_TYPE_OUTPUT,
                REG_TYPE_TEMP,
                REG_TYPE_CONST
        } type:2;
        /* Register number within that file (0..63). */
        GLuint index:6;
        /* Four 3-bit component selectors: X in bits 0-2, Y in 3-5, Z in 6-8,
           W in 9-11.  The low 9 bits index the colour swizzle tables and
           bits 9-11 index the alpha swizzle table. */
        GLuint xyzw:12;
        /* Not read by swizzle()/emit_arith() as shown; presumably one negate
           flag per component -- TODO confirm. */
        GLuint negate:4;
        /* Not read by swizzle()/emit_arith() as shown. */
        GLboolean has_w:1;
        /* Not read by swizzle()/emit_arith() as shown. */
        GLboolean valid:1;
} pfs_reg_t;


/**
 * Copy a source register into a newly allocated temporary, applying the
 * swizzle held in swz_src.xyzw, and return the temporary's index.
 *
 * The colour (xyz) part is expanded into r300_swizzle[xyz].length vector
 * instructions taken from the r300_swizzle table; the alpha (w) part is a
 * single scalar instruction that is written alongside every vector
 * instruction emitted.  rp->v_pos, rp->s_pos, rp->alu.length and the
 * current node's alu_end are advanced as instructions are appended.
 *
 * @param rp       fragment program being assembled
 * @param swz_src  register to read; type must be INPUT, TEMP or CONST
 * @return index of the temporary that receives the swizzled value, or 0
 *         after logging an error when swz_src.type is not a readable type
 *         (0 is not distinguishable from a temporary with index 0).
 */
GLuint swizzle( struct r300_fragment_program *rp,
                         pfs_reg_t swz_src )
{
        /* regs[0] is the register read, regs[1] the temporary written;
           the table's src[] entries pick one of these slots. */
        GLuint regs[3] = { 0, 0, 0 };
        GLuint word[4] = { 0, 0, 0, 0 };
        GLuint step, swz_xyz, swz_w, end;
        pfs_reg_t scratch;

        if (swz_src.type == REG_TYPE_INPUT) {
                regs[0] = rp->inputs[swz_src.index];
        } else if (swz_src.type == REG_TYPE_TEMP) {
                /* NOTE(review): emit_arith() translates TEMP indices through
                   rp->temps[], whereas the raw index is used here -- confirm
                   which mapping is intended for TEMP sources. */
                regs[0] = swz_src.index;
                rp->used_in_node |= (1 << regs[0]);
        } else if (swz_src.type == REG_TYPE_CONST) {
                regs[0] = swz_src.index;
        } else {
                ERROR("invalid source reg\n");
                return 0;
        }

        /* Temporary that will hold the swizzled copy */
        scratch = get_temp_reg(rp);
        regs[1] = scratch.index;

        /* Low 9 bits select x/y/z, the next 3 bits select w */
        swz_xyz = swz_src.xyzw & 511;
        swz_w = (swz_src.xyzw >> 9) & 7;

        printf("w  : %d\n",swz_w);

        /* Scalar half: source w-swizzle with the other two args ONE and ZERO.
           NOTE(review): R300_FPI0_OUTC_MAD is named for instruction word 0
           but is OR'd into word 2 here -- confirm it is the intended opcode
           constant. */
        word[2] = r300_swz_srca_mask[0][swz_w] |
                (R300_FPI2_ARGA_ONE  << R300_FPI2_ARG1A_SHIFT) |
                (R300_FPI2_ARGA_ZERO << R300_FPI2_ARG2A_SHIFT) |
                R300_FPI0_OUTC_MAD;
        word[3] = regs[0] |
                R300_FPI3_SRC1A_CONST |
                R300_FPI3_SRC2A_CONST |
                (regs[1] << R300_FPI3_DSTA_SHIFT) |
                R300_FPI3_DSTA_REG;

        for (step = 0; step < r300_swizzle[swz_xyz].length; step++) {
                /* Each table step supplies a pair of words for the vector half */
                word[0] = r300_swizzle[swz_xyz].inst[2 * step];
                word[1] = r300_swizzle[swz_xyz].inst[2 * step + 1];
                word[1] |= regs[r300_swizzle[swz_xyz].src[step]];
                word[1] |= regs[1] << R300_FPI1_DSTC_SHIFT;

                rp->alu.inst[rp->v_pos].inst0 = word[0];
                rp->alu.inst[rp->v_pos].inst1 = word[1];
                rp->alu.inst[rp->s_pos].inst2 = word[2];
                rp->alu.inst[rp->s_pos].inst3 = word[3];
                rp->v_pos++;
                rp->s_pos++;

                /* Grow the program when either stream passed the current end */
                if (rp->v_pos > rp->s_pos)
                        end = rp->v_pos;
                else
                        end = rp->s_pos;
                if (end > rp->alu.length) {
                        rp->alu.length++;
                        rp->node[rp->cur_node].alu_end++;
                }
        }

        return regs[1];
}



/* Split a pfs_reg_t.xyzw value: the low 9 bits select x/y/z, the next 3
   bits select w. */
#define GET_XYZ(u)      ((u) & 511)
#define GET_W(u)        (((u) >> 9) & 7)

/**
 * Emit one ALU operation into the fragment program.
 *
 * The opcode and all operands are validated first; on any error a message
 * is logged and nothing is emitted or allocated.  Sources whose colour
 * swizzle table entry has bit 5 (value 32) set are not natively encodable
 * and are first copied through swizzle() into a temporary.
 *
 * While the "#if 1" swizzle-test code is enabled, the operation writes its
 * result to a scratch temporary, which is then swizzled to ZYXW and copied
 * into the real destination by a second instruction.  The "#else" branches
 * hold the direct-to-destination encoding.
 *
 * @param rp     fragment program being assembled
 * @param op     opcode, an index into r300_fpop[] in the range 0..MAX_PFS_OP
 * @param dest   destination register (TEMP or OUTPUT)
 * @param mask   write mask: bits 0-2 enable the vector (xyz) instruction,
 *               bit 3 enables the scalar (w) instruction
 * @param src0   first source operand
 * @param src1   second source operand (ignored when the opcode takes fewer)
 * @param src2   third source operand (ignored when the opcode takes fewer)
 * @param flags  extra bits OR'd into instruction words 0 and 2
 */
static void emit_arith( struct r300_fragment_program *rp,
                        int op,
                        pfs_reg_t dest,
                        int mask,
                        pfs_reg_t src0,
                        pfs_reg_t src1,
                        pfs_reg_t src2,
                        int flags )
{
        pfs_reg_t src[3] = { src0, src1, src2 };
        pfs_reg_t tt_reg;
        int hwdest, hwsrc[3];
        int argc;
        int v_idx, s_idx;
        GLuint inst[4] = { 0, 0, 0, 0 };
        GLuint srcc_mask, srca_mask;
        GLuint tt_id;
        int i;

        /* check opcode; a negative value would index r300_fpop[] out of
           bounds just like one that is too large */
        if (op < 0 || op > MAX_PFS_OP) {
                ERROR("unknown opcode!\n");
                return;
        }
        argc = r300_fpop[op].argc;

        /* grab hwregs of sources */
        for (i=0;i<argc;i++) {
                switch (src[i].type) {
                case REG_TYPE_INPUT:
                        hwsrc[i] = rp->inputs[src[i].index];
                        break;
                case REG_TYPE_TEMP:
                        hwsrc[i] = rp->temps[src[i].index];
                        rp->used_in_node |= (1 << hwsrc[i]);
                        break;
                case REG_TYPE_CONST:
                        hwsrc[i] = src[i].index;
                        break;
                default:
                        ERROR("invalid source reg\n");
                        return;
                }
        }

        /* grab hwregs of dest */
        switch (dest.type) {
        case REG_TYPE_TEMP:
                hwdest = rp->temps[dest.index];
                rp->used_in_node |= (1 << hwdest);
                break;
        case REG_TYPE_OUTPUT:
                hwdest = 0;
                break;
        default:
                ERROR("invalid dest reg type %d\n", dest.type);
                return;
        }

        /* Scratch temp for the swizzle test below.  Allocated only once all
           operands are known to be valid, so the error returns above do not
           consume a temporary. */
        tt_reg = get_temp_reg(rp);
        tt_id = tt_reg.index;

        /* Encode the three argument slots.  Swizzle selectors are 7 bits
           apart in words 0/2, source addresses 6 bits apart in words 1/3,
           and bit 5 of an address marks a constant-file source. */
        for (i=0;i<3;i++) {
                if (i < argc) {
                        if (0) {
                                printf("------------------------------\n");
                                printf("zero  a %d %d %d\n",
                                       i,
                                       GET_XYZ(pfs_zero.xyzw),
                                       GET_W(pfs_zero.xyzw));
                                printf("one   a %d %d %d\n",
                                       i,
                                       GET_XYZ(pfs_one.xyzw),
                                       GET_W(pfs_one.xyzw));
                                printf("arith a %d %d %d\n",
                                       i,
                                       GET_XYZ(src[i].xyzw),
                                       GET_W(src[i].xyzw));
                        }
                        srcc_mask=r300_swz_srcc_mask[i][GET_XYZ(src[i].xyzw)];
                        srca_mask=r300_swz_srca_mask[i][GET_W(src[i].xyzw)];

                        if (srcc_mask & 32) {
                                /* swizzle: copy through a temp, then read the
                                   temp with table entries 136 / 3
                                   (136 == 0 | 1<<3 | 2<<6) */
                                hwsrc[i] = swizzle(rp, src[i]);
                                inst[0] |= r300_swz_srcc_mask[i][136] << (i*7);
                                inst[2] |= r300_swz_srca_mask[i][3]   << (i*7);
                        } else {
                                /* native format lucky :) */
                                inst[0] |= srcc_mask << (i*7);
                                inst[2] |= srca_mask << (i*7);
                                if (src[i].type == REG_TYPE_CONST) {
                                        inst[1] |= (1<<5) << (i*6);
                                        inst[3] |= (1<<5) << (i*6);
                                }
                        }

                        inst[1] |= hwsrc[i] << (i*6);
                        inst[3] |= hwsrc[i] << (i*6);
                } else {
                        /* read constant zero, may as well use a ZERO swizzle
                           as well.. */
                        inst[0] |= R300_FPI0_ARGC_ZERO << (i*7);
                        inst[2] |= R300_FPI2_ARGA_ZERO << (i*7);
                        /* The constant-source flag belongs in the address
                           words 1 and 3, exactly as for used arguments. */
                        inst[1] |= (1<<5) << (i*6);
                        inst[3] |= (1<<5) << (i*6);
                }
        }

        /* swizzle() may have appended instructions and advanced both
           positions; sample them only now so this operation is placed after
           those instructions instead of overwriting them. */
        v_idx = rp->v_pos;
        s_idx = rp->s_pos;

        if (mask & 7) {
                rp->alu.inst[v_idx].inst0 = inst[0] | r300_fpop[op].v_op |flags;
#if 1
                rp->alu.inst[v_idx].inst1 = inst[1] |
                        (tt_id << R300_FPI1_DSTC_SHIFT) |
                        ((mask & WRITEMASK_XYZ) << 23);
#else
                rp->alu.inst[v_idx].inst1 = inst[1] |
                        (hwdest << R300_FPI1_DSTC_SHIFT) |
                        ((mask & WRITEMASK_XYZ) <<
                         (dest.type == REG_TYPE_OUTPUT ? 26 : 23));
#endif
                rp->v_pos = v_idx + 1;
        }
        if (mask & 8) {
                rp->alu.inst[s_idx].inst2 = inst[2] | r300_fpop[op].s_op |flags;
#if 1
                rp->alu.inst[s_idx].inst3 = inst[3] |
                        (tt_id << R300_FPI3_DSTA_SHIFT) |
                        (1 << 23);
#else
                rp->alu.inst[s_idx].inst3 = inst[3] |
                        (hwdest << R300_FPI3_DSTA_SHIFT) |
                        (1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23));
#endif
                rp->s_pos = s_idx + 1;
        }

        i = rp->v_pos > rp->s_pos ? rp->v_pos : rp->s_pos;
        if (i > rp->alu.length) {
                rp->alu.length++;
                rp->node[rp->cur_node].alu_end++;
        }
#if 1
        /* Swizzle test: reorder the scratch result to ZYXW through swizzle(),
           then copy it into the real destination with one more instruction. */
        tt_reg.xyzw = (SWIZZLE_Z) |
                (SWIZZLE_Y << 3)|
                (SWIZZLE_X << 6)|
                (SWIZZLE_W << 9);
        tt_id = swizzle(rp, tt_reg);

        v_idx = rp->v_pos;
        s_idx = rp->s_pos;

        printf("reg : %d\n",tt_id);

        /* arg0 = swizzled temp read with entries 136 / 3; arg1 and arg2 use
           entries 365 / 5 and 292 / 4 (365 == 5 | 5<<3 | 5<<6 and
           292 == 4 | 4<<3 | 4<<6 -- presumably the ONE and ZERO selectors,
           TODO confirm).  Both are flagged as constant-file reads. */
        inst[0]  = r300_swz_srcc_mask[0][136] << (0*7);
        inst[2]  = r300_swz_srca_mask[0][3]   << (0*7);
        inst[0] |= r300_swz_srcc_mask[0][365] << (1*7);
        inst[2] |= r300_swz_srca_mask[0][5]   << (1*7);
        inst[0] |= r300_swz_srcc_mask[0][292] << (2*7);
        inst[2] |= r300_swz_srca_mask[0][4]   << (2*7);
        inst[1]  = tt_id;
        inst[3]  = tt_id;
        inst[1] |= (1<<5) << (1*6);
        inst[1] |= (1<<5) << (2*6);
        inst[3] |= (1<<5) << (1*6);
        inst[3] |= (1<<5) << (2*6);

        /* Debug toggle: also flag arg0 as a constant-file read. */
        if (0) {
                inst[1] |= (1<<5);
                inst[3] |= (1<<5);
        }

        if (mask & 7) {
                /* The xyz write mask sits at bit 26 for outputs and bit 23
                   for temporaries. */
                inst[1] |= (hwdest << R300_FPI1_DSTC_SHIFT) |
                        ((mask & WRITEMASK_XYZ) <<
                         (dest.type == REG_TYPE_OUTPUT ? 26 : 23));
                rp->alu.inst[v_idx].inst0 = inst[0];
                rp->alu.inst[v_idx].inst1 = inst[1];
                rp->v_pos = v_idx + 1;
        }
        if (mask & 8) {
                /* The w write enable is bit 24 for outputs and bit 23 for
                   temporaries. */
                inst[3] |= (hwdest << R300_FPI3_DSTA_SHIFT) |
                        (1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23));

                rp->alu.inst[s_idx].inst2 = inst[2];
                rp->alu.inst[s_idx].inst3 = inst[3];
                rp->s_pos = s_idx + 1;
        }

        i = rp->v_pos > rp->s_pos ? rp->v_pos : rp->s_pos;
        if (i > rp->alu.length) {
                rp->alu.length++;
                rp->node[rp->cur_node].alu_end++;
        }
#endif
        return;
}


-------------------------------------------------------
This SF.Net email is sponsored by Oracle Space Sweepstakes
Want to be the first software developer in space?
Enter now for the Oracle Space Sweepstakes!
http://ads.osdn.com/?ad_id=7412&alloc_id=16344&op=click
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel





-------------------------------------------------------
This SF.Net email is sponsored by Oracle Space Sweepstakes
Want to be the first software developer in space?
Enter now for the Oracle Space Sweepstakes!
http://ads.osdn.com/?ad_id=7412&alloc_id=16344&op=click
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

Reply via email to