>> + /* Copy and then modify blit ctx for use in a chunked blit */
>> + chunk = ctx;
>> + chunk.src_bits = pix_buf;
>> + chunk.src.y = 0;
>> + chunk.src_stride = ATI_HOST_DATA_ACC_BITS * bypp;
>> +
>> + /* Blit one scanline chunk at a time */
>> + row = s->host_data.row;
>> + col = s->host_data.col;
>> + idx = 0;
>> + DPRINTF("blt %dpx @ row: %d, col: %d\n", pix_count, row, col);
>> + while (idx < pix_count && row < ctx.dst.height) {
>> + unsigned pix_in_scanline = MIN(pix_count - idx,
>> + ctx.dst.width - col);
>> + chunk.src.x = idx;
>> + /* Build a rect for this scanline chunk */
>> + chunk.dst.x = ctx.dst.x + col;
>> + chunk.dst.y = ctx.dst.y + row;
>> + chunk.dst.width = pix_in_scanline;
>> + chunk.dst.height = 1;
>> + DPRINTF("blt %dpx span @ row: %d, col: %d to dst (%d,%d)\n",
>> + pix_in_scanline, row, col, chunk.dst.x, chunk.dst.y);
>> + if (ati_2d_do_blt(&chunk, s->use_pixman)) {
>> + ati_set_dirty(&s->vga, &chunk);
>> + }
>> + idx += pix_in_scanline;
>> + col += pix_in_scanline;
>> + if (col >= ctx.dst.width) {
>> + col = 0;
>> + row += 1;
>> + }
>> + }
>
> This looks a bit complex. I wonder if this would be simpler as two
> nested loops over rows and then cols but I can live with this for now too.
>
I gave nested loops a shot on this but my attempt didn't turn out any simpler,
sadly. I think it may not fit well here in that we don't necessarily get full
rows and so we always have to be tracking the number of pixels written so that
we can end early when we run out of data. So the inner column loop ends up
having to do all of this bookkeeping anyway.