On SKL+, the linear source buffer has to start on a cache-line boundary
to meet the 2D engine's source copy requirements. Apply this cache-line
alignment policy on SKL+ only.

v2: Apply these changes only to SKL+ to avoid breaking older platforms,
    based on Chris Wilson's review.

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Signed-off-by: Guang Bai <guang....@intel.com>
---
 src/sna/sna_io.c | 47 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index d32bd58..ae82d1f 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -1064,7 +1064,7 @@ tile:
        if (kgem->gen >= 0100) {
                cmd |= 8;
                do {
-                       int nbox_this_time, rem;
+                       int nbox_this_time, rem, pitch_aligned;
 
                        nbox_this_time = nbox;
                        rem = kgem_batch_space(kgem);
@@ -1077,12 +1077,19 @@ tile:
 
                        /* Count the total number of bytes to be read and 
allocate a
                         * single buffer large enough. Or if it is very small, 
combine
-                        * with other allocations. */
+                        * with other allocations. Each sub-buffer starting 
point has
+                        * to be aligned to 64 bytes to conform to SKL+ hardware 
requirements.
+                        * Align the pitch of each sub-buffer to 64 bytes for 
simplicity.
+                        */
                        offset = 0;
                        for (n = 0; n < nbox_this_time; n++) {
                                int height = box[n].y2 - box[n].y1;
                                int width = box[n].x2 - box[n].x1;
-                               offset += PITCH(width, 
dst->drawable.bitsPerPixel >> 3) * height;
+                               if (kgem->gen >= 0110) {
+                                       pitch_aligned = ALIGN(PITCH(width, 
dst->drawable.bitsPerPixel >> 3), 64);
+                                       offset += pitch_aligned * height;
+                               } else
+                                       offset += PITCH(width, 
dst->drawable.bitsPerPixel >> 3) * height;
                        }
 
                        src_bo = kgem_create_buffer(kgem, offset,
@@ -1113,14 +1120,24 @@ tile:
                                        assert(box->x1 + dst_dx >= 0);
                                        assert(box->y1 + dst_dy >= 0);
 
-                                       memcpy_blt(src, (char *)ptr + offset,
-                                                  dst->drawable.bitsPerPixel,
-                                                  stride, pitch,
-                                                  box->x1 + src_dx, box->y1 + 
src_dy,
-                                                  0, 0,
-                                                  width, height);
+                                       if (kgem->gen >= 0110) {
+                                               pitch_aligned = ALIGN(pitch, 
64);
+                                               memcpy_blt(src, (char *)ptr + 
offset,
+                                                          
dst->drawable.bitsPerPixel,
+                                                          stride, 
pitch_aligned,
+                                                          box->x1 + src_dx, 
box->y1 + src_dy,
+                                                          0, 0,
+                                                          width, height);
+                                       } else
+                                               memcpy_blt(src, (char *)ptr + 
offset,
+                                                          
dst->drawable.bitsPerPixel,
+                                                          stride, pitch,
+                                                          box->x1 + src_dx, 
box->y1 + src_dy,
+                                                          0, 0,
+                                                          width, height);
 
                                        assert(kgem->mode == KGEM_BLT);
+
                                        b = kgem->batch + kgem->nbatch;
                                        b[0] = cmd;
                                        b[1] = br13;
@@ -1133,16 +1150,22 @@ tile:
                                                                 
KGEM_RELOC_FENCED,
                                                                 0);
                                        b[6] = 0;
-                                       b[7] = pitch;
+                                       if (kgem->gen >= 0110)
+                                               b[7] = pitch_aligned;
+                                       else
+                                               b[7] = pitch;
+
                                        *(uint64_t *)(b+8) =
                                                kgem_add_reloc64(kgem, 
kgem->nbatch + 8, src_bo,
                                                                 
I915_GEM_DOMAIN_RENDER << 16 |
                                                                 
KGEM_RELOC_FENCED,
                                                                 offset);
                                        kgem->nbatch += 10;
-
                                        box++;
-                                       offset += pitch * height;
+                                       if (kgem->gen >= 0110)
+                                               offset += pitch_aligned * 
height;
+                                       else
+                                               offset += pitch * height;
                                } while (--nbox_this_time);
                                assert(offset == __kgem_buffer_size(src_bo));
                                sigtrap_put();
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to