On SKL+, the linear source buffer has to start on a cache-line boundary
to meet the 2D engine's source copy requirements.

Signed-off-by: Guang Bai <guang....@intel.com>
---
 src/sna/sna_io.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index d32bd58..5bfbdbb 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -1064,7 +1064,7 @@ tile:
        if (kgem->gen >= 0100) {
                cmd |= 8;
                do {
-                       int nbox_this_time, rem;
+                       int nbox_this_time, rem, pitch_aligned;
 
                        nbox_this_time = nbox;
                        rem = kgem_batch_space(kgem);
@@ -1077,12 +1077,16 @@ tile:
 
                        /* Count the total number of bytes to be read and 
allocate a
                         * single buffer large enough. Or if it is very small, 
combine
-                        * with other allocations. */
+                        * with other allocations. Each sub-buffer starting 
point has
+                        * to be aligned to 64 bytes to conform to the latest hardware 
requirements.
+                        * Align the pitch of each sub-buffer to 64 bytes for 
simplicity.
+                        */
                        offset = 0;
                        for (n = 0; n < nbox_this_time; n++) {
                                int height = box[n].y2 - box[n].y1;
                                int width = box[n].x2 - box[n].x1;
-                               offset += PITCH(width, 
dst->drawable.bitsPerPixel >> 3) * height;
+                               pitch_aligned = ALIGN(PITCH(width, 
dst->drawable.bitsPerPixel >> 3), 64);
+                               offset += pitch_aligned * height;
                        }
 
                        src_bo = kgem_create_buffer(kgem, offset,
@@ -1113,9 +1117,10 @@ tile:
                                        assert(box->x1 + dst_dx >= 0);
                                        assert(box->y1 + dst_dy >= 0);
 
+                                       pitch_aligned = ALIGN(pitch, 64);
                                        memcpy_blt(src, (char *)ptr + offset,
                                                   dst->drawable.bitsPerPixel,
-                                                  stride, pitch,
+                                                  stride, pitch_aligned,
                                                   box->x1 + src_dx, box->y1 + 
src_dy,
                                                   0, 0,
                                                   width, height);
@@ -1133,7 +1138,7 @@ tile:
                                                                 
KGEM_RELOC_FENCED,
                                                                 0);
                                        b[6] = 0;
-                                       b[7] = pitch;
+                                       b[7] = pitch_aligned;
                                        *(uint64_t *)(b+8) =
                                                kgem_add_reloc64(kgem, 
kgem->nbatch + 8, src_bo,
                                                                 
I915_GEM_DOMAIN_RENDER << 16 |
@@ -1142,7 +1147,7 @@ tile:
                                        kgem->nbatch += 10;
 
                                        box++;
-                                       offset += pitch * height;
+                                       offset += pitch_aligned * height;
                                } while (--nbox_this_time);
                                assert(offset == __kgem_buffer_size(src_bo));
                                sigtrap_put();
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to