[Mesa-dev] [PATCH v2 13/16] intel: tools: dump-gpu: dump 48-bit addresses
From: Scott D Phillips For gen8+, write out PPGTT tables in aub files so that full 48-bit addresses can be serialized. v2: Fix handling of `end` index in map_ppgtt Signed-off-by: Scott D Phillips Signed-off-by: Lionel Landwerlin Cc: Jordan Justen --- src/intel/tools/intel_aub.h | 3 +- src/intel/tools/intel_dump_gpu.c | 315 +++ 2 files changed, 151 insertions(+), 167 deletions(-) diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h index 9ca548edaf3..2888515048f 100644 --- a/src/intel/tools/intel_aub.h +++ b/src/intel/tools/intel_aub.h @@ -117,7 +117,8 @@ /* DW3 */ #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK 0xf000 -#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL (1 << 28) +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT (0 << 28) +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL (2 << 28) #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY (4 << 28) /** diff --git a/src/intel/tools/intel_dump_gpu.c b/src/intel/tools/intel_dump_gpu.c index 86c133da433..bfff481ba5e 100644 --- a/src/intel/tools/intel_dump_gpu.c +++ b/src/intel/tools/intel_dump_gpu.c @@ -51,6 +51,8 @@ #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1)) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_BATCH_NON_SECURE_I965 (1 << 8) + #define MI_BATCH_BUFFER_END (0xA << 23) #define min(a, b) ({\ @@ -59,6 +61,12 @@ _a < _b ? _a : _b; \ }) +#define max(a, b) ({\ + __typeof(a) _a = (a); \ + __typeof(b) _b = (b); \ + _a > _b ? 
_a : _b; \ + }) + #define HWS_PGA_RCSUNIT 0x02080 #define HWS_PGA_VCSUNIT0 0x12080 #define HWS_PGA_BCSUNIT 0x22080 @@ -93,8 +101,12 @@ #define RING_SIZE (1 * 4096) #define PPHWSP_SIZE (1 * 4096) -#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * 4096) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * 4096) +#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * 4096) +#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * 4096) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096) +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * 4096) + #define STATIC_GGTT_MAP_START 0 @@ -110,14 +122,19 @@ #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + GEN8_LR_CONTEXT_OTHER_SIZE) #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START) -#define CONTEXT_FLAGS (0x229) /* Normal Priority | L3-LLC Coherency | - Legacy Context with no 64 bit VA support | Valid */ +#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END)) + +#define CONTEXT_FLAGS (0x339) /* Normal Priority | L3-LLC Coherency | + * PPGTT Enabled | + * Legacy Context with 64 bit VA support | + * Valid + */ -#define RENDER_CONTEXT_DESCRIPTOR ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR | CONTEXT_FLAGS) -#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS) -#define VIDEO_CONTEXT_DESCRIPTOR ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR | CONTEXT_FLAGS) +#define RENDER_CONTEXT_DESCRIPTOR ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR | CONTEXT_FLAGS) +#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR | CONTEXT_FLAGS) +#define VIDEO_CONTEXT_DESCRIPTOR ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR | CONTEXT_FLAGS) -static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE / +static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* Choose the largest */ sizeof(uint32_t)] = { 0 /* MI_NOOP */, MI_LOAD_REGISTER_IMM_n(14) | MI_LRI_FORCE_POSTED, @@ -147,8 +164,8 @@ static const uint32_t 
render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE / 0x2280 /* PDP2_LDW */, 0, 0x227C /* PDP1_UDW */, 0, 0x2278 /* PDP1_LDW */, 0, - 0x2274 /* PDP0_UDW */, 0, - 0x2270 /* PDP0_LDW */, 0, + 0x2274 /* PDP0_UDW */, PML4_PHYS_ADDR >> 32, + 0x2270 /* PDP0_LDW */, PML4_PHYS_ADDR, /* MI_NOOP */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -185,8 +202,8 @@ static const uint32_t blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE / 0x22280 /* PDP2_LDW */, 0, 0x2227C /* PDP1_UDW */, 0, 0x22278 /* PDP1_LDW */, 0, - 0x22274 /* PDP0_UDW */, 0, - 0x22270 /* PDP0_LDW */, 0, + 0x22274 /* PDP0_UDW */, PML4_PHYS_ADDR >> 32, + 0x22270 /* PDP0_LDW */, PML4_PHYS_ADDR, /* MI_NOOP */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -220,8 +237,8 @@ static const uint32_t video_context_init[GEN8_LR_CONTEXT_OTHER_SIZE / 0x1C280 /* PDP2_LDW */, 0, 0x1C27C /* PDP1_UDW */, 0, 0x1C
Re: [Mesa-dev] [PATCH v2 13/16] intel: tools: dump-gpu: dump 48-bit addresses
On Tue, Jun 19, 2018 at 02:45:28PM +0100, Lionel Landwerlin wrote: > From: Scott D Phillips > > For gen8+, write out PPGTT tables in aub files so that full 48-bit > addresses can be serialized. > > v2: Fix handling of `end` index in map_ppgtt > > Signed-off-by: Scott D Phillips > Signed-off-by: Lionel Landwerlin > Cc: Jordan Justen > --- > src/intel/tools/intel_aub.h | 3 +- > src/intel/tools/intel_dump_gpu.c | 315 +++ > 2 files changed, 151 insertions(+), 167 deletions(-) > > diff --git a/src/intel/tools/intel_aub.h b/src/intel/tools/intel_aub.h > index 9ca548edaf3..2888515048f 100644 > --- a/src/intel/tools/intel_aub.h > +++ b/src/intel/tools/intel_aub.h > @@ -117,7 +117,8 @@ > /* DW3 */ > > #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_MASK 0xf000 > -#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_LOCAL (1 << 28) > +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT (0 << 28) > +#define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL (2 << 28) > #define AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY (4 << 28) > > /** > diff --git a/src/intel/tools/intel_dump_gpu.c > b/src/intel/tools/intel_dump_gpu.c > index 86c133da433..bfff481ba5e 100644 > --- a/src/intel/tools/intel_dump_gpu.c > +++ b/src/intel/tools/intel_dump_gpu.c > @@ -51,6 +51,8 @@ > #define MI_LOAD_REGISTER_IMM_n(n) ((0x22 << 23) | (2 * (n) - 1)) > #define MI_LRI_FORCE_POSTED (1<<12) > > +#define MI_BATCH_NON_SECURE_I965 (1 << 8) > + > #define MI_BATCH_BUFFER_END (0xA << 23) > > #define min(a, b) ({\ > @@ -59,6 +61,12 @@ > _a < _b ? _a : _b; \ > }) > > +#define max(a, b) ({\ > + __typeof(a) _a = (a); \ > + __typeof(b) _b = (b); \ > + _a > _b ? 
_a : _b; \ > + }) > + > #define HWS_PGA_RCSUNIT 0x02080 > #define HWS_PGA_VCSUNIT0 0x12080 > #define HWS_PGA_BCSUNIT 0x22080 > @@ -93,8 +101,12 @@ > > #define RING_SIZE (1 * 4096) > #define PPHWSP_SIZE (1 * 4096) > -#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * 4096) > -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * 4096) > +#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * 4096) > +#define GEN10_LR_CONTEXT_RENDER_SIZE (19 * 4096) > +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * 4096) > +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * 4096) > +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * 4096) > + > > #define STATIC_GGTT_MAP_START 0 > > @@ -110,14 +122,19 @@ > #define STATIC_GGTT_MAP_END (VIDEO_CONTEXT_ADDR + PPHWSP_SIZE + > GEN8_LR_CONTEXT_OTHER_SIZE) > #define STATIC_GGTT_MAP_SIZE (STATIC_GGTT_MAP_END - STATIC_GGTT_MAP_START) > > -#define CONTEXT_FLAGS (0x229) /* Normal Priority | L3-LLC Coherency | > - Legacy Context with no 64 bit VA support > | Valid */ > +#define PML4_PHYS_ADDR ((uint64_t)(STATIC_GGTT_MAP_END)) > + > +#define CONTEXT_FLAGS (0x339) /* Normal Priority | L3-LLC Coherency | > + * PPGTT Enabled | > + * Legacy Context with 64 bit VA support | > + * Valid > + */ > > -#define RENDER_CONTEXT_DESCRIPTOR ((uint64_t)1 << 32 | RENDER_CONTEXT_ADDR > | CONTEXT_FLAGS) > -#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 32 | BLITTER_CONTEXT_ADDR > | CONTEXT_FLAGS) > -#define VIDEO_CONTEXT_DESCRIPTOR ((uint64_t)3 << 32 | VIDEO_CONTEXT_ADDR > | CONTEXT_FLAGS) > +#define RENDER_CONTEXT_DESCRIPTOR ((uint64_t)1 << 62 | RENDER_CONTEXT_ADDR > | CONTEXT_FLAGS) > +#define BLITTER_CONTEXT_DESCRIPTOR ((uint64_t)2 << 62 | BLITTER_CONTEXT_ADDR > | CONTEXT_FLAGS) > +#define VIDEO_CONTEXT_DESCRIPTOR ((uint64_t)3 << 62 | VIDEO_CONTEXT_ADDR > | CONTEXT_FLAGS) > > -static const uint32_t render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE / > +static const uint32_t render_context_init[GEN9_LR_CONTEXT_RENDER_SIZE / /* > Choose the largest */ > sizeof(uint32_t)] = { > 0 /* MI_NOOP */, > MI_LOAD_REGISTER_IMM_n(14) | 
MI_LRI_FORCE_POSTED, > @@ -147,8 +164,8 @@ static const uint32_t > render_context_init[GEN10_LR_CONTEXT_RENDER_SIZE / > 0x2280 /* PDP2_LDW */, 0, > 0x227C /* PDP1_UDW */, 0, > 0x2278 /* PDP1_LDW */, 0, > - 0x2274 /* PDP0_UDW */, 0, > - 0x2270 /* PDP0_LDW */, 0, > + 0x2274 /* PDP0_UDW */, PML4_PHYS_ADDR >> 32, > + 0x2270 /* PDP0_LDW */, PML4_PHYS_ADDR, > /* MI_NOOP */ > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, > > @@ -185,8 +202,8 @@ static const uint32_t > blitter_context_init[GEN8_LR_CONTEXT_OTHER_SIZE / > 0x22280 /* PDP2_LDW */, 0, > 0x2227C /* PDP1_UDW */, 0, > 0x22278 /* PDP1_LDW */, 0, > - 0x22274 /* PDP0_UDW */, 0, > - 0x22270 /* PDP0_LDW */, 0, > + 0x22274 /* PD