Mark Wielaard wrote on 04.08.2015 00:17:
> On Mon, Aug 03, 2015 at 10:34:19PM +0200, Kai Wasserbäch wrote:
>>> Could you point me to the source code that does the libelf calls to create
>>> the ELF file? Maybe reading the source helps to figure out what might go
>>> wrong. The stacktrace from the test doesn't immediately seem to give a
>>> direct clue.
>>
>> I think all the ELF stuff is encapsulated in
>> <http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeon/radeon_elf_util.c>
>> (and the header for that). The functions defined therein are called from
>> <http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/si_shader.c>
>> and
>> <http://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/si_compute.c>
>> if I haven't missed something. Michel can probably spot any mistakes in this,
>> therefore I CCed him on this message.
>>
>> Let me know, if you need something else.
> 
> Thanks that was really helpful. It looks like the real problem is the
> parsing of the relocation section. Would it be possible for you to dump
> the ELF image that is being parsed in radeon/radeon_elf_util.c
> (radeon_elf_read) Maybe just by adding the following just before the
> elf_memory () call:
>   int dfd = creat ("/tmp/dump.elf", 00755);
>   write (dfd, elf_buffer, elf_size);
>   close (dfd);

I guarded this with an environment variable and replaced creat(), which is
deprecated AFAIR, with open().

Then I ran the test. Instead of just running through, it never exited on its own.
After a few minutes I killed it (the size of dump.elf didn't change in the meantime).

In addition to the ELF dump, I added a shader dump, which radeonsi can produce.

Cheers,
Kai

Attachment: dump.elf
Description: Binary data

SHADER KEY
  instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  as_es = 0
  as_es = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] 
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] 
addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 
inreg, i32, i32, i32, i32) #0 {
main_body:
  %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 
0, i64 0
  %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
  %13 = add i32 %5, %7
  %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
  %15 = extractelement <4 x float> %14, i32 0
  %16 = extractelement <4 x float> %14, i32 1
  %17 = extractelement <4 x float> %14, i32 2
  %18 = extractelement <4 x float> %14, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, 
float %16, float %17, float %18)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, 
float)

attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

        s_load_dwordx4 s[0:3], s[8:9], 0x0                  ; C0800900
        v_add_i32_e32 v0, s10, v0                           ; 4A00000A
        s_waitcnt lgkmcnt(0)                                ; BF8C007F
        buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000
        s_waitcnt vmcnt(0)                                  ; BF8C0770
        exp 15, 12, 0, 1, 0, v0, v1, v2, v3                 ; F80008CF 03020100
        s_endpgm                                            ; BF810000

*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 36 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
  export_16bpc = 0x3
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..3]
DCL TEMP[0..4], ARRAY(1), LOCAL
DCL TEMP[5..6], LOCAL
DCL ADDR[0]
IMM[0] FLT64 {0.00000000, 0.25000000}
IMM[1] FLT64 {0.50000000, 0.75000000}
IMM[2] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IMM[0].xyxy
  1: MOV TEMP[1].xy, IMM[0].zwzw
  2: MOV TEMP[2].xy, IMM[1].xyxy
  3: MOV TEMP[3].xy, IMM[1].zwzw
  4: UARL ADDR[0].x, CONST[3].xxxx
  5: DADD TEMP[5].xy, TEMP[ADDR[0].x](1).xyxy, CONST[0].xyxy
  6: DNEG TEMP[6].xy, CONST[2].xyxy
  7: DADD TEMP[5].xy, TEMP[5].xyxy, TEMP[6].xyxy
  8: DABS TEMP[5].xy, TEMP[5].xyxy
  9: DSGE TEMP[5].x, CONST[1].xyxy, TEMP[5].xyxy
 10: UIF TEMP[5].xxxx :0
 11:   MOV TEMP[5], IMM[2].xyxy
 12: ELSE :0
 13:   MOV TEMP[5], IMM[2].yxxy
 14: ENDIF
 15: MOV OUT[0], TEMP[5]
 16: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] 
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] 
addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, 
<3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, 
float, i32, float, float) #0 {
main_body:
  %22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 
0, i64 0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
  %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
  %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
  %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
  %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
  %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
  %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
  %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
  %31 = bitcast float %30 to i32
  %32 = extractelement <5 x double> <double 0.000000e+00, double bitcast (<2 x 
i32> <i32 0, i32 1070596096> to double), double bitcast (<2 x i32> <i32 0, i32 
1071644672> to double), double bitcast (<2 x i32> <i32 0, i32 1072168960> to 
double), double 0.000000e+00>, i32 %31
  %33 = bitcast float %24 to i32
  %34 = insertelement <2 x i32> undef, i32 %33, i32 0
  %35 = bitcast float %25 to i32
  %36 = insertelement <2 x i32> %34, i32 %35, i32 1
  %37 = bitcast <2 x i32> %36 to double
  %38 = fadd double %32, %37
  %39 = bitcast float %28 to i32
  %40 = insertelement <2 x i32> undef, i32 %39, i32 0
  %41 = bitcast float %29 to i32
  %42 = insertelement <2 x i32> %40, i32 %41, i32 1
  %43 = bitcast <2 x i32> %42 to double
  %44 = fsub double %38, %43
  %45 = call double @fabs(double %44)
  %46 = bitcast float %26 to i32
  %47 = insertelement <2 x i32> undef, i32 %46, i32 0
  %48 = bitcast float %27 to i32
  %49 = insertelement <2 x i32> %47, i32 %48, i32 1
  %50 = bitcast <2 x i32> %49 to double
  %51 = fcmp oge double %50, %45
  %. = select i1 %51, float 1.000000e+00, float 0.000000e+00
  %.28 = select i1 %51, float 0.000000e+00, float 1.000000e+00
  %52 = call i32 @llvm.SI.packf16(float %.28, float %.)
  %53 = bitcast i32 %52 to float
  %54 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00)
  %55 = bitcast i32 %54 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53, 
float %55, float %53, float %55)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: readnone
declare double @fabs(double) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, 
float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

        s_load_dwordx4 s[0:3], s[2:3], 0x0                           ; C0800300
        v_mov_b32_e32 v1, 0x3fe80000                                 ; 7E0202FF 
3FE80000
        s_mov_b32 s5, SCRATCH_RSRC_DWORD1                            ; BE8503FF 
00000000
        v_mov_b32_e32 v2, 0                                          ; 7E040280
        v_mov_b32_e32 v3, 0                                          ; 7E060280
        v_mov_b32_e32 v0, 0                                          ; 7E000280
        s_waitcnt lgkmcnt(0)                                         ; BF8C007F
        s_buffer_load_dword s8, s[0:3], 0xc                          ; C204010C
        s_mov_b32 s4, SCRATCH_RSRC_DWORD0                            ; BE8403FF 
00000000
        s_mov_b32 s7, 0x80f000                                       ; BE8703FF 
0080F000
        s_mov_b32 s6, -1                                             ; BE8603C1
        v_mov_b32_e32 v4, 0                                          ; 7E080280
        buffer_store_dwordx2 v[0:1], v4, s[4:7], s10 offen offset:24 ; E0741018 
0A010004
        s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)                      ; BF8C0000
        v_mov_b32_e32 v1, 0x3fe00000                                 ; 7E0202FF 
3FE00000
        v_mov_b32_e32 v4, 0                                          ; 7E080280
        buffer_store_dwordx2 v[0:1], v4, s[4:7], s10 offen offset:16 ; E0741010 
0A010004
        s_waitcnt vmcnt(0) expcnt(0)                                 ; BF8C0700
        v_mov_b32_e32 v1, 0x3fd00000                                 ; 7E0202FF 
3FD00000
        v_mov_b32_e32 v4, 0                                          ; 7E080280
        buffer_store_dwordx2 v[0:1], v4, s[4:7], s10 offen offset:8  ; E0741008 
0A010004
        v_mov_b32_e32 v4, 0                                          ; 7E080280
        buffer_store_dwordx2 v[2:3], v4, s[4:7], s10 offen           ; E0741000 
0A010204
        s_lshl_b32 s8, s8, 3                                         ; 8F088308
        s_add_i32 s8, s8, 0                                          ; 81088008
        v_mov_b32_e32 v4, 0                                          ; 7E080280
        buffer_store_dwordx2 v[2:3], v4, s[4:7], s10 offen offset:32 ; E0741020 
0A010204
        s_waitcnt vmcnt(2) expcnt(0)                                 ; BF8C0702
        v_mov_b32_e32 v0, s8                                         ; 7E000208
        buffer_load_dwordx2 v[0:1], v0, s[4:7], s10 offen            ; E0341000 
0A010000
        s_buffer_load_dword s4, s[0:3], 0x0                          ; C2020100
        s_buffer_load_dword s5, s[0:3], 0x1                          ; C2028101
        s_buffer_load_dword s6, s[0:3], 0x8                          ; C2030108
        s_buffer_load_dword s7, s[0:3], 0x9                          ; C2038109
        s_buffer_load_dword s8, s[0:3], 0x4                          ; C2040104
        s_buffer_load_dword s0, s[0:3], 0x5                          ; C2000105
        s_waitcnt vmcnt(0) lgkmcnt(0)                                ; BF8C0070
        v_mov_b32_e32 v2, s4                                         ; 7E040204
        v_mov_b32_e32 v3, s5                                         ; 7E060205
        v_add_f64 v[0:1], v[0:1], v[2:3]                             ; D2C80000 
00020500
        v_mov_b32_e32 v2, s6                                         ; 7E040206
        v_mov_b32_e32 v3, s7                                         ; 7E060207
        v_add_f64 v[0:1], v[0:1], -v[2:3]                            ; D2C80000 
40020500
        v_mov_b32_e32 v2, s8                                         ; 7E040208
        v_mov_b32_e32 v3, s0                                         ; 7E060200
        v_cmp_ge_f64_e64 s[0:1], v[2:3], |v[0:1]|                    ; D04C0200 
00020102
        v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]                         ; D2000000 
0001E480
        v_cndmask_b32_e64 v1, 1.0, 0, s[0:1]                         ; D2000001 
000100F2
        v_cvt_pkrtz_f16_f32_e32 v0, v1, v0                           ; 5E000101
        v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0                           ; D25E0001 
0001E480
        exp 15, 0, 1, 1, 1, v0, v1, v0, v1                           ; F8001C0F 
01000100
        s_endpgm                                                     ; BF810000

*** SHADER STATS ***
SGPRS: 16
VGPRS: 8
Code Size: 284 bytes
LDS: 0 blocks
Scratch: 4096 bytes per wave
********************
SHADER KEY
  instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  as_es = 0
  as_es = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] 
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] 
addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 
inreg, i32, i32, i32, i32) #0 {
main_body:
  %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 
0, i64 0
  %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
  %13 = add i32 %5, %7
  %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
  %15 = extractelement <4 x float> %14, i32 0
  %16 = extractelement <4 x float> %14, i32 1
  %17 = extractelement <4 x float> %14, i32 2
  %18 = extractelement <4 x float> %14, i32 3
  %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 
0, i64 1
  %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
  %21 = add i32 %5, %7
  %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
  %23 = extractelement <4 x float> %22, i32 0
  %24 = extractelement <4 x float> %22, i32 1
  %25 = extractelement <4 x float> %22, i32 2
  %26 = extractelement <4 x float> %22, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, 
float %24, float %25, float %26)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, 
float %16, float %17, float %18)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, 
float)

attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{!"const", null, i32 1}

Shader Disassembly:

        s_load_dwordx4 s[0:3], s[8:9], 0x0                  ; C0800900
        s_load_dwordx4 s[4:7], s[8:9], 0x4                  ; C0820904
        v_add_i32_e32 v0, s10, v0                           ; 4A00000A
        s_waitcnt lgkmcnt(0)                                ; BF8C007F
        buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
        buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
        s_waitcnt vmcnt(0)                                  ; BF8C0770
        exp 15, 32, 0, 0, 0, v5, v6, v7, v8                 ; F800020F 08070605
        exp 15, 12, 0, 1, 0, v1, v2, v3, v4                 ; F80008CF 04030201
        s_endpgm                                            ; BF810000

*** SHADER STATS ***
SGPRS: 16
VGPRS: 12
Code Size: 56 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
  export_16bpc = 0x3
  last_cbuf = 0
  color_two_side = 0
  alpha_func = 7
  alpha_to_one = 0
  poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] 
addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] 
addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, 
<3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, 
float, i32, float, float) #0 {
main_body:
  %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5)
  %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5)
  %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5)
  %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5)
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  %28 = call i32 @llvm.SI.packf16(float %24, float %25)
  %29 = bitcast i32 %28 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, 
float %29, float %27, float %29)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, 
float)

attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }

Shader Disassembly:

        s_mov_b32 m0, s9                    ; BEFC0309
        v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
        v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
        v_cvt_pkrtz_f16_f32_e32 v0, v0, v1  ; 5E000300
        v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
        v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
        v_cvt_pkrtz_f16_f32_e32 v1, v1, v2  ; 5E020501
        exp 15, 0, 1, 1, 1, v0, v1, v0, v1  ; F8001C0F 01000100
        s_endpgm                            ; BF810000

*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to