Hi,
I'm testing some OpenMP offloading behavior and gfortran 15.2 raise a
bad behaviour with this small piece of code.
The code tests an array of a simple user defined type with 2 arrays (a
vector of 3 elements and a matrix of 3x3 elements). The arrays sizes are
known at compile time.
An array of 10 element is built, values are intialized then they are
checked on host and on device.
The code works with Nvidia (nvhpc/24.3 on A100 and nvhpc/25.5 on T600),
Cray Fortran (cce18 on Mi250) or the AMD compiler (on Mi250) but not
with GCC 15.2 on Nvidia T600 nor with the old GCC OG13 on Nvidia A100 GPU .
In the provided code the reductions (kernels lines 100 or 107) seams to
always return .true. with gfortran. Changing the tested value (removing
the minus sign for exemple) do no change the code behaviour.
Built with:
gfortran -c -fopenmp -cpp begou_omp_array_of_types.f90
gfortran begou_omp_array_of_types.o -fopenmp -cpp -o
begou_omp_array_of_types.exe
execution shows always:
on cpu OK
on gpu OK
Element xx is valid
Patrick
!=================================
! Ce code la gestion d'un tableau de types
! définis par utilisateur, offloadé sur GPU
! le type ne contient que des tableaux de taille
! connue à la compilation
!=================================
module mytype_m
implicit none
type mytype_t
integer :: mydefval
logical :: ongpu
integer, dimension(3) :: vect
integer, dimension(3,3) :: mat
type(mytype_t), pointer :: next
end type mytype_t
contains
!-----------------------------------
! Ajouter un élément dans la liste chainée sur le Host
! les tableaux sont à 0
! defval > 10 et l'unité est le n° de l'élémént dans le tableau
!-----------------------------------
subroutine init_one_element(elem, defval,ongpu)
implicit none
type(mytype_t) :: elem
integer, intent(in) :: defval
integer :: i,j
logical, intent(in), optional :: ongpu
logical :: isongpu
if (present(ongpu)) then
isongpu=ongpu
else
isongpu=.false.
end if
if (isongpu) then
!$omp target enter data map(to: elem)
!$omp target teams distribute parallel do
do i=1,3
elem%vect(i)=-1*(defval+i)
end do
!$omp target teams distribute parallel do collapse(2)
do i=1,3
do j=1,3
elem%mat(i,j)=-1*(defval+i+3*(j-1))
end do
end do
end if
! initialisations sur le host
elem%mydefval=defval
elem%ongpu=isongpu
do i=1,3
elem%vect(i)=defval+i
end do
do j=1,3
do i=1,3
elem%mat(i,j)=defval+i+3*(j-1)
end do
end do
end subroutine init_one_element
!-----------------------------------
! Verification d'un élément
!-----------------------------------
function check_one_element(current) result(ok)
implicit none
type(mytype_t) :: current
logical :: ok,okgpu1,okgpu2
integer :: i,j,tocheck
ok=.true.
do i=1,3
ok= ok .and. (current%vect(i) .eq. (current%mydefval+i))
end do
do j=1,3
do i=1,3
ok= ok .and. (current%mat(i,j) .eq. (current%mydefval+i+3*(j-1)))
end do
end do
if (ok) then
write(6,'(a)')" on cpu OK"
else
write(6,'(a)')" on cpu ERROR"
end if
okgpu1=.true.
okgpu2=.true.
if(current%ongpu) then
tocheck=current%mydefval
!$omp target teams distribute parallel do reduction(.and.:okgpu1)
do i=1,3
okgpu1 = okgpu1 .and. (current%vect(i) .EQ. -1*(tocheck+i))
end do
!$omp target teams distribute parallel do reduction(.and.:okgpu2) collapse(2)
do i=1,3
do j=1,3
okgpu2= okgpu2 .and. (current%mat(i,j) .eq. 1*(tocheck+i+3*(j-1)))
end do
end do
if (okgpu1 .and. okgpu2 ) then
write(6,'(a)')" on gpu OK"
else
write(6,'(2(a,L))')" on gpu ERROR vector: ",okgpu1," matrice: ", okgpu2
end if
end if
ok=ok .and. okgpu1 .and. okgpu2
return
end function check_one_element
end module mytype_m
program main
use mytype_m
implicit none
integer, parameter :: N=10
type(mytype_t), dimension(N) :: tableau
integer :: inival=0
integer :: item, i, j, localinival
logical :: ok
do item=1,N
call init_one_element(tableau(item), 10*item, ongpu=.true.)
end do
do item=1,N
ok = check_one_element(tableau(item))
if (ok) then
write(6,'(a,i2,a)') "Element ",item," is valid "
else
write(6,'(a,i2,a)') "Element ",item," is invalid"
end if
end do
end program main