[bug report] drm/amdgpu/mes: use ring for kernel queue submission

2024-10-07 Thread Dan Carpenter
Hello Jack Xiao,

Commit d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue
submission") from Mar 27, 2020 (linux-next), leads to the following
Smatch static checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring()
warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213)

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
1144 int queue_type, int idx,
1145 struct amdgpu_mes_ctx_data *ctx_data,
1146 struct amdgpu_ring **out)
1147 {
1148 struct amdgpu_ring *ring;
1149 struct amdgpu_mes_gang *gang;
1150 struct amdgpu_mes_queue_properties qprops = {0};
1151 int r, queue_id, pasid;
1152 
1153 /*
1154  * Avoid taking any other locks under MES lock to avoid 
circular
1155  * lock dependencies.
1156  */
1157 amdgpu_mes_lock(&adev->mes);
1158 gang = idr_find(&adev->mes.gang_id_idr, gang_id);
1159 if (!gang) {
1160 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
1161 amdgpu_mes_unlock(&adev->mes);
1162 return -EINVAL;
1163 }
1164 pasid = gang->process->pasid;
1165 
1166 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1167 if (!ring) {
1168 amdgpu_mes_unlock(&adev->mes);
1169 return -ENOMEM;
1170 }
1171 
1172 ring->ring_obj = NULL;
1173 ring->use_doorbell = true;
1174 ring->is_mes_queue = true;
1175 ring->mes_ctx = ctx_data;
1176 ring->idx = idx;
1177 ring->no_scheduler = true;
1178 
1179 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1180 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1181   compute[ring->idx].mec_hpd);
1182 ring->eop_gpu_addr =
1183 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1184 }
1185 
1186 switch (queue_type) {
1187 case AMDGPU_RING_TYPE_GFX:
1188 ring->funcs = adev->gfx.gfx_ring[0].funcs;
1189 ring->me = adev->gfx.gfx_ring[0].me;
1190 ring->pipe = adev->gfx.gfx_ring[0].pipe;
1191 break;
1192 case AMDGPU_RING_TYPE_COMPUTE:
1193 ring->funcs = adev->gfx.compute_ring[0].funcs;
1194 ring->me = adev->gfx.compute_ring[0].me;
1195 ring->pipe = adev->gfx.compute_ring[0].pipe;
1196 break;
1197 case AMDGPU_RING_TYPE_SDMA:
1198 ring->funcs = adev->sdma.instance[0].ring.funcs;
1199 break;
1200 default:
1201 BUG();
1202 }
1203 
1204 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1205  AMDGPU_RING_PRIO_DEFAULT, NULL);
1206 if (r)
1207 goto clean_up_memory;
1208 
1209 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1210 
1211 dma_fence_wait(gang->process->vm->last_update, false);
1212 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
1213 amdgpu_mes_unlock(&adev->mes);
 ^

1214 
1215 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1216 if (r)
1217 goto clean_up_ring;
 ^^

1218 
1219 ring->hw_queue_id = queue_id;
1220 ring->doorbell_index = qprops.doorbell_off;
1221 
1222 if (queue_type == AMDGPU_RING_TYPE_GFX)
1223 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, 
queue_id);
1224 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1225 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1226 queue_id);
1227 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1228 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1229 queue_id);
1230 else
1231 BUG();
1232 
1233 *out = ring;
1234 return 0;
1235 
1236 clean_up_ring:
1237 amdgpu_ring_fini(ring);
1238 clean_up_memory:
1239 kfree(ring);
--> 1240 amdgpu_mes_unlock(&adev->mes);
 ^^

1241 return r;
1242 }

regards,
dan carpenter


[bug report] drm/amdgpu/mes: use ring for kernel queue submission

2022-10-26 Thread Dan Carpenter
Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1056 amdgpu_mes_add_ring()
error: format string overflow. buf_size: 16 length: 38 [user data]

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
980 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
981 int queue_type, int idx,
982 struct amdgpu_mes_ctx_data *ctx_data,
983 struct amdgpu_ring **out)
984 {
985 struct amdgpu_ring *ring;
986 struct amdgpu_mes_gang *gang;
987 struct amdgpu_mes_queue_properties qprops = {0};
988 int r, queue_id, pasid;
989 
990 /*
991  * Avoid taking any other locks under MES lock to avoid circular
992  * lock dependencies.
993  */
994 amdgpu_mes_lock(&adev->mes);
995 gang = idr_find(&adev->mes.gang_id_idr, gang_id);
996 if (!gang) {
997 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
998 amdgpu_mes_unlock(&adev->mes);
999 return -EINVAL;
1000 }
1001 pasid = gang->process->pasid;
1002 
1003 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
1004 if (!ring) {
1005 amdgpu_mes_unlock(&adev->mes);
1006 return -ENOMEM;
1007 }
1008 
1009 ring->ring_obj = NULL;
1010 ring->use_doorbell = true;
1011 ring->is_mes_queue = true;
1012 ring->mes_ctx = ctx_data;
1013 ring->idx = idx;
1014 ring->no_scheduler = true;
1015 
1016 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1017 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
1018   compute[ring->idx].mec_hpd);
1019 ring->eop_gpu_addr =
1020 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
1021 }
1022 
1023 switch (queue_type) {
1024 case AMDGPU_RING_TYPE_GFX:
1025 ring->funcs = adev->gfx.gfx_ring[0].funcs;
1026 break;
1027 case AMDGPU_RING_TYPE_COMPUTE:
1028 ring->funcs = adev->gfx.compute_ring[0].funcs;
1029 break;
1030 case AMDGPU_RING_TYPE_SDMA:
1031 ring->funcs = adev->sdma.instance[0].ring.funcs;
1032 break;
1033 default:
1034 BUG();
1035 }
1036 
1037 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
1038  AMDGPU_RING_PRIO_DEFAULT, NULL);
1039 if (r)
1040 goto clean_up_memory;
1041 
1042 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
1043 
1044 dma_fence_wait(gang->process->vm->last_update, false);
1045 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
1046 amdgpu_mes_unlock(&adev->mes);
1047 
1048 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
1049 if (r)
1050 goto clean_up_ring;
1051 
1052 ring->hw_queue_id = queue_id;
1053 ring->doorbell_index = qprops.doorbell_off;
1054 
1055 if (queue_type == AMDGPU_RING_TYPE_GFX)
--> 1056 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, 
queue_id);

I'm not sure why this is warning now instead of in 2020.  But the bug is
definitely real.  "gang_id" is capped at INT_MAX so that can overflow
already even if the values of "pasid" and "queue_id" are zero.

Using snprintf() is safer but also probably the buffer should be larger.

1057 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
1058 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
1059 queue_id);
1060 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
1061 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
1062 queue_id);
1063 else
1064 BUG();
1065 
1066 *out = ring;
1067 return 0;
1068 
1069 clean_up_ring:
1070 amdgpu_ring_fini(ring);
1071 clean_up_memory:
1072 kfree(ring);
1073 amdgpu_mes_unlock(&adev->mes);
1074 return r;
1075 }

regards,
dan carpenter


[bug report] drm/amdgpu/mes: use ring for kernel queue submission

2022-05-09 Thread Dan Carpenter
Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:924 amdgpu_mes_add_ring() error: format 
string overflow. buf_size: 16 length: 39
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:927 amdgpu_mes_add_ring() error: format 
string overflow. buf_size: 16 length: 43
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:930 amdgpu_mes_add_ring() error: format 
string overflow. buf_size: 16 length: 40

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
848 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
849 int queue_type, int idx,
850 struct amdgpu_mes_ctx_data *ctx_data,
851 struct amdgpu_ring **out)
852 {
853 struct amdgpu_ring *ring;
854 struct amdgpu_mes_gang *gang;
855 struct amdgpu_mes_queue_properties qprops = {0};
856 int r, queue_id, pasid;
857 
858 /*
859  * Avoid taking any other locks under MES lock to avoid circular
860  * lock dependencies.
861  */
862 amdgpu_mes_lock(&adev->mes);
863 gang = idr_find(&adev->mes.gang_id_idr, gang_id);
864 if (!gang) {
865 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
866 amdgpu_mes_unlock(&adev->mes);
867 return -EINVAL;
868 }
869 pasid = gang->process->pasid;
870 
871 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
872 if (!ring) {
873 amdgpu_mes_unlock(&adev->mes);
874 return -ENOMEM;
875 }
876 
877 ring->ring_obj = NULL;
878 ring->use_doorbell = true;
879 ring->is_mes_queue = true;
880 ring->mes_ctx = ctx_data;
881 ring->idx = idx;
882 ring->no_scheduler = true;
883 
884 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
885 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
886   compute[ring->idx].mec_hpd);
887 ring->eop_gpu_addr =
888 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
889 }
890 
891 switch (queue_type) {
892 case AMDGPU_RING_TYPE_GFX:
893 ring->funcs = adev->gfx.gfx_ring[0].funcs;
894 break;
895 case AMDGPU_RING_TYPE_COMPUTE:
896 ring->funcs = adev->gfx.compute_ring[0].funcs;
897 break;
898 case AMDGPU_RING_TYPE_SDMA:
899 ring->funcs = adev->sdma.instance[0].ring.funcs;
900 break;
901 default:
902 BUG();
903 }
904 
905 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
906  AMDGPU_RING_PRIO_DEFAULT, NULL);
907 if (r)
908 goto clean_up_memory;
909 
910 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
911 
912 dma_fence_wait(gang->process->vm->last_update, false);
913 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
914 amdgpu_mes_unlock(&adev->mes);
915 
916 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
917 if (r)
918 goto clean_up_ring;
919 
920 ring->hw_queue_id = queue_id;
921 ring->doorbell_index = qprops.doorbell_off;
922 
923 if (queue_type == AMDGPU_RING_TYPE_GFX)
--> 924 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, 
queue_id);

Using sprintf() is always ill-advised.  Better to use snprintf().

"gfx_.." 6 characters.
passid is capped at USHRT_MAX so 5 characters
gang_id is capped at INT_MAX so 10 characters
queue_id is up to 10 characters as well.
1 char for the NUL terminator

Smatch is saying that it can be 39 characters but depending on the
implementation of idr_alloc() this could reach up to 32 characters.
Still that's well past the 16 characters avaliable.

925 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
926 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
927 queue_id);

Same

928 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
929 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
930 queue_id);

Same

931 else
932 BUG();
933 
934 *out = ring;
935 return 0;
936 
937 clean_up_ring:
938 amdgpu_ring_fini(ring);
939 clean_up_memory:
940 kfree(ring);
941 amdgpu_mes_un

[bug report] drm/amdgpu/mes: use ring for kernel queue submission

2022-05-09 Thread Dan Carpenter
Hello Jack Xiao,

The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue
submission" from Mar 27, 2020, leads to the following Smatch static
checker warning:

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:941 amdgpu_mes_add_ring()
error: double unlocked '&adev->mes.mutex_hidden' (orig line 914)

drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
848 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
849 int queue_type, int idx,
850 struct amdgpu_mes_ctx_data *ctx_data,
851 struct amdgpu_ring **out)
852 {
853 struct amdgpu_ring *ring;
854 struct amdgpu_mes_gang *gang;
855 struct amdgpu_mes_queue_properties qprops = {0};
856 int r, queue_id, pasid;
857 
858 /*
859  * Avoid taking any other locks under MES lock to avoid circular
860  * lock dependencies.
861  */
862 amdgpu_mes_lock(&adev->mes);
863 gang = idr_find(&adev->mes.gang_id_idr, gang_id);
864 if (!gang) {
865 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
866 amdgpu_mes_unlock(&adev->mes);
867 return -EINVAL;
868 }
869 pasid = gang->process->pasid;
870 
871 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
872 if (!ring) {
873 amdgpu_mes_unlock(&adev->mes);
874 return -ENOMEM;
875 }
876 
877 ring->ring_obj = NULL;
878 ring->use_doorbell = true;
879 ring->is_mes_queue = true;
880 ring->mes_ctx = ctx_data;
881 ring->idx = idx;
882 ring->no_scheduler = true;
883 
884 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
885 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
886   compute[ring->idx].mec_hpd);
887 ring->eop_gpu_addr =
888 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
889 }
890 
891 switch (queue_type) {
892 case AMDGPU_RING_TYPE_GFX:
893 ring->funcs = adev->gfx.gfx_ring[0].funcs;
894 break;
895 case AMDGPU_RING_TYPE_COMPUTE:
896 ring->funcs = adev->gfx.compute_ring[0].funcs;
897 break;
898 case AMDGPU_RING_TYPE_SDMA:
899 ring->funcs = adev->sdma.instance[0].ring.funcs;
900 break;
901 default:
902 BUG();
903 }
904 
905 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
906  AMDGPU_RING_PRIO_DEFAULT, NULL);
907 if (r)
908 goto clean_up_memory;
909 
910 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
911 
912 dma_fence_wait(gang->process->vm->last_update, false);
913 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
914 amdgpu_mes_unlock(&adev->mes);

Unlocked

915 
916 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
917 if (r)
918 goto clean_up_ring;

Double unlocked by goto.  It's weird that this warning is only showing
up now but my guess is that this code was only just released to the
public?

919 
920 ring->hw_queue_id = queue_id;
921 ring->doorbell_index = qprops.doorbell_off;
922 
923 if (queue_type == AMDGPU_RING_TYPE_GFX)
924 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, 
queue_id);
925 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
926 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
927 queue_id);
928 else if (queue_type == AMDGPU_RING_TYPE_SDMA)
929 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
930 queue_id);
931 else
932 BUG();
933 
934 *out = ring;
935 return 0;
936 
937 clean_up_ring:
938 amdgpu_ring_fini(ring);
939 clean_up_memory:
940 kfree(ring);
--> 941 amdgpu_mes_unlock(&adev->mes);
942 return r;
943 }

regards,
dan carpenter