[bug report] drm/amdgpu/mes: use ring for kernel queue submission
Hello Jack Xiao, Commit d0c423b64765 ("drm/amdgpu/mes: use ring for kernel queue submission") from Mar 27, 2020 (linux-next), leads to the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring() warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213) drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 1144 int queue_type, int idx, 1145 struct amdgpu_mes_ctx_data *ctx_data, 1146 struct amdgpu_ring **out) 1147 { 1148 struct amdgpu_ring *ring; 1149 struct amdgpu_mes_gang *gang; 1150 struct amdgpu_mes_queue_properties qprops = {0}; 1151 int r, queue_id, pasid; 1152 1153 /* 1154 * Avoid taking any other locks under MES lock to avoid circular 1155 * lock dependencies. 1156 */ 1157 amdgpu_mes_lock(&adev->mes); 1158 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 1159 if (!gang) { 1160 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 1161 amdgpu_mes_unlock(&adev->mes); 1162 return -EINVAL; 1163 } 1164 pasid = gang->process->pasid; 1165 1166 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 1167 if (!ring) { 1168 amdgpu_mes_unlock(&adev->mes); 1169 return -ENOMEM; 1170 } 1171 1172 ring->ring_obj = NULL; 1173 ring->use_doorbell = true; 1174 ring->is_mes_queue = true; 1175 ring->mes_ctx = ctx_data; 1176 ring->idx = idx; 1177 ring->no_scheduler = true; 1178 1179 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 1180 int offset = offsetof(struct amdgpu_mes_ctx_meta_data, 1181 compute[ring->idx].mec_hpd); 1182 ring->eop_gpu_addr = 1183 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1184 } 1185 1186 switch (queue_type) { 1187 case AMDGPU_RING_TYPE_GFX: 1188 ring->funcs = adev->gfx.gfx_ring[0].funcs; 1189 ring->me = adev->gfx.gfx_ring[0].me; 1190 ring->pipe = adev->gfx.gfx_ring[0].pipe; 1191 break; 1192 case AMDGPU_RING_TYPE_COMPUTE: 1193 ring->funcs = adev->gfx.compute_ring[0].funcs; 1194 ring->me = adev->gfx.compute_ring[0].me; 1195 ring->pipe = adev->gfx.compute_ring[0].pipe; 1196 break; 1197 case AMDGPU_RING_TYPE_SDMA: 1198 ring->funcs = adev->sdma.instance[0].ring.funcs; 1199 break; 1200 default: 1201 BUG(); 1202 } 1203 1204 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1205 AMDGPU_RING_PRIO_DEFAULT, NULL); 1206 if (r) 1207 goto clean_up_memory; 1208 1209 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); 1210 1211 dma_fence_wait(gang->process->vm->last_update, false); 1212 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 1213 amdgpu_mes_unlock(&adev->mes); ^ 1214 1215 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 1216 if (r) 1217 goto clean_up_ring; ^^ 1218 1219 ring->hw_queue_id = queue_id; 1220 ring->doorbell_index = qprops.doorbell_off; 1221 1222 if (queue_type == AMDGPU_RING_TYPE_GFX) 1223 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); 1224 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 1225 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, 1226 queue_id); 1227 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 1228 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, 1229 queue_id); 1230 else 1231 BUG(); 1232 1233 *out = ring; 1234 return 0; 1235 1236 clean_up_ring: 1237 amdgpu_ring_fini(ring); 1238 clean_up_memory: 1239 kfree(ring); --> 1240 amdgpu_mes_unlock(&adev->mes); ^^ 1241 return r; 1242 } regards, dan carpenter
[bug report] drm/amdgpu/mes: use ring for kernel queue submission
Hello Jack Xiao, The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue submission" from Mar 27, 2020, leads to the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1056 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 38 [user data] drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 980 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 981 int queue_type, int idx, 982 struct amdgpu_mes_ctx_data *ctx_data, 983 struct amdgpu_ring **out) 984 { 985 struct amdgpu_ring *ring; 986 struct amdgpu_mes_gang *gang; 987 struct amdgpu_mes_queue_properties qprops = {0}; 988 int r, queue_id, pasid; 989 990 /* 991 * Avoid taking any other locks under MES lock to avoid circular 992 * lock dependencies. 993 */ 994 amdgpu_mes_lock(&adev->mes); 995 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 996 if (!gang) { 997 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 998 amdgpu_mes_unlock(&adev->mes); 999 return -EINVAL; 1000 } 1001 pasid = gang->process->pasid; 1002 1003 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 1004 if (!ring) { 1005 amdgpu_mes_unlock(&adev->mes); 1006 return -ENOMEM; 1007 } 1008 1009 ring->ring_obj = NULL; 1010 ring->use_doorbell = true; 1011 ring->is_mes_queue = true; 1012 ring->mes_ctx = ctx_data; 1013 ring->idx = idx; 1014 ring->no_scheduler = true; 1015 1016 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 1017 int offset = offsetof(struct amdgpu_mes_ctx_meta_data, 1018 compute[ring->idx].mec_hpd); 1019 ring->eop_gpu_addr = 1020 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 1021 } 1022 1023 switch (queue_type) { 1024 case AMDGPU_RING_TYPE_GFX: 1025 ring->funcs = adev->gfx.gfx_ring[0].funcs; 1026 break; 1027 case AMDGPU_RING_TYPE_COMPUTE: 1028 ring->funcs = adev->gfx.compute_ring[0].funcs; 1029 break; 1030 case AMDGPU_RING_TYPE_SDMA: 1031 ring->funcs = adev->sdma.instance[0].ring.funcs; 1032 break; 1033 default: 1034 BUG(); 1035 } 1036 1037 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, 1038 AMDGPU_RING_PRIO_DEFAULT, NULL); 1039 if (r) 1040 goto clean_up_memory; 1041 1042 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); 1043 1044 dma_fence_wait(gang->process->vm->last_update, false); 1045 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 1046 amdgpu_mes_unlock(&adev->mes); 1047 1048 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 1049 if (r) 1050 goto clean_up_ring; 1051 1052 ring->hw_queue_id = queue_id; 1053 ring->doorbell_index = qprops.doorbell_off; 1054 1055 if (queue_type == AMDGPU_RING_TYPE_GFX) --> 1056 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); I'm not sure why this is warning now instead of in 2020. But the bug is definitely real. "gang_id" is capped at INT_MAX so that can overflow already even if the values of "pasid" and "queue_id" are zero. Using snprintf() is safer but also probably the buffer should be larger. 1057 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 1058 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, 1059 queue_id); 1060 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 1061 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, 1062 queue_id); 1063 else 1064 BUG(); 1065 1066 *out = ring; 1067 return 0; 1068 1069 clean_up_ring: 1070 amdgpu_ring_fini(ring); 1071 clean_up_memory: 1072 kfree(ring); 1073 amdgpu_mes_unlock(&adev->mes); 1074 return r; 1075 } regards, dan carpenter
[bug report] drm/amdgpu/mes: use ring for kernel queue submission
Hello Jack Xiao, The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue submission" from Mar 27, 2020, leads to the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:924 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 39 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:927 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 43 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:930 amdgpu_mes_add_ring() error: format string overflow. buf_size: 16 length: 40 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 848 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 849 int queue_type, int idx, 850 struct amdgpu_mes_ctx_data *ctx_data, 851 struct amdgpu_ring **out) 852 { 853 struct amdgpu_ring *ring; 854 struct amdgpu_mes_gang *gang; 855 struct amdgpu_mes_queue_properties qprops = {0}; 856 int r, queue_id, pasid; 857 858 /* 859 * Avoid taking any other locks under MES lock to avoid circular 860 * lock dependencies. 861 */ 862 amdgpu_mes_lock(&adev->mes); 863 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 864 if (!gang) { 865 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 866 amdgpu_mes_unlock(&adev->mes); 867 return -EINVAL; 868 } 869 pasid = gang->process->pasid; 870 871 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 872 if (!ring) { 873 amdgpu_mes_unlock(&adev->mes); 874 return -ENOMEM; 875 } 876 877 ring->ring_obj = NULL; 878 ring->use_doorbell = true; 879 ring->is_mes_queue = true; 880 ring->mes_ctx = ctx_data; 881 ring->idx = idx; 882 ring->no_scheduler = true; 883 884 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 885 int offset = offsetof(struct amdgpu_mes_ctx_meta_data, 886 compute[ring->idx].mec_hpd); 887 ring->eop_gpu_addr = 888 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 889 } 890 891 switch (queue_type) { 892 case AMDGPU_RING_TYPE_GFX: 893 ring->funcs = adev->gfx.gfx_ring[0].funcs; 894 break; 895 case AMDGPU_RING_TYPE_COMPUTE: 896 ring->funcs = adev->gfx.compute_ring[0].funcs; 897 break; 898 case AMDGPU_RING_TYPE_SDMA: 899 ring->funcs = adev->sdma.instance[0].ring.funcs; 900 break; 901 default: 902 BUG(); 903 } 904 905 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, 906 AMDGPU_RING_PRIO_DEFAULT, NULL); 907 if (r) 908 goto clean_up_memory; 909 910 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); 911 912 dma_fence_wait(gang->process->vm->last_update, false); 913 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 914 amdgpu_mes_unlock(&adev->mes); 915 916 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 917 if (r) 918 goto clean_up_ring; 919 920 ring->hw_queue_id = queue_id; 921 ring->doorbell_index = qprops.doorbell_off; 922 923 if (queue_type == AMDGPU_RING_TYPE_GFX) --> 924 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); Using sprintf() is always ill-advised. Better to use snprintf(). "gfx_.." 6 characters. passid is capped at USHRT_MAX so 5 characters gang_id is capped at INT_MAX so 10 characters queue_id is up to 10 characters as well. 1 char for the NUL terminator Smatch is saying that it can be 39 characters but depending on the implementation of idr_alloc() this could reach up to 32 characters. Still that's well past the 16 characters avaliable. 925 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 926 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, 927 queue_id); Same 928 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 929 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, 930 queue_id); Same 931 else 932 BUG(); 933 934 *out = ring; 935 return 0; 936 937 clean_up_ring: 938 amdgpu_ring_fini(ring); 939 clean_up_memory: 940 kfree(ring); 941 amdgpu_mes_un
[bug report] drm/amdgpu/mes: use ring for kernel queue submission
Hello Jack Xiao, The patch d0c423b64765: "drm/amdgpu/mes: use ring for kernel queue submission" from Mar 27, 2020, leads to the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:941 amdgpu_mes_add_ring() error: double unlocked '&adev->mes.mutex_hidden' (orig line 914) drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 848 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, 849 int queue_type, int idx, 850 struct amdgpu_mes_ctx_data *ctx_data, 851 struct amdgpu_ring **out) 852 { 853 struct amdgpu_ring *ring; 854 struct amdgpu_mes_gang *gang; 855 struct amdgpu_mes_queue_properties qprops = {0}; 856 int r, queue_id, pasid; 857 858 /* 859 * Avoid taking any other locks under MES lock to avoid circular 860 * lock dependencies. 861 */ 862 amdgpu_mes_lock(&adev->mes); 863 gang = idr_find(&adev->mes.gang_id_idr, gang_id); 864 if (!gang) { 865 DRM_ERROR("gang id %d doesn't exist\n", gang_id); 866 amdgpu_mes_unlock(&adev->mes); 867 return -EINVAL; 868 } 869 pasid = gang->process->pasid; 870 871 ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); 872 if (!ring) { 873 amdgpu_mes_unlock(&adev->mes); 874 return -ENOMEM; 875 } 876 877 ring->ring_obj = NULL; 878 ring->use_doorbell = true; 879 ring->is_mes_queue = true; 880 ring->mes_ctx = ctx_data; 881 ring->idx = idx; 882 ring->no_scheduler = true; 883 884 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { 885 int offset = offsetof(struct amdgpu_mes_ctx_meta_data, 886 compute[ring->idx].mec_hpd); 887 ring->eop_gpu_addr = 888 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 889 } 890 891 switch (queue_type) { 892 case AMDGPU_RING_TYPE_GFX: 893 ring->funcs = adev->gfx.gfx_ring[0].funcs; 894 break; 895 case AMDGPU_RING_TYPE_COMPUTE: 896 ring->funcs = adev->gfx.compute_ring[0].funcs; 897 break; 898 case AMDGPU_RING_TYPE_SDMA: 899 ring->funcs = adev->sdma.instance[0].ring.funcs; 900 break; 901 default: 902 BUG(); 903 } 904 905 r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, 906 AMDGPU_RING_PRIO_DEFAULT, NULL); 907 if (r) 908 goto clean_up_memory; 909 910 amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); 911 912 dma_fence_wait(gang->process->vm->last_update, false); 913 dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); 914 amdgpu_mes_unlock(&adev->mes); Unlocked 915 916 r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); 917 if (r) 918 goto clean_up_ring; Double unlocked by goto. It's weird that this warning is only showing up now but my guess is that this code was only just released to the public? 919 920 ring->hw_queue_id = queue_id; 921 ring->doorbell_index = qprops.doorbell_off; 922 923 if (queue_type == AMDGPU_RING_TYPE_GFX) 924 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); 925 else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) 926 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, 927 queue_id); 928 else if (queue_type == AMDGPU_RING_TYPE_SDMA) 929 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, 930 queue_id); 931 else 932 BUG(); 933 934 *out = ring; 935 return 0; 936 937 clean_up_ring: 938 amdgpu_ring_fini(ring); 939 clean_up_memory: 940 kfree(ring); --> 941 amdgpu_mes_unlock(&adev->mes); 942 return r; 943 } regards, dan carpenter