Re: [PATCH] drm/amdkfd: Fix shift out-of-bounds issue and remove unused code.

2023-10-19 Thread Philip Yang

  


On 2023-10-18 21:56, Jesse Zhang wrote:


  [  567.613292] shift exponent 255 is too large for 64-bit type 'long unsigned int'
[  567.614498] CPU: 5 PID: 238 Comm: kworker/5:1 Tainted: G   OE  6.2.0-34-generic #34~22.04.1-Ubuntu
[  567.614502] Hardware name: AMD Splinter/Splinter-RPL, BIOS WS43927N_871 09/25/2023
[  567.614504] Workqueue: events send_exception_work_handler [amdgpu]
[  567.614748] Call Trace:
[  567.614750]  
[  567.614753]  dump_stack_lvl+0x48/0x70
[  567.614761]  dump_stack+0x10/0x20
[  567.614763]  __ubsan_handle_shift_out_of_bounds+0x156/0x310
[  567.614769]  ? srso_alias_return_thunk+0x5/0x7f
[  567.614773]  ? update_sd_lb_stats.constprop.0+0xf2/0x3c0
[  567.614780]  svm_range_split_by_granularity.cold+0x2b/0x34 [amdgpu]
[  567.615047]  ? srso_alias_return_thunk+0x5/0x7f
[  567.615052]  svm_migrate_to_ram+0x185/0x4d0 [amdgpu]
[  567.615286]  do_swap_page+0x7b6/0xa30
[  567.615291]  ? srso_alias_return_thunk+0x5/0x7f
[  567.615294]  ? __free_pages+0x119/0x130
[  567.615299]  handle_pte_fault+0x227/0x280
[  567.615303]  __handle_mm_fault+0x3c0/0x720
[  567.615311]  handle_mm_fault+0x119/0x330
[  567.615314]  ? lock_mm_and_find_vma+0x44/0x250
[  567.615318]  do_user_addr_fault+0x1a9/0x640
[  567.615323]  exc_page_fault+0x81/0x1b0
[  567.615328]  asm_exc_page_fault+0x27/0x30
[  567.615332] RIP: 0010:__get_user_8+0x1c/0x30

Suggested-by: Philip Yang 
Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 62 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  3 --
 2 files changed, 1 insertion(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..ccaf86a4c02a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -781,7 +781,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
 			prange->flags &= ~attrs[i].value;
 			break;
 		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
-			prange->granularity = attrs[i].value;
+			prange->granularity = attrs[i].value & 0x3F;

Thinks again, this should be prange->granularity =
  min_t(uint32_t, attrs[i].value, 0x3F);
Please separate to another patch to remove
  svm_range_split_by_granularity.
Regards,
Philip


  
 			break;
 		default:
 			WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
@@ -1139,66 +1139,6 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
 	list_add_tail(>child_list, >child_list);
 }
 
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-			   unsigned long addr, struct svm_range *parent,
-			   struct svm_range *prange)
-{
-	struct svm_range *head, *tail;
-	unsigned long start, last, size;
-	int r;
-
-	/* Align splited range start and size to granularity size, then a single
-	 * PTE will be used for whole range, this reduces the number of PTE
-	 * updated and the L1 TLB space used for translation.
-	 */
-	size = 1UL << prange->granularity;
-	start = ALIGN_DOWN(addr, size);
-	last = ALIGN(addr + 1, size) - 1;
-
-	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-		 prange->svms, prange->start, prange->last, start, last, size);
-
-	if (start > prange->start) {
-		r = svm_range_split(prange, start, prange->last, );
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-	}
-
-	if (last < prange->last) {
-		r = svm_range_split(prange, prange->start, last, );
-		if (r)
-			return r;
-		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-	}
-
-	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
-		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
-		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
-			 prange, prange->start, prange->last,
-			 SVM_OP_ADD_RANGE_AND_MAP);
-	}
-	return 0;
-}
 static bool
 svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index be11ba0c4289..026863a0abcd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -172,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
 int svm_range_vram_node_new(struct kfd_node *node, struct svm_range 

RE: [PATCH] drm/amdkfd: Fix shift out-of-bounds issue and remove unused code.

2023-10-18 Thread Zhang, Yifan
[AMD Official Use Only - General]

I think it is better to separate it to two patches since it addresses different 
issues. With or without the change, this patch is

Reviewed-by: Yifan Zhang 

-Original Message-
From: Jesse Zhang 
Sent: Thursday, October 19, 2023 9:56 AM
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Kuehling, Felix 
; Yang, Philip ; Zhang, Yifan 
; Zhang, Jesse(Jie) ; Zhang, 
Jesse(Jie) 
Subject: [PATCH] drm/amdkfd: Fix shift out-of-bounds issue and remove unused 
code.

[  567.613292] shift exponent 255 is too large for 64-bit type 'long unsigned 
int'
[  567.614498] CPU: 5 PID: 238 Comm: kworker/5:1 Tainted: G   OE  
6.2.0-34-generic #34~22.04.1-Ubuntu
[  567.614502] Hardware name: AMD Splinter/Splinter-RPL, BIOS WS43927N_871 
09/25/2023 [  567.614504] Workqueue: events send_exception_work_handler 
[amdgpu] [  567.614748] Call Trace:
[  567.614750]  
[  567.614753]  dump_stack_lvl+0x48/0x70 [  567.614761]  dump_stack+0x10/0x20 [ 
 567.614763]  __ubsan_handle_shift_out_of_bounds+0x156/0x310
[  567.614769]  ? srso_alias_return_thunk+0x5/0x7f [  567.614773]  ? 
update_sd_lb_stats.constprop.0+0xf2/0x3c0
[  567.614780]  svm_range_split_by_granularity.cold+0x2b/0x34 [amdgpu] [  
567.615047]  ? srso_alias_return_thunk+0x5/0x7f [  567.615052]  
svm_migrate_to_ram+0x185/0x4d0 [amdgpu] [  567.615286]  
do_swap_page+0x7b6/0xa30 [  567.615291]  ? srso_alias_return_thunk+0x5/0x7f [  
567.615294]  ? __free_pages+0x119/0x130 [  567.615299]  
handle_pte_fault+0x227/0x280 [  567.615303]  __handle_mm_fault+0x3c0/0x720 [  
567.615311]  handle_mm_fault+0x119/0x330 [  567.615314]  ? 
lock_mm_and_find_vma+0x44/0x250 [  567.615318]  do_user_addr_fault+0x1a9/0x640 
[  567.615323]  exc_page_fault+0x81/0x1b0 [  567.615328]  
asm_exc_page_fault+0x27/0x30 [  567.615332] RIP: 0010:__get_user_8+0x1c/0x30

Suggested-by: Philip Yang 
Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 62 +---  
drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  3 --
 2 files changed, 1 insertion(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..ccaf86a4c02a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -781,7 +781,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct 
svm_range *prange,
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
-   prange->granularity = attrs[i].value;
+   prange->granularity = attrs[i].value & 0x3F;
break;
default:
WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); 
@@ -1139,66 +1139,6 @@ svm_range_add_child(struct svm_range *prange, struct 
mm_struct *mm,
list_add_tail(>child_list, >child_list);  }

-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-  unsigned long addr, struct svm_range *parent,
-  struct svm_range *prange)
-{
-   struct svm_range *head, *tail;
-   unsigned long start, last, size;
-   int r;
-
-   /* Align splited range start and size to granularity size, then a single
-* PTE will be used for whole range, this reduces the number of PTE
-* updated and the L1 TLB space used for translation.
-*/
-   size = 1UL << prange->granularity;
-   start = ALIGN_DOWN(addr, size);
-   last = ALIGN(addr + 1, size) - 1;
-
-   pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-prange->svms, prange->start, prange->last, start, last, size);
-
-   if (start > prange->start) {
-   r = svm_range_split(prange, start, prange->last, );
-   if (r)
-   return r;
-   svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-   }
-
-   if (last < prange->last) {
-   r = svm_range_split(prange, prange->start, last, );
-   if (r)
-   return r;
-   svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-   }
-
-   /* xnack on, update mapping on GPUs with ACCESS

[PATCH] drm/amdkfd: Fix shift out-of-bounds issue and remove unused code.

2023-10-18 Thread Jesse Zhang
[  567.613292] shift exponent 255 is too large for 64-bit type 'long unsigned 
int'
[  567.614498] CPU: 5 PID: 238 Comm: kworker/5:1 Tainted: G   OE  
6.2.0-34-generic #34~22.04.1-Ubuntu
[  567.614502] Hardware name: AMD Splinter/Splinter-RPL, BIOS WS43927N_871 
09/25/2023
[  567.614504] Workqueue: events send_exception_work_handler [amdgpu]
[  567.614748] Call Trace:
[  567.614750]  
[  567.614753]  dump_stack_lvl+0x48/0x70
[  567.614761]  dump_stack+0x10/0x20
[  567.614763]  __ubsan_handle_shift_out_of_bounds+0x156/0x310
[  567.614769]  ? srso_alias_return_thunk+0x5/0x7f
[  567.614773]  ? update_sd_lb_stats.constprop.0+0xf2/0x3c0
[  567.614780]  svm_range_split_by_granularity.cold+0x2b/0x34 [amdgpu]
[  567.615047]  ? srso_alias_return_thunk+0x5/0x7f
[  567.615052]  svm_migrate_to_ram+0x185/0x4d0 [amdgpu]
[  567.615286]  do_swap_page+0x7b6/0xa30
[  567.615291]  ? srso_alias_return_thunk+0x5/0x7f
[  567.615294]  ? __free_pages+0x119/0x130
[  567.615299]  handle_pte_fault+0x227/0x280
[  567.615303]  __handle_mm_fault+0x3c0/0x720
[  567.615311]  handle_mm_fault+0x119/0x330
[  567.615314]  ? lock_mm_and_find_vma+0x44/0x250
[  567.615318]  do_user_addr_fault+0x1a9/0x640
[  567.615323]  exc_page_fault+0x81/0x1b0
[  567.615328]  asm_exc_page_fault+0x27/0x30
[  567.615332] RIP: 0010:__get_user_8+0x1c/0x30

Suggested-by: Philip Yang 
Signed-off-by: Jesse Zhang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 62 +---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  3 --
 2 files changed, 1 insertion(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 54af7a2b29f8..ccaf86a4c02a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -781,7 +781,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct 
svm_range *prange,
prange->flags &= ~attrs[i].value;
break;
case KFD_IOCTL_SVM_ATTR_GRANULARITY:
-   prange->granularity = attrs[i].value;
+   prange->granularity = attrs[i].value & 0x3F;
break;
default:
WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
@@ -1139,66 +1139,6 @@ svm_range_add_child(struct svm_range *prange, struct 
mm_struct *mm,
list_add_tail(>child_list, >child_list);
 }
 
-/**
- * svm_range_split_by_granularity - collect ranges within granularity boundary
- *
- * @p: the process with svms list
- * @mm: mm structure
- * @addr: the vm fault address in pages, to split the prange
- * @parent: parent range if prange is from child list
- * @prange: prange to split
- *
- * Trims @prange to be a single aligned block of prange->granularity if
- * possible. The head and tail are added to the child_list in @parent.
- *
- * Context: caller must hold mmap_read_lock and prange->lock
- *
- * Return:
- * 0 - OK, otherwise error code
- */
-int
-svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
-  unsigned long addr, struct svm_range *parent,
-  struct svm_range *prange)
-{
-   struct svm_range *head, *tail;
-   unsigned long start, last, size;
-   int r;
-
-   /* Align splited range start and size to granularity size, then a single
-* PTE will be used for whole range, this reduces the number of PTE
-* updated and the L1 TLB space used for translation.
-*/
-   size = 1UL << prange->granularity;
-   start = ALIGN_DOWN(addr, size);
-   last = ALIGN(addr + 1, size) - 1;
-
-   pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
-prange->svms, prange->start, prange->last, start, last, size);
-
-   if (start > prange->start) {
-   r = svm_range_split(prange, start, prange->last, );
-   if (r)
-   return r;
-   svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
-   }
-
-   if (last < prange->last) {
-   r = svm_range_split(prange, prange->start, last, );
-   if (r)
-   return r;
-   svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
-   }
-
-   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-   if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
-   prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
-   pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
-prange, prange->start, prange->last,
-SVM_OP_ADD_RANGE_AND_MAP);
-   }
-   return 0;
-}
 static bool
 svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index be11ba0c4289..026863a0abcd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h