Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-05 Thread si-wei liu



On 11/4/2020 7:12 PM, Jason Wang wrote:


On 2020/11/5 上午7:40, si-wei liu wrote:


On 11/3/2020 6:42 PM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+else
+atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-unsigned long locked, lock_limit, pinned, i;
+unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+long pinned;
  int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +/* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-if (!npages)
-return -EINVAL;
+if (!npages) {
+ret = -EINVAL;
+goto free;
+}
mmap_read_lock(dev->mm);
  -locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-if (locked > lock_limit) {
+if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-goto out;
+goto unlock;
  }
cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+nchunks = 0;
while (npages) {
-pinned = min_t(unsigned long, npages, list_size);
-ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-if (ret != pinned)
+sz2pin = min_t(unsigned long, npages, list_size);
+pinned = pin_user_pages(cur_base, sz2pin,
+gup_flags, page_list, NULL);
+if (sz2pin != pinned) {
+if (pinned < 0) {
+ret = pinned;
+} else {
+unpin_user_pages(page_list, pinned);
+ret = -ENOMEM;
+}
  goto out;
+}
+nchunks++;
if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -for (i = 0; i < ret; i++) {
+for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+nchunks = 0;
  }
last_pfn = this_pfn;
  }
  -cur_base += ret << PAGE_SHIFT;
-npages -= ret;
+cur_base += pinned << PAGE_SHIFT;
+npages -= pinned;
  }
/* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+if (nchunks && last_pfn) {



Can we decrease npages where you did "nchunks++" then we can check 
npages here instead?
Hmmm, I am not sure I get what you want... @nchunks gets reset to 0 
whenever a certain range of pinned pages is successfully mapped. The 
conditional (when nchunks is non-zero) here indicates if there's any 
_outstanding_ pinned page that has to clean up in the error handling 
path. While 

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-04 Thread Jason Wang



On 2020/11/5 上午7:40, si-wei liu wrote:


On 11/3/2020 6:42 PM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
    if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+    else
+    atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
    return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-    unsigned long locked, lock_limit, pinned, i;
+    unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+    long pinned;
  int ret = 0;
    if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +    /* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
    npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-    if (!npages)
-    return -EINVAL;
+    if (!npages) {
+    ret = -EINVAL;
+    goto free;
+    }
    mmap_read_lock(dev->mm);
  -    locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-    if (locked > lock_limit) {
+    if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-    goto out;
+    goto unlock;
  }
    cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+    nchunks = 0;
    while (npages) {
-    pinned = min_t(unsigned long, npages, list_size);
-    ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-    if (ret != pinned)
+    sz2pin = min_t(unsigned long, npages, list_size);
+    pinned = pin_user_pages(cur_base, sz2pin,
+    gup_flags, page_list, NULL);
+    if (sz2pin != pinned) {
+    if (pinned < 0) {
+    ret = pinned;
+    } else {
+    unpin_user_pages(page_list, pinned);
+    ret = -ENOMEM;
+    }
  goto out;
+    }
+    nchunks++;
    if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -    for (i = 0; i < ret; i++) {
+    for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
    if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-    if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+    ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+    if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+    nchunks = 0;
  }
    last_pfn = this_pfn;
  }
  -    cur_base += ret << PAGE_SHIFT;
-    npages -= ret;
+    cur_base += pinned << PAGE_SHIFT;
+    npages -= pinned;
  }
    /* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+    if (nchunks && last_pfn) {



Can we decrease npages where you did "nchunks++" then we can check 
npages here instead?
Hmmm, I am not sure I get what you want... @nchunks gets reset to 0 
whenever a certain range of pinned pages is successfully mapped. The 
conditional (when nchunks is non-zero) here indicates if there's any 
_outstanding_ pinned page that has to clean up in the error handling 
path. While the decrement of @npages may not occur when 

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-04 Thread si-wei liu



On 11/3/2020 6:42 PM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+else
+atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-unsigned long locked, lock_limit, pinned, i;
+unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+long pinned;
  int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +/* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-if (!npages)
-return -EINVAL;
+if (!npages) {
+ret = -EINVAL;
+goto free;
+}
mmap_read_lock(dev->mm);
  -locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-if (locked > lock_limit) {
+if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-goto out;
+goto unlock;
  }
cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+nchunks = 0;
while (npages) {
-pinned = min_t(unsigned long, npages, list_size);
-ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-if (ret != pinned)
+sz2pin = min_t(unsigned long, npages, list_size);
+pinned = pin_user_pages(cur_base, sz2pin,
+gup_flags, page_list, NULL);
+if (sz2pin != pinned) {
+if (pinned < 0) {
+ret = pinned;
+} else {
+unpin_user_pages(page_list, pinned);
+ret = -ENOMEM;
+}
  goto out;
+}
+nchunks++;
if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -for (i = 0; i < ret; i++) {
+for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+nchunks = 0;
  }
last_pfn = this_pfn;
  }
  -cur_base += ret << PAGE_SHIFT;
-npages -= ret;
+cur_base += pinned << PAGE_SHIFT;
+npages -= pinned;
  }
/* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+if (nchunks && last_pfn) {



Can we decrease npages where you did "nchunks++" then we can check 
npages here instead?
Hmmm, I am not sure I get what you want... @nchunks gets reset to 0 
whenever a certain range of pinned pages is successfully mapped. The 
conditional (when nchunks is non-zero) here indicates if there's any 
_outstanding_ pinned page that has to clean up in the error handling 
path. While the decrement of @npages may not occur when resetting the 
@nchunks counter, 

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread Jason Wang



On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 +---
  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
  
  	if (r)

vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+   else
+   atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
  
  	return r;

  }
@@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-   unsigned long locked, lock_limit, pinned, i;
+   unsigned long lock_limit, sz2pin, nchunks, i;
u64 iova = msg->iova;
+   long pinned;
int ret = 0;
  
  	if (vhost_iotlb_itree_first(iotlb, msg->iova,

msg->iova + msg->size - 1))
return -EEXIST;
  
+	/* Limit the use of memory for bookkeeping */

page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
@@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
gup_flags |= FOLL_WRITE;
  
  	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;

-   if (!npages)
-   return -EINVAL;
+   if (!npages) {
+   ret = -EINVAL;
+   goto free;
+   }
  
  	mmap_read_lock(dev->mm);
  
-	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);

lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-   if (locked > lock_limit) {
+   if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
-   goto out;
+   goto unlock;
}
  
  	cur_base = msg->uaddr & PAGE_MASK;

iova &= PAGE_MASK;
+   nchunks = 0;
  
  	while (npages) {

-   pinned = min_t(unsigned long, npages, list_size);
-   ret = pin_user_pages(cur_base, pinned,
-gup_flags, page_list, NULL);
-   if (ret != pinned)
+   sz2pin = min_t(unsigned long, npages, list_size);
+   pinned = pin_user_pages(cur_base, sz2pin,
+   gup_flags, page_list, NULL);
+   if (sz2pin != pinned) {
+   if (pinned < 0) {
+   ret = pinned;
+   } else {
+   unpin_user_pages(page_list, pinned);
+   ret = -ENOMEM;
+   }
goto out;
+   }
+   nchunks++;
  
  		if (!last_pfn)

map_pfn = page_to_pfn(page_list[0]);
  
-		for (i = 0; i < ret; i++) {

+   for (i = 0; i < pinned; i++) {
unsigned long this_pfn = page_to_pfn(page_list[i]);
u64 csize;
  
  			if (last_pfn && (this_pfn != last_pfn + 1)) {

/* Pin a contiguous chunk of memory */
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-   if (vhost_vdpa_map(v, iova, csize,
-  map_pfn << PAGE_SHIFT,
-  msg->perm))
+   ret = vhost_vdpa_map(v, iova, csize,
+map_pfn << PAGE_SHIFT,
+msg->perm);
+   if (ret)
goto out;
+
map_pfn = this_pfn;
iova += csize;
+   nchunks = 0;
}
  
  			last_pfn = this_pfn;

}
  
-		cur_base += ret << PAGE_SHIFT;

-   npages -= ret;
+   cur_base += pinned << PAGE_SHIFT;
+   npages -= pinned;
}
  
  	/* Pin the rest chunk */

@@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
 map_pfn << PAGE_SHIFT, 

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread si-wei liu



On 11/3/2020 5:58 PM, Jason Wang wrote:


On 2020/11/4 上午9:08, si-wei liu wrote:


On 11/3/2020 5:06 PM, si-wei liu wrote:


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+else
+atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-unsigned long locked, lock_limit, pinned, i;
+unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+long pinned;
  int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +/* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-if (!npages)
-return -EINVAL;
+if (!npages) {
+ret = -EINVAL;
+goto free;
+}
mmap_read_lock(dev->mm);
  -locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-if (locked > lock_limit) {
+if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-goto out;
+goto unlock;
  }
cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+nchunks = 0;
while (npages) {
-pinned = min_t(unsigned long, npages, list_size);
-ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-if (ret != pinned)
+sz2pin = min_t(unsigned long, npages, list_size);
+pinned = pin_user_pages(cur_base, sz2pin,
+gup_flags, page_list, NULL);
+if (sz2pin != pinned) {
+if (pinned < 0) {
+ret = pinned;
+} else {
+unpin_user_pages(page_list, pinned);
+ret = -ENOMEM;
+}
  goto out;
+}
+nchunks++;
if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -for (i = 0; i < ret; i++) {
+for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+nchunks = 0;
  }
last_pfn = this_pfn;
  }
  -cur_base += ret << PAGE_SHIFT;
-npages -= ret;
+cur_base += pinned << PAGE_SHIFT;
+npages -= pinned;
  }
/* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+if (nchunks && last_pfn) {
+unsigned long pfn;
+
+/*
+ * Unpin the outstanding pages which are unmapped.
+ * Mapped pages are accounted in vdpa_map(), thus
+ * will be handled by vdpa_unmap().
+ */
+for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread Jason Wang



On 2020/11/4 上午9:08, si-wei liu wrote:


On 11/3/2020 5:06 PM, si-wei liu wrote:


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
    if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+    else
+    atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
    return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-    unsigned long locked, lock_limit, pinned, i;
+    unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+    long pinned;
  int ret = 0;
    if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +    /* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
    npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-    if (!npages)
-    return -EINVAL;
+    if (!npages) {
+    ret = -EINVAL;
+    goto free;
+    }
    mmap_read_lock(dev->mm);
  -    locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-    if (locked > lock_limit) {
+    if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-    goto out;
+    goto unlock;
  }
    cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+    nchunks = 0;
    while (npages) {
-    pinned = min_t(unsigned long, npages, list_size);
-    ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-    if (ret != pinned)
+    sz2pin = min_t(unsigned long, npages, list_size);
+    pinned = pin_user_pages(cur_base, sz2pin,
+    gup_flags, page_list, NULL);
+    if (sz2pin != pinned) {
+    if (pinned < 0) {
+    ret = pinned;
+    } else {
+    unpin_user_pages(page_list, pinned);
+    ret = -ENOMEM;
+    }
  goto out;
+    }
+    nchunks++;
    if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -    for (i = 0; i < ret; i++) {
+    for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
    if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-    if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+    ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+    if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+    nchunks = 0;
  }
    last_pfn = this_pfn;
  }
  -    cur_base += ret << PAGE_SHIFT;
-    npages -= ret;
+    cur_base += pinned << PAGE_SHIFT;
+    npages -= pinned;
  }
    /* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+    if (nchunks && last_pfn) {
+    unsigned long pfn;
+
+    /*
+ * Unpin the outstanding pages which are unmapped.
+ * Mapped pages are accounted in vdpa_map(), thus
+ * will be handled by vdpa_unmap().
+ */
+    for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+    unpin_user_page(pfn_to_page(pfn));
+    }
  

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread si-wei liu



On 11/3/2020 5:06 PM, si-wei liu wrote:


On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+else
+atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-unsigned long locked, lock_limit, pinned, i;
+unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+long pinned;
  int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +/* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-if (!npages)
-return -EINVAL;
+if (!npages) {
+ret = -EINVAL;
+goto free;
+}
mmap_read_lock(dev->mm);
  -locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-if (locked > lock_limit) {
+if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-goto out;
+goto unlock;
  }
cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+nchunks = 0;
while (npages) {
-pinned = min_t(unsigned long, npages, list_size);
-ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-if (ret != pinned)
+sz2pin = min_t(unsigned long, npages, list_size);
+pinned = pin_user_pages(cur_base, sz2pin,
+gup_flags, page_list, NULL);
+if (sz2pin != pinned) {
+if (pinned < 0) {
+ret = pinned;
+} else {
+unpin_user_pages(page_list, pinned);
+ret = -ENOMEM;
+}
  goto out;
+}
+nchunks++;
if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -for (i = 0; i < ret; i++) {
+for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+nchunks = 0;
  }
last_pfn = this_pfn;
  }
  -cur_base += ret << PAGE_SHIFT;
-npages -= ret;
+cur_base += pinned << PAGE_SHIFT;
+npages -= pinned;
  }
/* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+if (nchunks && last_pfn) {
+unsigned long pfn;
+
+/*
+ * Unpin the outstanding pages which are unmapped.
+ * Mapped pages are accounted in vdpa_map(), thus
+ * will be handled by vdpa_unmap().
+ */
+for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+unpin_user_page(pfn_to_page(pfn));
+}
  vhost_vdpa_unmap(v, msg->iova, msg->size);



I want 

Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread si-wei liu



On 11/3/2020 5:00 AM, Jason Wang wrote:


On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 
+---

  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
if (r)
  vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+else
+atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
return r;
  }
@@ -591,14 +593,16 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
  unsigned int gup_flags = FOLL_LONGTERM;
  unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-unsigned long locked, lock_limit, pinned, i;
+unsigned long lock_limit, sz2pin, nchunks, i;
  u64 iova = msg->iova;
+long pinned;
  int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova,
  msg->iova + msg->size - 1))
  return -EEXIST;
  +/* Limit the use of memory for bookkeeping */
  page_list = (struct page **) __get_free_page(GFP_KERNEL);
  if (!page_list)
  return -ENOMEM;
@@ -607,52 +611,64 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  gup_flags |= FOLL_WRITE;
npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> 
PAGE_SHIFT;

-if (!npages)
-return -EINVAL;
+if (!npages) {
+ret = -EINVAL;
+goto free;
+}
mmap_read_lock(dev->mm);
  -locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
  lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-if (locked > lock_limit) {
+if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
  ret = -ENOMEM;
-goto out;
+goto unlock;
  }
cur_base = msg->uaddr & PAGE_MASK;
  iova &= PAGE_MASK;
+nchunks = 0;
while (npages) {
-pinned = min_t(unsigned long, npages, list_size);
-ret = pin_user_pages(cur_base, pinned,
- gup_flags, page_list, NULL);
-if (ret != pinned)
+sz2pin = min_t(unsigned long, npages, list_size);
+pinned = pin_user_pages(cur_base, sz2pin,
+gup_flags, page_list, NULL);
+if (sz2pin != pinned) {
+if (pinned < 0) {
+ret = pinned;
+} else {
+unpin_user_pages(page_list, pinned);
+ret = -ENOMEM;
+}
  goto out;
+}
+nchunks++;
if (!last_pfn)
  map_pfn = page_to_pfn(page_list[0]);
  -for (i = 0; i < ret; i++) {
+for (i = 0; i < pinned; i++) {
  unsigned long this_pfn = page_to_pfn(page_list[i]);
  u64 csize;
if (last_pfn && (this_pfn != last_pfn + 1)) {
  /* Pin a contiguous chunk of memory */
  csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-if (vhost_vdpa_map(v, iova, csize,
-   map_pfn << PAGE_SHIFT,
-   msg->perm))
+ret = vhost_vdpa_map(v, iova, csize,
+ map_pfn << PAGE_SHIFT,
+ msg->perm);
+if (ret)
  goto out;
+
  map_pfn = this_pfn;
  iova += csize;
+nchunks = 0;
  }
last_pfn = this_pfn;
  }
  -cur_base += ret << PAGE_SHIFT;
-npages -= ret;
+cur_base += pinned << PAGE_SHIFT;
+npages -= pinned;
  }
/* Pin the rest chunk */
@@ -660,10 +676,22 @@ static int 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

   map_pfn << PAGE_SHIFT, msg->perm);
  out:
  if (ret) {
+if (nchunks && last_pfn) {
+unsigned long pfn;
+
+/*
+ * Unpin the outstanding pages which are unmapped.
+ * Mapped pages are accounted in vdpa_map(), thus
+ * will be handled by vdpa_unmap().
+ */
+for (pfn = map_pfn; pfn <= last_pfn; pfn++)
+unpin_user_page(pfn_to_page(pfn));
+}
  vhost_vdpa_unmap(v, msg->iova, msg->size);



I want to know what's wrong with current code.


Re: [PATCH 2/2] vhost-vdpa: fix page pinning leakage in error path (rework)

2020-11-03 Thread Jason Wang



On 2020/10/30 下午3:45, Si-Wei Liu wrote:

Pinned pages are not properly accounted particularly when
mapping error occurs on IOTLB update. Clean up dangling
pinned pages for the error path.

The memory usage for bookkeeping pinned pages is reverted
to what it was before: only one single free page is needed.
This helps reduce the host memory demand for VM with a large
amount of memory, or in the situation where host is running
short of free memory.

Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend")
Signed-off-by: Si-Wei Liu 
---
  drivers/vhost/vdpa.c | 64 +---
  1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b6d9016..8da8558 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -560,6 +560,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
  
  	if (r)

vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
+   else
+   atomic64_add(size >> PAGE_SHIFT, >mm->pinned_vm);
  
  	return r;

  }
@@ -591,14 +593,16 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
-   unsigned long locked, lock_limit, pinned, i;
+   unsigned long lock_limit, sz2pin, nchunks, i;
u64 iova = msg->iova;
+   long pinned;
int ret = 0;
  
  	if (vhost_iotlb_itree_first(iotlb, msg->iova,

msg->iova + msg->size - 1))
return -EEXIST;
  
+	/* Limit the use of memory for bookkeeping */

page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
@@ -607,52 +611,64 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
gup_flags |= FOLL_WRITE;
  
  	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;

-   if (!npages)
-   return -EINVAL;
+   if (!npages) {
+   ret = -EINVAL;
+   goto free;
+   }
  
  	mmap_read_lock(dev->mm);
  
-	locked = atomic64_add_return(npages, >mm->pinned_vm);

lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-   if (locked > lock_limit) {
+   if (npages + atomic64_read(>mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
-   goto out;
+   goto unlock;
}
  
  	cur_base = msg->uaddr & PAGE_MASK;

iova &= PAGE_MASK;
+   nchunks = 0;
  
  	while (npages) {

-   pinned = min_t(unsigned long, npages, list_size);
-   ret = pin_user_pages(cur_base, pinned,
-gup_flags, page_list, NULL);
-   if (ret != pinned)
+   sz2pin = min_t(unsigned long, npages, list_size);
+   pinned = pin_user_pages(cur_base, sz2pin,
+   gup_flags, page_list, NULL);
+   if (sz2pin != pinned) {
+   if (pinned < 0) {
+   ret = pinned;
+   } else {
+   unpin_user_pages(page_list, pinned);
+   ret = -ENOMEM;
+   }
goto out;
+   }
+   nchunks++;
  
  		if (!last_pfn)

map_pfn = page_to_pfn(page_list[0]);
  
-		for (i = 0; i < ret; i++) {

+   for (i = 0; i < pinned; i++) {
unsigned long this_pfn = page_to_pfn(page_list[i]);
u64 csize;
  
  			if (last_pfn && (this_pfn != last_pfn + 1)) {

/* Pin a contiguous chunk of memory */
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
-   if (vhost_vdpa_map(v, iova, csize,
-  map_pfn << PAGE_SHIFT,
-  msg->perm))
+   ret = vhost_vdpa_map(v, iova, csize,
+map_pfn << PAGE_SHIFT,
+msg->perm);
+   if (ret)
goto out;
+
map_pfn = this_pfn;
iova += csize;
+   nchunks = 0;
}
  
  			last_pfn = this_pfn;

}
  
-		cur_base += ret << PAGE_SHIFT;

-   npages -= ret;
+   cur_base += pinned << PAGE_SHIFT;
+   npages -= pinned;
}
  
  	/* Pin the rest chunk */

@@ -660,10 +676,22 @@ static int vhost_vdpa_process_iotlb_update(struct 
vhost_vdpa *v,
 map_pfn << PAGE_SHIFT,