Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-05-11 Thread Christoph Hellwig
It might make sense to move filemap_range_has_page into a separate
prep patch.

Otherwise this looks fine:

Reviewed-by: Christoph Hellwig 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-05-09 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.

This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.

Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.

Signed-off-by: Goldwyn Rodrigues 
---
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 50 +++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4cb62e032b70..24d5c123788f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2514,6 +2514,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
   loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index d51670b7fe6b..48b83d1d4a30 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,39 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:   address space structure to wait for
+ * @start_byte:offset in bytes where the range starts
+ * @end_byte:  offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+   loff_t start_byte, loff_t end_byte)
+{
+   pgoff_t index = start_byte >> PAGE_SHIFT;
+   pgoff_t end = end_byte >> PAGE_SHIFT;
+   struct pagevec pvec;
+   int ret;
+
+   if (end_byte < start_byte)
+   return 0;
+
+   if (mapping->nrpages == 0)
+   return 0;
+
+   pagevec_init(&pvec, 0);
+   ret = pagevec_lookup(&pvec, mapping, index, 1);
+   if (!ret)
+   return 0;
+   ret = (pvec.pages[0]->index <= end);
+   pagevec_release(&pvec);
+   return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 loff_t start_byte, loff_t end_byte)
 {
@@ -2640,6 +2673,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
pos = iocb->ki_pos;
 
+   if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+   return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2709,9 +2745,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-   if (written)
-   goto out;
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* If there are pages to writeback, return */
+   if (filemap_range_has_page(inode->i_mapping, pos,
+  pos + iov_iter_count(from)))
+   return -EAGAIN;
+   } else {
+   written = filemap_write_and_wait_range(mapping, pos,
+   pos + write_len - 1);
+   if (written)
+   goto out;
+   }
 
/*
 * After a write we want buffered reads to be sure to go to disk to get
-- 
2.12.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-04-14 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.

This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.

Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.

Signed-off-by: Goldwyn Rodrigues 
---
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 50 +++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e44de1c981a0..b14eab5daeb2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2514,6 +2514,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
   loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index d51670b7fe6b..48b83d1d4a30 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,39 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:   address space structure to wait for
+ * @start_byte:offset in bytes where the range starts
+ * @end_byte:  offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+   loff_t start_byte, loff_t end_byte)
+{
+   pgoff_t index = start_byte >> PAGE_SHIFT;
+   pgoff_t end = end_byte >> PAGE_SHIFT;
+   struct pagevec pvec;
+   int ret;
+
+   if (end_byte < start_byte)
+   return 0;
+
+   if (mapping->nrpages == 0)
+   return 0;
+
+   pagevec_init(&pvec, 0);
+   ret = pagevec_lookup(&pvec, mapping, index, 1);
+   if (!ret)
+   return 0;
+   ret = (pvec.pages[0]->index <= end);
+   pagevec_release(&pvec);
+   return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 loff_t start_byte, loff_t end_byte)
 {
@@ -2640,6 +2673,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
pos = iocb->ki_pos;
 
+   if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+   return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2709,9 +2745,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-   if (written)
-   goto out;
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* If there are pages to writeback, return */
+   if (filemap_range_has_page(inode->i_mapping, pos,
+  pos + iov_iter_count(from)))
+   return -EAGAIN;
+   } else {
+   written = filemap_write_and_wait_range(mapping, pos,
+   pos + write_len - 1);
+   if (written)
+   goto out;
+   }
 
/*
 * After a write we want buffered reads to be sure to go to disk to get
-- 
2.12.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-04-03 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.

This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.

Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.
---
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 50 +++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 802cfe2..4721136 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2515,6 +2515,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
   loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index e08f3b9..c020e23 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,39 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:   address space structure to wait for
+ * @start_byte:offset in bytes where the range starts
+ * @end_byte:  offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+   loff_t start_byte, loff_t end_byte)
+{
+   pgoff_t index = start_byte >> PAGE_SHIFT;
+   pgoff_t end = end_byte >> PAGE_SHIFT;
+   struct pagevec pvec;
+   int ret;
+
+   if (end_byte < start_byte)
+   return 0;
+
+   if (mapping->nrpages == 0)
+   return 0;
+
+   pagevec_init(&pvec, 0);
+   ret = pagevec_lookup(&pvec, mapping, index, 1);
+   if (!ret)
+   return 0;
+   ret = (pvec.pages[0]->index <= end);
+   pagevec_release(&pvec);
+   return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 loff_t start_byte, loff_t end_byte)
 {
@@ -2640,6 +2673,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
pos = iocb->ki_pos;
 
+   if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+   return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2709,9 +2745,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-   if (written)
-   goto out;
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* If there are pages to writeback, return */
+   if (filemap_range_has_page(inode->i_mapping, pos,
+  pos + iov_iter_count(from)))
+   return -EAGAIN;
+   } else {
+   written = filemap_write_and_wait_range(mapping, pos,
+   pos + write_len - 1);
+   if (written)
+   goto out;
+   }
 
/*
 * After a write we want buffered reads to be sure to go to disk to get
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-16 Thread Goldwyn Rodrigues


On 03/16/2017 08:20 AM, Matthew Wilcox wrote:
> On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
>> From: Goldwyn Rodrigues 
>>
>> Find out if the write will trigger a wait due to writeback. If yes,
>> return -EAGAIN.
>>
>> This introduces a new function filemap_range_has_page() which
>> returns true if the file's mapping has a page within the range
>> mentioned.
>>
>> Return -EINVAL for buffered AIO: there are multiple causes of
>> delay such as page locks, dirty throttling logic, page loading
>> from disk etc. which cannot be taken care of.
> 
> Also, this patch only touches the write path; we have a similar call to
> write_and_wait_range() in generic_file_read_iter().
> 

This patch series is concerned with direct-write AIO paths only.

-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-16 Thread Goldwyn Rodrigues


On 03/16/2017 08:08 AM, Matthew Wilcox wrote:
> On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
>> This introduces a new function filemap_range_has_page() which
>> returns true if the file's mapping has a page within the range
>> mentioned.
> 
> I thought you were going to replace this patch with one that starts
> writeback for these pages but does not wait for them?
> 

As mentioned by Jan, Flags to filemap_write_and_wait_range are
unnecessarily complicated. The AIO-DIO API users who eye for performance
usually are careful with page writes/evictions. As a fallback, they can
(and should) go the wait route (without  IOCB_RW_FLAG_NOWAIT).

Finally, my take on this is that we don't want to perform tasks for a
following system call, which may or may not immediately follow the
current one. May not, because an application (DB) will offload the task
from the CPU thread to the I/O thread in case of -EAGAIN. A system call
should be complete in itself (and do the minimum, what is asked).


-- 
Goldwyn
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-16 Thread Matthew Wilcox
On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues 
> 
> Find out if the write will trigger a wait due to writeback. If yes,
> return -EAGAIN.
> 
> This introduces a new function filemap_range_has_page() which
> returns true if the file's mapping has a page within the range
> mentioned.
> 
> Return -EINVAL for buffered AIO: there are multiple causes of
> delay such as page locks, dirty throttling logic, page loading
> from disk etc. which cannot be taken care of.

Also, this patch only touches the write path; we have a similar call to
write_and_wait_range() in generic_file_read_iter().

Actually, why do we even have that?  Why can't we satisfy an O_DIRECT
read from the cache?
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-16 Thread Matthew Wilcox
On Wed, Mar 15, 2017 at 04:51:02PM -0500, Goldwyn Rodrigues wrote:
> This introduces a new function filemap_range_has_page() which
> returns true if the file's mapping has a page within the range
> mentioned.

I thought you were going to replace this patch with one that starts
writeback for these pages but does not wait for them?

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-15 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.

This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.

Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.

Signed-off-by: Goldwyn Rodrigues 
---
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 50 +++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e8d9346..4a30e8f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2514,6 +2514,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
   loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index e08f3b9..c020e23 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,39 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:   address space structure to wait for
+ * @start_byte:offset in bytes where the range starts
+ * @end_byte:  offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+   loff_t start_byte, loff_t end_byte)
+{
+   pgoff_t index = start_byte >> PAGE_SHIFT;
+   pgoff_t end = end_byte >> PAGE_SHIFT;
+   struct pagevec pvec;
+   int ret;
+
+   if (end_byte < start_byte)
+   return 0;
+
+   if (mapping->nrpages == 0)
+   return 0;
+
+   pagevec_init(&pvec, 0);
+   ret = pagevec_lookup(&pvec, mapping, index, 1);
+   if (!ret)
+   return 0;
+   ret = (pvec.pages[0]->index <= end);
+   pagevec_release(&pvec);
+   return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 loff_t start_byte, loff_t end_byte)
 {
@@ -2640,6 +2673,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
pos = iocb->ki_pos;
 
+   if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+   return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2709,9 +2745,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-   if (written)
-   goto out;
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* If there are pages to writeback, return */
+   if (filemap_range_has_page(inode->i_mapping, pos,
+  pos + iov_iter_count(from)))
+   return -EAGAIN;
+   } else {
+   written = filemap_write_and_wait_range(mapping, pos,
+   pos + write_len - 1);
+   if (written)
+   goto out;
+   }
 
/*
 * After a write we want buffered reads to be sure to go to disk to get
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-02 Thread Jan Kara
On Thu 02-03-17 06:12:45, Matthew Wilcox wrote:
> On Thu, Mar 02, 2017 at 11:38:45AM +0100, Jan Kara wrote:
> > On Wed 01-03-17 07:38:57, Christoph Hellwig wrote:
> > > On Tue, Feb 28, 2017 at 07:46:06PM -0800, Matthew Wilcox wrote:
> > > > But what's going to kick these pages out of cache?  Shouldn't we rather
> > > > find the pages, kick them out if clean, start writeback if not, and *then*
> > > > return -EAGAIN?
> > > 
> > > As pointed out in the last round of these patches I think we really
> > > need to pass a flags argument to filemap_write_and_wait_range to
> > > communicate the non-blocking nature and only return -EAGAIN if we'd
> > > block.  As a bonus that can indeed start to kick the pages out.
> > 
> > Aren't flags to filemap_write_and_wait_range() unnecessary complication?
> > Realistically, most users wanting performance from AIO DIO so badly that
> > they bother with this API won't have any pages to write / evict. If they do
> > by some bad accident, they can fall back to standard "blocking" AIO DIO.
> > So I don't see much value in teaching filemap_write_and_wait_range() about
> > a non-blocking mode...
> 
> That lets me execute a DoS against a user using this API.  All I have
> to do is open the file they're using read-only and read a byte from it.
> Page goes into page-cache, and they'll only get -EAGAIN from calling
> this syscall until the page ages out.

It will not be a DoS. This non-blocking AIO can always return EAGAIN when
it feels like it and the caller is required to fall back to a blocking
version in that case if he wants to guarantee forward progress. It is just
a performance optimization which allows user (database) to submit IO from a
computation thread instead of having to offload it to an IO thread...

> Also, I don't understand why this is a flag.  Isn't the point of AIO to
> be non-blocking?  Why isn't this just a change to how we do AIO?

Because this is an API change and the caller has to implement some handling
to guarantee a forward progress of non-blocking IO...

Honza
-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-02 Thread Matthew Wilcox
On Thu, Mar 02, 2017 at 11:38:45AM +0100, Jan Kara wrote:
> On Wed 01-03-17 07:38:57, Christoph Hellwig wrote:
> > On Tue, Feb 28, 2017 at 07:46:06PM -0800, Matthew Wilcox wrote:
> > > But what's going to kick these pages out of cache?  Shouldn't we rather
> > > find the pages, kick them out if clean, start writeback if not, and *then*
> > > return -EAGAIN?
> > 
> > As pointed out in the last round of these patches I think we really
> > need to pass a flags argument to filemap_write_and_wait_range to
> > communicate the non-blocking nature and only return -EAGAIN if we'd
> > block.  As a bonus that can indeed start to kick the pages out.
> 
> Aren't flags to filemap_write_and_wait_range() unnecessary complication?
> Realistically, most users wanting performance from AIO DIO so badly that
> they bother with this API won't have any pages to write / evict. If they do
> by some bad accident, they can fall back to standard "blocking" AIO DIO.
> So I don't see much value in teaching filemap_write_and_wait_range() about
> a non-blocking mode...

That lets me execute a DoS against a user using this API.  All I have
to do is open the file they're using read-only and read a byte from it.
Page goes into page-cache, and they'll only get -EAGAIN from calling
this syscall until the page ages out.

Also, I don't understand why this is a flag.  Isn't the point of AIO to
be non-blocking?  Why isn't this just a change to how we do AIO?
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-02 Thread Jan Kara
On Wed 01-03-17 07:38:57, Christoph Hellwig wrote:
> On Tue, Feb 28, 2017 at 07:46:06PM -0800, Matthew Wilcox wrote:
> > Ugh, this is pretty inefficient.  If that's all you want to know, then
> > using the radix tree directly will be far more efficient than spinning
> > up all the pagevec machinery only to discard the pages found.
> > 
> > But what's going to kick these pages out of cache?  Shouldn't we rather
> > find the pages, kick them out if clean, start writeback if not, and *then*
> > return -EAGAIN?
> > 
> > So maybe we want to spin up the pagevec machinery after all so we can
> > do that extra work?
> 
> As pointed out in the last round of these patches I think we really
> need to pass a flags argument to filemap_write_and_wait_range to
> communicate the non-blocking nature and only return -EAGAIN if we'd
> block.  As a bonus that can indeed start to kick the pages out.

Aren't flags to filemap_write_and_wait_range() unnecessary complication?
Realistically, most users wanting performance from AIO DIO so badly that
they bother with this API won't have any pages to write / evict. If they do
by some bad accident, they can fall back to standard "blocking" AIO DIO.
So I don't see much value in teaching filemap_write_and_wait_range() about
a non-blocking mode...

Honza

-- 
Jan Kara 
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-03-01 Thread Christoph Hellwig
On Tue, Feb 28, 2017 at 07:46:06PM -0800, Matthew Wilcox wrote:
> Ugh, this is pretty inefficient.  If that's all you want to know, then
> using the radix tree directly will be far more efficient than spinning
> up all the pagevec machinery only to discard the pages found.
> 
> But what's going to kick these pages out of cache?  Shouldn't we rather
> find the pages, kick them out if clean, start writeback if not, and *then*
> return -EAGAIN?
> 
> So maybe we want to spin up the pagevec machinery after all so we can
> do that extra work?

As pointed out in the last round of these patches I think we really
need to pass a flags argument to filemap_write_and_wait_range to
communicate the non-blocking nature and only return -EAGAIN if we'd
block.  As a bonus that can indeed start to kick the pages out.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-02-28 Thread Matthew Wilcox
On Tue, Feb 28, 2017 at 05:36:05PM -0600, Goldwyn Rodrigues wrote:
> Find out if the write will trigger a wait due to writeback. If yes,
> return -EAGAIN.
> 
> This introduces a new function filemap_range_has_page() which
> returns true if the file's mapping has a page within the range
> mentioned.

Ugh, this is pretty inefficient.  If that's all you want to know, then
using the radix tree directly will be far more efficient than spinning
up all the pagevec machinery only to discard the pages found.

But what's going to kick these pages out of cache?  Shouldn't we rather
find the pages, kick them out if clean, start writeback if not, and *then*
return -EAGAIN?

So maybe we want to spin up the pagevec machinery after all so we can
do that extra work?

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.

This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.

Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.

Signed-off-by: Goldwyn Rodrigues 
---
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 50 +++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ab2f556..527ef53 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2494,6 +2494,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
   loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index 78dd50e..82335f4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -375,6 +375,39 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:   address space structure to wait for
+ * @start_byte:offset in bytes where the range starts
+ * @end_byte:  offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+   loff_t start_byte, loff_t end_byte)
+{
+   pgoff_t index = start_byte >> PAGE_SHIFT;
+   pgoff_t end = end_byte >> PAGE_SHIFT;
+   struct pagevec pvec;
+   int ret;
+
+   if (end_byte < start_byte)
+   return 0;
+
+   if (mapping->nrpages == 0)
+   return 0;
+
+   pagevec_init(&pvec, 0);
+   ret = pagevec_lookup(&pvec, mapping, index, 1);
+   if (!ret)
+   return 0;
+   ret = (pvec.pages[0]->index <= end);
+   pagevec_release(&pvec);
+   return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 loff_t start_byte, loff_t end_byte)
 {
@@ -2631,6 +2664,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 
pos = iocb->ki_pos;
 
+   if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+   return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2700,9 +2736,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
-   if (written)
-   goto out;
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* If there are pages to writeback, return */
+   if (filemap_range_has_page(inode->i_mapping, pos,
+  pos + iov_iter_count(from)))
+   return -EAGAIN;
+   } else {
+   written = filemap_write_and_wait_range(mapping, pos,
+   pos + write_len - 1);
+   if (written)
+   goto out;
+   }
 
/*
 * After a write we want buffered reads to be sure to go to disk to get
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html