[Devel] [PATCH rh7] ploop: io_direct: delay f_op->fsync() until FLUSH|FUA

2016-05-25 Thread Maxim Patlasov
Once we have converted an extent to initialized, it can be part of an
uncompleted journal transaction, so we have to force a transaction commit
at some point.

Instead of forcing the transaction commit immediately, the patch delays it
until an incoming bio with FLUSH|FUA arrives. Then, as the very first
step of processing such a bio, we send the corresponding preq to fsync_thread
to perform f_op->fsync().

As a very unlikely case, it is also possible that processing a FLUSH|FUA
bio itself results in converting extents. Then, the patch calls f_op->fsync()
immediately after conversion to preserve FUA semantics.

https://jira.sw.ru/browse/PSBM-47026

Signed-off-by: Maxim Patlasov 
---
 drivers/block/ploop/dev.c   |   70 ---
 drivers/block/ploop/io_direct.c |   28 +++-
 include/linux/ploop/ploop.h |6 +++
 3 files changed, 76 insertions(+), 28 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 654b60b..03fc289 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1942,46 +1942,62 @@ err:
 
 /* Main preq state machine */
 
+static inline bool preq_is_special(struct ploop_request * preq)
+{
+   return test_bit(PLOOP_REQ_MERGE, &preq->state) ||
+   test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
+   test_bit(PLOOP_REQ_RELOC_S, &preq->state) ||
+   test_bit(PLOOP_REQ_DISCARD, &preq->state) ||
+   test_bit(PLOOP_REQ_ZERO, &preq->state);
+}
+
 static void
 ploop_entry_request(struct ploop_request * preq)
 {
struct ploop_device * plo   = preq->plo;
struct ploop_delta  * top_delta = ploop_top_delta(plo);
+   struct ploop_io * top_io= &top_delta->io;
struct ploop_delta  * delta;
int level;
int err;
iblock_t iblk;
 
-   /* Control request. */
-   if (unlikely(preq->bl.head == NULL &&
-!test_bit(PLOOP_REQ_MERGE, &preq->state) &&
-!test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
-!test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
-!test_bit(PLOOP_REQ_DISCARD, &preq->state) &&
-!test_bit(PLOOP_REQ_ZERO, &preq->state))) {
-   complete(plo->quiesce_comp);
-   wait_for_completion(&plo->relax_comp);
-   ploop_complete_request(preq);
-   complete(&plo->relaxed_comp);
-   return;
-   }
+   if (!preq_is_special(preq)) {
+   /* Control request */
+   if (unlikely(preq->bl.head == NULL)) {
+   complete(plo->quiesce_comp);
+   wait_for_completion(&plo->relax_comp);
+   ploop_complete_request(preq);
+   complete(&plo->relaxed_comp);
+   return;
+   }
 
-   /* Empty flush. */
-   if (unlikely(preq->req_size == 0 &&
-!test_bit(PLOOP_REQ_MERGE, &preq->state) &&
-!test_bit(PLOOP_REQ_RELOC_A, &preq->state) &&
-!test_bit(PLOOP_REQ_RELOC_S, &preq->state) &&
-!test_bit(PLOOP_REQ_ZERO, &preq->state))) {
-   if (preq->req_rw & REQ_FLUSH) {
-   if (top_delta->io.ops->issue_flush) {
-   top_delta->io.ops->issue_flush(&top_delta->io, 
preq);
-   return;
-   }
+   /* Need to fsync before start handling FLUSH */
+   if ((preq->req_rw & REQ_FLUSH) &&
+   test_bit(PLOOP_IO_FSYNC_DELAYED, &top_io->io_state) &&
+   !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
+   spin_lock_irq(&plo->lock);
+   list_add_tail(&preq->list, &top_io->fsync_queue);
+   if (waitqueue_active(&top_io->fsync_waitq))
+   wake_up_interruptible(&top_io->fsync_waitq);
+   spin_unlock_irq(&plo->lock);
+   return;
}
 
-   preq->eng_state = PLOOP_E_COMPLETE;
-   ploop_complete_request(preq);
-   return;
+   /* Empty flush or unknown zero-size request */
+   if (preq->req_size == 0) {
+   if (preq->req_rw & REQ_FLUSH &&
+   !test_bit(PLOOP_REQ_FSYNC_DONE, &preq->state)) {
+   if (top_io->ops->issue_flush) {
+   top_io->ops->issue_flush(top_io, preq);
+   return;
+   }
+   }
+
+   preq->eng_state = PLOOP_E_COMPLETE;
+   ploop_complete_request(preq);
+   return;
+   }
}
 
if (unlikely(test_bit(PLOOP_REQ_SYNC, &preq->state) &&
diff --git a/drivers/block/ploop/io

[Devel] [PATCH] scripts: add /usr/libexec/criu/scripts/spfs-release-replace.sh to vz-rst

2016-05-25 Thread Stanislav Kinsburskiy
Add one more spfs-related restore script (this time for "post-restore" stage),
which is required to release spfs manager replace processes (they are waiting
for it to proceed with actual remount and replacement).

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-rst.in |   11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/vz-rst.in b/scripts/vz-rst.in
index 654cdd3..bfd804a 100755
--- a/scripts/vz-rst.in
+++ b/scripts/vz-rst.in
@@ -78,9 +78,14 @@ if [ -f "$autofs_actions_path" ]; then
 fi
 
 # SPFS actions
-spfs_actions_path=/usr/libexec/criu/scripts/spfs-change-mode.sh
-if [ -f "$spfs_actions_path" ]; then
-   spfs_actions="--action-script $spfs_actions_path"
+spfs_actions=""
+spfs_change_mode=/usr/libexec/criu/scripts/spfs-change-mode.sh
+if [ -f "$spfs_change_mode" ]; then
+   spfs_actions="--action-script $spfs_change_mode"
+fi
+spfs_release_replace=/usr/libexec/criu/scripts/spfs-release-replace.sh
+if [ -f "$spfs_release_replace" ]; then
+   spfs_actions="$spfs_actions --action-script $spfs_release_replace"
 fi
 
 criu restore -v$CRIU_LOGLEVEL  \

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH] scripts: add /usr/libexec/criu/scripts/spfs-release-replace.sh to vz-rst

2016-05-25 Thread Stanislav Kinsburskiy
Add one more spfs-related restore script (this time for "post-restore" stage),
which is required to release spfs manager replace processes (they are waiting
for it to proceed with actual remount and replacement).

Signed-off-by: Stanislav Kinsburskiy 
---
 scripts/vz-rst.in |   11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/vz-rst.in b/scripts/vz-rst.in
index 654cdd3..bfd804a 100755
--- a/scripts/vz-rst.in
+++ b/scripts/vz-rst.in
@@ -78,9 +78,14 @@ if [ -f "$autofs_actions_path" ]; then
 fi
 
 # SPFS actions
-spfs_actions_path=/usr/libexec/criu/scripts/spfs-change-mode.sh
-if [ -f "$spfs_actions_path" ]; then
-   spfs_actions="--action-script $spfs_actions_path"
+spfs_actions=""
+spfs_change_mode=/usr/libexec/criu/scripts/spfs-change-mode.sh
+if [ -f "$spfs_change_mode" ]; then
+   spfs_actions="--action-script $spfs_change_mode"
+fi
+spfs_release_replace=/usr/libexec/criu/scripts/spfs-release-replace.sh
+if [ -f "$spfs_release_replace" ]; then
+   spfs_actions="$spfs_actions --action-script $spfs_release_replace"
 fi
 
 criu restore -v$CRIU_LOGLEVEL  \

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH rh7] pfcache: pick page from peer mapping after readahead

2016-05-25 Thread Vladimir Davydov
There's a mistake in pick_peer_page. This function is called before
issuing a read from the pagefault and file read paths, in order to retrieve
the page from the peer's mapping and thus save us io and page cache. In this
function we first look up the page in the peer's mapping. If that fails, we
start readahead for the peer's mapping. Then we should retry the lookup from
the peer's mapping to get the page allocated by the readahead, but instead we
do the lookup from the original mapping, which obviously fails as we only call
pick_peer_page if there's no page in the original mapping's cache. So we
fall back on reading a page for the original mapping instead of using
pages from pfcache.

Signed-off-by: Vladimir Davydov 
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index 7ba5f6f8eaac..c3a36558ac40 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4697,7 +4697,7 @@ struct page *pick_peer_page(struct address_space 
*mapping, pgoff_t index,
page = find_get_page(peer, index);
if (!page) {
page_cache_sync_readahead(peer, ra, file, index, ra_size);
-   page = find_get_page(mapping, index);
+   page = find_get_page(peer, index);
if (!page)
goto out;
}
-- 
2.1.4

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH rh7] kernel/sysrq: reset watchdog on all cpus while during sysrq-w

2016-05-25 Thread Andrey Ryabinin
Lengthy output of sysrq-w may take a lot of time on a slow serial console.
Currently we reset the NMI-watchdog on the current cpu to avoid a softlockup;
however, this doesn't work since the watchdog might be triggered on another
cpu which is waiting for an IPI to proceed.

Reset watchdog on all cpus to prevent spurious softlockup messages.

https://jira.sw.ru/browse/PSBM-47486

Signed-off-by: Andrey Ryabinin 
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2c147c8..d21ccf0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5177,7 +5177,7 @@ void show_state_filter(unsigned long state_filter)
 * reset the NMI-timeout, listing all files on a slow
 * console might take a lot of time:
 */
-   touch_nmi_watchdog();
+   touch_all_softlockup_watchdogs();
if (!state_filter || (p->state & state_filter))
sched_show_task(p);
} while_each_thread(g, p);
-- 
2.7.3

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH rh7] cbt: fix possible race on alloc_page()

2016-05-25 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> cbt_page_alloc() drops cbt->lock before calling alloc_page(),
> then re-acquires it. It's safer to re-check that cbt->map[idx]
> is still NULL after re-acquiring the lock.
>
> Signed-off-by: Maxim Patlasov 
Indeed. Ack.
> ---
>  block/blk-cbt.c |7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/block/blk-cbt.c b/block/blk-cbt.c
> index 8ba52fb..14ad1a2 100644
> --- a/block/blk-cbt.c
> +++ b/block/blk-cbt.c
> @@ -128,7 +128,12 @@ static int cbt_page_alloc(struct cbt_info  **cbt_pp, 
> unsigned long idx,
>   spin_unlock_irq(&cbt->lock);
>   return -ENOMEM;
>   }
> - cbt->map[idx] = page;
> +
> + if (likely(CBT_PAGE(cbt, idx) == NULL))
> + cbt->map[idx] = page;
> + else
> + __free_page(page);
> +
>   page = NULL;
>   spin_unlock_irq(&cbt->lock);
>  


signature.asc
Description: PGP signature
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel