[Devel] [PATCH rh7] ploop: fix barriers for ordinary requests

2016-06-21 Thread Maxim Patlasov
The way io_direct.c handles FLUSH|FUA (b1:FLUSH,b2,b3,b4,b5:FLUSH|FUA)
is completely wrong: to make sure that b1:FLUSH took effect we have to
wait for its completion. Similarly, even if we're sure that FUA will be
processed as post-FLUSH (also dubious!), we have to wait for completion
of b1..b4 to make sure that the flush will cover them.

The patch fixes all these issues pretty simply: let's mark outgoing
bio-s with FLUSH|FUA based on those flags in the *corresponding* incoming
bio-s.

Signed-off-by: Maxim Patlasov 
---
 drivers/block/ploop/dev.c   |1 -
 drivers/block/ploop/io_direct.c |   47 ---
 2 files changed, 15 insertions(+), 33 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 2ef1449..6b5702f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -498,7 +498,6 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
preq->req_sector = bio->bi_sector;
preq->req_size = bio->bi_size >> 9;
preq->req_rw = bio->bi_rw;
-   bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
preq->eng_state = PLOOP_E_ENTRY;
preq->state = 0;
preq->error = 0;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 6ef9cd8..84c9a48 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -92,7 +92,6 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
int preflush;
int postfua = 0;
int write = !!(rw & REQ_WRITE);
-   int bio_num;
 
trace_submit(preq);
 
@@ -233,13 +232,13 @@ flush_bio:
goto flush_bio;
}
 
+   bio->bi_rw |= bw.cur->bi_rw & (REQ_FLUSH | REQ_FUA);
bw.bv_off += copy;
size -= copy >> 9;
sec += copy >> 9;
}
ploop_extent_put(em);
 
-   bio_num = 0;
while (bl.head) {
struct bio * b = bl.head;
unsigned long rw2 = rw;
@@ -255,11 +254,10 @@ flush_bio:
preflush = 0;
}
if (unlikely(postfua && !bl.head))
-   rw2 |= (REQ_FUA | ((bio_num) ? REQ_FLUSH : 0));
+   rw2 |= REQ_FUA;
 
ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
-   submit_bio(rw2, b);
-   bio_num++;
+   submit_bio(rw2 | b->bi_rw, b);
}
 
ploop_complete_io_request(preq);
@@ -567,7 +565,6 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request * 
preq,
sector_t sec, end_sec, nsec, start, end;
struct bio_list_walk bw;
int err;
-   int preflush = !!(preq->req_rw & REQ_FLUSH);
 
bio_list_init(&bl);
 
@@ -598,14 +595,17 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request 
* preq,
while (sec < end_sec) {
struct page * page;
unsigned int poff, plen;
+   bool zero_page;
 
if (sec < start) {
+   zero_page = true;
page = ZERO_PAGE(0);
poff = 0;
plen = start - sec;
if (plen > (PAGE_SIZE>>9))
plen = (PAGE_SIZE>>9);
} else if (sec >= end) {
+   zero_page = true;
page = ZERO_PAGE(0);
poff = 0;
plen = end_sec - sec;
@@ -614,6 +614,7 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request * 
preq,
} else {
/* sec >= start && sec < end */
struct bio_vec * bv;
+   zero_page = false;
 
if (sec == start) {
bw.cur = sbl->head;
@@ -672,6 +673,10 @@ flush_bio:
goto flush_bio;
}
 
+   /* Handle FLUSH here, dio_post_submit will handle FUA */
+   if (!zero_page)
+   bio->bi_rw |= bw.cur->bi_rw & REQ_FLUSH;
+
bw.bv_off += (plen<<9);
BUG_ON(plen == 0);
sec += plen;
@@ -688,13 +693,9 @@ flush_bio:
b->bi_private = preq;
b->bi_end_io = dio_endio_async;
 
-   rw = sbl->head->bi_rw | WRITE;
-   if (unlikely(preflush)) {
-   rw |= REQ_FLUSH;
-   preflush = 0;
-   }
+   rw = preq->req_rw & ~(REQ_FLUSH | REQ_FUA);
ploop_acc_ff_out(preq->plo, rw | b->bi_rw);
-   submit_bio(rw, b);
+   submit_bio(rw | b->bi_rw, b);
}
 
ploop_complete_io_request(preq);
@@ -1422,13 +1423,6 @@ dio_io_page(struct ploop_io * io, unsigned long rw,
sector_t nsec;
int err;
int off;
-   int postfua;
-   int bio_num;
-   in

Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3

2016-06-21 Thread Maxim Patlasov

Dima,

After more thinking I realized that the whole idea of
PLOOP_REQ_DELAYED_FLUSH might be bogus: it is possible that we simply do
not have enough incoming FUA-s to make delaying worthwhile. This
patch actually mixes three things: 1) fix barriers for RELOC_A|S
requests, 2) fix barriers for ordinary requests, 3) DELAYED_FLUSH
optimization. So, please, split the patch into three and make some
measurements demonstrating that applying the "DELAYED_FLUSH optimization"
patch on top of the previous patches improves performance.


I have an idea about how to fix barriers for ordinary requests -- please
see the patch I'll send soon. The key point is that handling of FLUSH-es
is broken the same way as FUA: if you observe (rw & REQ_FLUSH) and send
the first bio marked as REQ_FLUSH, it guarantees nothing unless you wait
for its completion before submitting further bio-s! And ploop simply does
not have the logic of waiting for the first bio before sending the others.
And, to make things worse, not only dio_submit is affected: dio_submit_pad
and dio_io_page need to be fixed too.


There are also some inline comments below...

On 06/21/2016 06:55 AM, Dmitry Monakhov wrote:

The barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But a request can also go through ->dio_submit_alloc()->dio_submit_pad and
write_page (for indexes).
So in case of grow_dev we have the following sequence:

E_RELOC_DATA_READ:
  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
   ->delta->allocate
  ->io->submit_allloc: dio_submit_alloc
->dio_submit_pad
E_DATA_WBI : data written, time to update index
   ->delta->allocate_complete:ploop_index_update
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
 ->write_page
 ->ploop_map_wb_complete
   ->ploop_wb_complete_post_process
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

->submit()

BUG#2: currently kaio write_page silently ignores REQ_FLUSH
BUG#3: io_direct:dio_submit: if fua_delay is not possible we MUST tag all bios
with REQ_FUA, not just the last one.
This patch unifies barrier handling as follows:
- Get rid of FORCE_{FLUSH,FUA}
- Introduce DELAYED_FLUSH
- fix fua handling for dio_submit
- BUG_ON for REQ_FLUSH in kaio_page_write

This makes reloc sequence optimal:
io_direct
RELOC_S: R1, W2, WBI:FLUSH|FUA
RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
io_kaio
RELOC_S: R1, W2:FUA, WBI:FUA
RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov 
---
  drivers/block/ploop/dev.c   |  8 +---
  drivers/block/ploop/io_direct.c | 30 ++-
  drivers/block/ploop/io_kaio.c   | 23 +
  drivers/block/ploop/map.c   | 45 ++---
  include/linux/ploop/ploop.h | 19 +
  5 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..fbc5f2f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * 
preq)
  
  	__TRACE("Z %p %u\n", preq, preq->req_cluster);
  
+	if (!preq->error) {

+   WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
+   }
while (preq->bl.head) {
struct bio * bio = preq->bl.head;
preq->bl.head = bio->bi_next;
@@ -2530,9 +2533,8 @@ restart:
top_delta = ploop_top_delta(plo);
sbl.head = sbl.tail = preq->aux_bio;
  
-		/* Relocated data write required sync before BAT updatee */

-   set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+   /* Relocated data write required sync before BAT updatee
+* this will happen inside index_update */
if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
preq->eng_state = PLOOP_E_DATA_WBI;
plo->st.bio_out++;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index a6d83fe..303eb70 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
int err;
struct bio_list_walk bw;
int preflush;
-   int postfua = 0;
+   int fua = 0;
int write = !!(rw & REQ_WRITE);
int bio_num;


Your patch obsoletes bio_num. Please remove it.

  
  	trace_submit(preq);
  
  	preflush = !!(rw & REQ_FLUSH);

-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state))
-   preflush = 1;
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-   postfua = 1;
-
-   if (!postfua && ploop_req_delay_fua_possible(rw, preq)) {
-
+   fua =

[Devel] [PATCH rh7] mm: memcontrol: reclaim when shrinking memory.high below usage

2016-06-21 Thread Vladimir Davydov
From: Johannes Weiner 

When setting memory.high below usage, nothing happens until the next
charge comes along, and then it will only reclaim its own charge and not
the now potentially huge excess of the new memory.high.  This can cause
groups to stay in excess of their memory.high indefinitely.

To fix that, when shrinking memory.high, kick off a reclaim cycle that
goes after the delta.

https://jira.sw.ru/browse/PSBM-48546

Signed-off-by: Johannes Weiner 
Acked-by: Michal Hocko 
Cc: Vladimir Davydov 
Cc: 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
(cherry picked from commit 588083bb37a3cea8533c392370a554417c8f29cb)
Signed-off-by: Vladimir Davydov 

Conflicts:
mm/memcontrol.c
---
 mm/memcontrol.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index de7c36295515..1f525f27e481 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5314,7 +5314,7 @@ static int mem_cgroup_high_write(struct cgroup *cont, 
struct cftype *cft,
 const char *buffer)
 {
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
-   unsigned long long val;
+   unsigned long long val, usage;
int ret;
 
ret = res_counter_memparse_write_strategy(buffer, &val);
@@ -5322,6 +5322,12 @@ static int mem_cgroup_high_write(struct cgroup *cont, 
struct cftype *cft,
return ret;
 
memcg->high = val;
+
+   usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+   if (usage > val)
+   try_to_free_mem_cgroup_pages(memcg,
+(usage - val) >> PAGE_SHIFT,
+GFP_KERNEL, false);
return 0;
 }
 
-- 
2.1.4

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v2

2016-06-21 Thread Maxim Patlasov

On 06/21/2016 12:25 AM, Dmitry Monakhov wrote:

Maxim Patlasov  writes:


Dima,

I agree with general approach of this patch, but there are some
(easy-to-fix) issues. See, please, inline comments below...

On 06/20/2016 11:58 AM, Dmitry Monakhov wrote:

barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But request also can goes though ->dio_submit_alloc()->dio_submit_pad and 
write_page (for indexes)
So in case of grow_dev we have following sequance:

E_RELOC_DATA_READ:
   ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
->delta->allocate
   ->io->submit_allloc: dio_submit_alloc
 ->dio_submit_pad
E_DATA_WBI : data written, time to update index
->delta->allocate_complete:ploop_index_update
  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
  ->write_page
  ->ploop_map_wb_complete
->ploop_wb_complete_post_process
  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

 ->submit()

BUG#2: currecntly kaio write_page silently ignores REQ_FUA

Sorry, I can't agree, it actually does not ignore:

I mistyped. I meant to say REQ_FLUSH.

static void
kaio_write_page(struct ploop_io * io, struct ploop_request * preq,
  struct page * page, sector_t sec, int fua)
{
 /* No FUA in kaio, convert it to fsync */
 if (fua)
 set_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state);



BUG#3: io_direct:dio_submit  if fua_delay is not possible we MUST tag all bios 
via REQ_FUA
 not just latest one.

No need to tag *all*. See inline comments below.


This patch unify barrier handling like follows:
- Get rid of FORCE_{FLUSH,FUA}
- Introduce DELAYED_FLUSH, currecntly it supported only by io_direct
- fix up fua handling for dio_submit

This makes reloc sequence optimal:
io_direct
RELOC_S: R1, W2, WBI:FLUSH|FUA
RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
io_kaio
RELOC_S: R1, W2:FUA, WBI:FUA
RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov 
---
   drivers/block/ploop/dev.c   |  8 +---
   drivers/block/ploop/io_direct.c | 29 +-
   drivers/block/ploop/io_kaio.c   | 17 ++--
   drivers/block/ploop/map.c   | 45 
++---
   include/linux/ploop/ploop.h |  8 
   5 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..fbc5f2f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * 
preq)
   
   	__TRACE("Z %p %u\n", preq, preq->req_cluster);
   
+	if (!preq->error) {

+   WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
+   }
while (preq->bl.head) {
struct bio * bio = preq->bl.head;
preq->bl.head = bio->bi_next;
@@ -2530,9 +2533,8 @@ restart:
top_delta = ploop_top_delta(plo);
sbl.head = sbl.tail = preq->aux_bio;
   
-		/* Relocated data write required sync before BAT updatee */

-   set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+   /* Relocated data write required sync before BAT updatee
+* this will happen inside index_update */
if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
preq->eng_state = PLOOP_E_DATA_WBI;
plo->st.bio_out++;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index a6d83fe..d7ecd4a 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -90,21 +90,12 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
trace_submit(preq);
   
   	preflush = !!(rw & REQ_FLUSH);

-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state))
-   preflush = 1;
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-   postfua = 1;
-
-   if (!postfua && ploop_req_delay_fua_possible(rw, preq)) {
-
+   postfua = !!(rw & REQ_FUA);
+   if (ploop_req_delay_fua_possible(rw, preq)) {
/* Mark req that delayed flush required */
-   set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state);
-   } else if (rw & REQ_FUA) {
-   postfua = 1;
+   set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state);
+   postfua = 0;
}

"postfua" is a horrible name, let us see if we can get rid of it
completely. Also, the way ploop_req_delay_fua_possible is implemented
is prone to errors (see below an issue in kaio_complete_io_state). Let's
rework it like this:

Let it be postflush.

static inline bool ploop_req_delay_fua_possible(struct ploop_request
*preq)
{
 return preq->eng_state ==

[Devel] [PATCH 1/3] ploop: skip redundant fsync for REQ_FUA in post_submit

2016-06-21 Thread Dmitry Monakhov
Signed-off-by: Dmitry Monakhov 
---
 drivers/block/ploop/io_direct.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index b844a80..58d7580 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -517,27 +517,31 @@ dio_post_submit(struct ploop_io *io, struct ploop_request 
* preq)
struct ploop_device *plo = preq->plo;
sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log;
loff_t clu_siz = 1 << (preq->plo->cluster_log + 9);
+   int force_sync = preq->req_rw & REQ_FUA;
int err;
 
file_start_write(io->files.file);
 
-   /* Here io->io_count is even ... */
-   spin_lock_irq(&plo->lock);
-   io->io_count++;
-   set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
-   spin_unlock_irq(&plo->lock);
-
+   if (!force_sync) {
+   /* Here io->io_count is even ... */
+   spin_lock_irq(&plo->lock);
+   io->io_count++;
+   set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state);
+   spin_unlock_irq(&plo->lock);
+   }
err = io->files.file->f_op->fallocate(io->files.file,
  FALLOC_FL_CONVERT_UNWRITTEN,
  (loff_t)sec << 9, clu_siz);
 
/* highly unlikely case: FUA coming to a block not provisioned yet */
-   if (!err && (preq->req_rw & REQ_FUA))
+   if (!err && force_sync)
err = io->ops->sync(io);
 
-   spin_lock_irq(&plo->lock);
-   io->io_count++;
-   spin_unlock_irq(&plo->lock);
+   if (!force_sync) {
+   spin_lock_irq(&plo->lock);
+   io->io_count++;
+   spin_unlock_irq(&plo->lock);
+   }
/* and here io->io_count is even (+2) again. */
 
file_end_write(io->files.file);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3

2016-06-21 Thread Dmitry Monakhov
The barrier code is broken in many ways:
Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
But a request can also go through ->dio_submit_alloc()->dio_submit_pad and
write_page (for indexes).
So in case of grow_dev we have the following sequence:

E_RELOC_DATA_READ:
 ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
  ->delta->allocate
 ->io->submit_allloc: dio_submit_alloc
   ->dio_submit_pad
E_DATA_WBI : data written, time to update index
  ->delta->allocate_complete:ploop_index_update
->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
->write_page
->ploop_map_wb_complete
  ->ploop_wb_complete_post_process
->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
E_RELOC_NULLIFY:

   ->submit()

BUG#2: currently kaio write_page silently ignores REQ_FLUSH
BUG#3: io_direct:dio_submit: if fua_delay is not possible we MUST tag all bios
with REQ_FUA, not just the last one.
This patch unifies barrier handling as follows:
- Get rid of FORCE_{FLUSH,FUA}
- Introduce DELAYED_FLUSH
- fix fua handling for dio_submit
- BUG_ON for REQ_FLUSH in kaio_page_write

This makes reloc sequence optimal:
io_direct
RELOC_S: R1, W2, WBI:FLUSH|FUA
RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
io_kaio
RELOC_S: R1, W2:FUA, WBI:FUA
RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA

https://jira.sw.ru/browse/PSBM-47107
Signed-off-by: Dmitry Monakhov 
---
 drivers/block/ploop/dev.c   |  8 +---
 drivers/block/ploop/io_direct.c | 30 ++-
 drivers/block/ploop/io_kaio.c   | 23 +
 drivers/block/ploop/map.c   | 45 ++---
 include/linux/ploop/ploop.h | 19 +
 5 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 96f7850..fbc5f2f 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * 
preq)
 
__TRACE("Z %p %u\n", preq, preq->req_cluster);
 
+   if (!preq->error) {
+   WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
+   }
while (preq->bl.head) {
struct bio * bio = preq->bl.head;
preq->bl.head = bio->bi_next;
@@ -2530,9 +2533,8 @@ restart:
top_delta = ploop_top_delta(plo);
sbl.head = sbl.tail = preq->aux_bio;
 
-   /* Relocated data write required sync before BAT updatee */
-   set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
-
+   /* Relocated data write required sync before BAT updatee
+* this will happen inside index_update */
if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
preq->eng_state = PLOOP_E_DATA_WBI;
plo->st.bio_out++;
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index a6d83fe..303eb70 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq,
int err;
struct bio_list_walk bw;
int preflush;
-   int postfua = 0;
+   int fua = 0;
int write = !!(rw & REQ_WRITE);
int bio_num;
 
trace_submit(preq);
 
preflush = !!(rw & REQ_FLUSH);
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state))
-   preflush = 1;
-
-   if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
-   postfua = 1;
-
-   if (!postfua && ploop_req_delay_fua_possible(rw, preq)) {
-
+   fua = !!(rw & REQ_FUA);
+   if (fua && ploop_req_delay_fua_possible(rw, preq)) {
/* Mark req that delayed flush required */
-   set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state);
-   } else if (rw & REQ_FUA) {
-   postfua = 1;
+   set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state);
+   fua = 0;
}
-
rw &= ~(REQ_FLUSH | REQ_FUA);
 
 
@@ -238,8 +229,10 @@ flush_bio:
rw2 |= REQ_FLUSH;
preflush = 0;
}
-   if (unlikely(postfua && !bl.head))
-   rw2 |= (REQ_FUA | ((bio_num) ? REQ_FLUSH : 0));
+   /* Very unlikely, but correct.
+* TODO: Optimize postfua via DELAY_FLUSH for any req state */
+   if (unlikely(fua))
+   rw2 |= REQ_FUA;
 
ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw);
submit_bio(rw2, b);
@@ -1520,15 +1513,14 @@ dio_read_page(struct ploop_io * io, struct 
ploop_request * preq,
 
 static void
 dio_write_page(struct ploop_io * io, struct ploop_request * preq,
-  struct page * page, sector_t sec, int fua)
+  st

[Devel] [PATCH 2/3] ploop: deadcode cleanup

2016-06-21 Thread Dmitry Monakhov
The (rw & REQ_FUA) branch is unreachable because REQ_FUA was cleared on the
line above. The logic was moved to ploop_req_delay_fua_possible() a long time ago.

Signed-off-by: Dmitry Monakhov 
---
 drivers/block/ploop/io_direct.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index 58d7580..a6d83fe 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -108,15 +108,6 @@ dio_submit(struct ploop_io *io, struct ploop_request * 
preq,
rw &= ~(REQ_FLUSH | REQ_FUA);
 
 
-   /* In case of eng_state != COMPLETE, we'll do FUA in
-* ploop_index_update(). Otherwise, we should mark
-* last bio as FUA here. */
-   if (rw & REQ_FUA) {
-   rw &= ~REQ_FUA;
-   if (preq->eng_state == PLOOP_E_COMPLETE)
-   postfua = 1;
-   }
-
bio_list_init(&bl);
 
if (iblk == PLOOP_ZERO_INDEX)
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH] ve: mark DEF_PERMS feature deprecated

2016-06-21 Thread Kirill Tkhai
On 21.06.2016 14:23, Evgenii Shatokhin wrote:
> "def_perms" is not mentioned in the man pages for prlctl and vzctl.
> 
> VE_FEATURE_DEF_PERMS is only used in the kernel code as a part of
> VE_FEATURES_DEF ("ve->features = VE_FEATURES_DEF;" in ve_create()).
> No code checks if the bit for this feature is set in ve->features.
> 
> Let us mark this feature deprecated, similar to SYSFS and IPGRE
> features.
> 
> https://jira.sw.ru/browse/PSBM-40280
> 
> Signed-off-by: Evgenii Shatokhin 

Reviewed-by: Kirill Tkhai 

> ---
>  include/uapi/linux/vzcalluser.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/vzcalluser.h b/include/uapi/linux/vzcalluser.h
> index 2b340cf..bc55bb3 100644
> --- a/include/uapi/linux/vzcalluser.h
> +++ b/include/uapi/linux/vzcalluser.h
> @@ -115,7 +115,7 @@ struct env_create_param3 {
>  
>  #define VE_FEATURE_SYSFS (1ULL << 0) /* deprecated */
>  #define VE_FEATURE_NFS   (1ULL << 1)
> -#define VE_FEATURE_DEF_PERMS (1ULL << 2)
> +#define VE_FEATURE_DEF_PERMS (1ULL << 2) /* deprecated */
>  #define VE_FEATURE_SIT  (1ULL << 3)
>  #define VE_FEATURE_IPIP (1ULL << 4)
>  #define VE_FEATURE_PPP   (1ULL << 5)
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH] ve: mark DEF_PERMS feature deprecated

2016-06-21 Thread Evgenii Shatokhin
"def_perms" is not mentioned in the man pages for prlctl and vzctl.

VE_FEATURE_DEF_PERMS is only used in the kernel code as a part of
VE_FEATURES_DEF ("ve->features = VE_FEATURES_DEF;" in ve_create()).
No code checks if the bit for this feature is set in ve->features.

Let us mark this feature deprecated, similar to SYSFS and IPGRE
features.

https://jira.sw.ru/browse/PSBM-40280

Signed-off-by: Evgenii Shatokhin 
---
 include/uapi/linux/vzcalluser.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/vzcalluser.h b/include/uapi/linux/vzcalluser.h
index 2b340cf..bc55bb3 100644
--- a/include/uapi/linux/vzcalluser.h
+++ b/include/uapi/linux/vzcalluser.h
@@ -115,7 +115,7 @@ struct env_create_param3 {
 
 #define VE_FEATURE_SYSFS   (1ULL << 0) /* deprecated */
 #define VE_FEATURE_NFS (1ULL << 1)
-#define VE_FEATURE_DEF_PERMS   (1ULL << 2)
+#define VE_FEATURE_DEF_PERMS   (1ULL << 2) /* deprecated */
 #define VE_FEATURE_SIT  (1ULL << 3)
 #define VE_FEATURE_IPIP (1ULL << 4)
 #define VE_FEATURE_PPP (1ULL << 5)
-- 
2.7.3

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [TRD] Autofs migration

2016-06-21 Thread Konstantin Khorenko

Disclaimer: not sending this to us...@openvz.org because the external jira issue
has not been created, for an unknown reason.

One more known issue:
* "browse" autofs config option is not supported

https://jira.sw.ru/browse/PSBM-48659

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 04/19/2016 12:03 PM, Stanislav Kinsburskiy wrote:

1. Feature

Autofs mount points migration via CRIU
https://jira.sw.ru/browse/PSBM-41217

2. Description

CRIU now supports autofs file system migration, including direct,
indirect and offset mount types.

3. Products

Virtuozzo 7

Packages:
  criu-2.1.0.4.vz7
  libvzctl-7.0.199

4. Testing

4.1 Basics
  ** Install criu and libvzctl rpm packages
  ** Create a container, and check
  ** Check, that autofs is listed in /proc/filesystems in the container
  ** Check, that /dev/autofs is accessible
  ** Install autofs package inside the container
  ** Follow autofs guide to create an autofs _direct_ mount point
with some file system, mounted on top (tmpfs, for example). Command "man
autofs" might help
  ** Follow autofs guide to create an autofs _indirect_ mount point
with some file system, mounted on top (tmpfs, for example).
   ** Follow autofs guide to create an autofs _offset_ mount point with
some file system, mounted on top (tmpfs, for example).
  ** Suspend and restore container
  ** Check that autofs mounts and nested mounts were migrated
successfully (via /proc, for example).

4.2 Systemd autofs services
  ** Start any systemd autofs service (for example,
proc-sys-fs-binfmt_misc.automount) in the container
  ** Check, that service started successfully
  ** Suspend and restore container
  ** Check, that autofs and nested mount points were migrated
successfully.
  ** Check, that systemd service has active status
  ** Unmount nested file system manually
  ** Access systemd autofs mount point and check, that nested file
system is re-mounted again

4.3 Automount expiration
  ** setup autofs mount  with short timeout (10 seconds, for example)
in a container via any master: automount, systemd or else
  ** Activate autofs mount point (nested mount point should be
mounted by autofs master)
  ** Migrate (or suspend/resume) the container.
  ** Check, that nested mount point is unmounted after restore within
timeout.

5. Known issues

Autofs migration has an issue related to systemd-controlled autofs
mount points. Systemd saves the autofs mount point device number in its
internals and compares this number to the actual one, taken from the mount
point, on each autofs request from the kernel (mount, umount, expire, etc).
The problem is that after migration all mount points are created
manually and have _another_ device id, which leads to systemd ignoring
kernel requests.
This problem can't be solved without some kind of "device namespaces"
abstraction. However, some of the systemd services like
proc-sys-fs-binfmt_misc.automount can be painlessly restarted after
restore, thus eliminating this issue.
Restart of the proc-sys-fs-binfmt_misc.automount service is done by CRIU via
an action script provided by vzctl.

6. What was checked by developer

Both 4.1 and 4.2 test sequences

7. Feature owners

skinsbur...@virtuozzo.com

.


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [NEW KERNEL] 3.10.0-327.18.2.vz7.14.17 (rhel7)

2016-06-21 Thread builder
Changelog:

OpenVZ kernel rh7-3.10.0-327.18.2.vz7.14.17

* fix race between memory cgroup destruction and uncharge that might
  lead to kernel panic (OVZ-6756)
* fix timer bug leading to long delays in userspace apps (PSBM-48475)
* support for containers in machine.slice (PSBM-48629)


Generated changelog:

* Tue Jun 21 2016 Vladimir Davydov  
[3.10.0-327.18.2.vz7.14.17]
- cgroup: fix path mangling for ve cgroups (Vladimir Davydov) [PSBM-48629]
- cgroup: use cgroup_path_ve helper in cgroup_show_path (Vladimir Davydov)
- Drop fairsched syscalls (Vladimir Davydov)
- Use ve init task's css instead of opening cgroup via vfs (Vladimir Davydov) 
[PSBM-48629]
- Drop VZCTL_ENV_CREATE (Vladimir Davydov)
- Drop vz_compat boot param (Vladimir Davydov)
- mm: memcontrol: fix race between kmem uncharge and charge reparenting 
(Vladimir Davydov) [OVZ-6756]
- timers should not get negative argument (Vasily Averin) [PSBM-48475]


Built packages: 
http://kojistorage.eng.sw.ru/packages/vzkernel/3.10.0/327.18.2.vz7.14.17/
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


[Devel] [PATCH rh7] ve/fs: namespace -- Ignore device permissions during restore

2016-06-21 Thread Cyrill Gorcunov
To support several storage backends (ploops) inside a container
we have hacks in libvzctl which set up "old" permissions when the
restore procedure is initiated. But the original idea was simply to
allow CRIU to do all the work and restore ploop mounts
on its own (since CRIU fetches all mount options and such).

For this sake we turn off mount options filtering provisionally
if @is_pseudosuper is set, and CRIU restores mounts as regular
ones.

https://jira.sw.ru/browse/PSBM-48188

Signed-off-by: Cyrill Gorcunov 
CC: Igor Sukhih 
CC: Vladimir Davydov 
CC: Konstantin Khorenko 
---
 fs/namespace.c |7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

Index: linux-pcs7.git/fs/namespace.c
===
--- linux-pcs7.git.orig/fs/namespace.c
+++ linux-pcs7.git/fs/namespace.c
@@ -1933,7 +1933,12 @@ again:
if (devmnt->dev == dev) {
err = ve_devmnt_check(data, devmnt->allowed_options);
 
-   if (!err && !remount)
+   /*
+* In case of @is_pseudouser set, ie restore procedure,
+* we don't check for allowed options filtering, since
+* restore mode is special.
+*/
+   if ((ve->is_pseudosuper || !err) && !remount)
err = ve_devmnt_insert(data, 
devmnt->hidden_options);
 
break;
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel


Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v2

2016-06-21 Thread Dmitry Monakhov
Maxim Patlasov  writes:

> Dima,
>
> I agree with general approach of this patch, but there are some 
> (easy-to-fix) issues. See, please, inline comments below...
>
> On 06/20/2016 11:58 AM, Dmitry Monakhov wrote:
>> barrier code is broken in many ways:
>> Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly.
>> But request also can goes though ->dio_submit_alloc()->dio_submit_pad and 
>> write_page (for indexes)
>> So in case of grow_dev we have following sequance:
>>
>> E_RELOC_DATA_READ:
>>   ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>>->delta->allocate
>>   ->io->submit_allloc: dio_submit_alloc
>> ->dio_submit_pad
>> E_DATA_WBI : data written, time to update index
>>->delta->allocate_complete:ploop_index_update
>>  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>>  ->write_page
>>  ->ploop_map_wb_complete
>>->ploop_wb_complete_post_process
>>  ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>> E_RELOC_NULLIFY:
>>
>> ->submit()
>>
>> BUG#2: currecntly kaio write_page silently ignores REQ_FUA
>
> Sorry, I can't agree, it actually does not ignore:
I mistyped. I meant to say REQ_FLUSH.
>
>> static void
>> kaio_write_page(struct ploop_io * io, struct ploop_request * preq,
>>  struct page * page, sector_t sec, int fua)
>> {
>> /* No FUA in kaio, convert it to fsync */
>> if (fua)
>> set_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state);
>
>
>> BUG#3: io_direct:dio_submit  if fua_delay is not possible we MUST tag all 
>> bios via REQ_FUA
>> not just latest one.
>
> No need to tag *all*. See inline comments below.
>
>> This patch unify barrier handling like follows:
>> - Get rid of FORCE_{FLUSH,FUA}
>> - Introduce DELAYED_FLUSH, currecntly it supported only by io_direct
>> - fix up fua handling for dio_submit
>>
>> This makes reloc sequence optimal:
>> io_direct
>> RELOC_S: R1, W2, WBI:FLUSH|FUA
>> RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA
>> io_kaio
>> RELOC_S: R1, W2:FUA, WBI:FUA
>> RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA
>>
>> https://jira.sw.ru/browse/PSBM-47107
>> Signed-off-by: Dmitry Monakhov 
>> ---
>>   drivers/block/ploop/dev.c   |  8 +---
>>   drivers/block/ploop/io_direct.c | 29 +-
>>   drivers/block/ploop/io_kaio.c   | 17 ++--
>>   drivers/block/ploop/map.c   | 45 
>> ++---
>>   include/linux/ploop/ploop.h |  8 
>>   5 files changed, 54 insertions(+), 53 deletions(-)
>>
>> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
>> index 96f7850..fbc5f2f 100644
>> --- a/drivers/block/ploop/dev.c
>> +++ b/drivers/block/ploop/dev.c
>> @@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct 
>> ploop_request * preq)
>>   
>>  __TRACE("Z %p %u\n", preq, preq->req_cluster);
>>   
>> +if (!preq->error) {
>> +WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state));
>> +}
>>  while (preq->bl.head) {
>>  struct bio * bio = preq->bl.head;
>>  preq->bl.head = bio->bi_next;
>> @@ -2530,9 +2533,8 @@ restart:
>>  top_delta = ploop_top_delta(plo);
>>  sbl.head = sbl.tail = preq->aux_bio;
>>   
>> -/* Relocated data write required sync before BAT updatee */
>> -set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>> -
>> +/* Relocated data write required sync before BAT updatee
>> + * this will happen inside index_update */
>>  if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) {
>>  preq->eng_state = PLOOP_E_DATA_WBI;
>>  plo->st.bio_out++;
>> diff --git a/drivers/block/ploop/io_direct.c 
>> b/drivers/block/ploop/io_direct.c
>> index a6d83fe..d7ecd4a 100644
>> --- a/drivers/block/ploop/io_direct.c
>> +++ b/drivers/block/ploop/io_direct.c
>> @@ -90,21 +90,12 @@ dio_submit(struct ploop_io *io, struct ploop_request * 
>> preq,
>>  trace_submit(preq);
>>   
>>  preflush = !!(rw & REQ_FLUSH);
>> -
>> -if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state))
>> -preflush = 1;
>> -
>> -if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state))
>> -postfua = 1;
>> -
>> -if (!postfua && ploop_req_delay_fua_possible(rw, preq)) {
>> -
>> +postfua = !!(rw & REQ_FUA);
>> +if (ploop_req_delay_fua_possible(rw, preq)) {
>>  /* Mark req that delayed flush required */
>> -set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state);
>> -} else if (rw & REQ_FUA) {
>> -postfua = 1;
>> +set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state);
>> +postfua = 0;
>>  }
>
> "postfua" is a horrible name, let us see if we can get rid of it 
> completely. Also, the way how ploop_req_delay_fua_possible implemented 
> is prone to err