[Devel] [PATCH rh7] ploop: fix barriers for ordinary requests
The way io_direct.c handles FLUSH|FUA: b1:FLUSH,b2,b3,b4,b5:FLUSH|FUA is completely wrong: to make sure that b1:FLUSH took effect we have to wait for its completion. Similarly, even if we're sure that FUA will be processed as post-FLUSH (also dubious!), we have to wait for completion of b1..b4 to make sure that the flush will cover them. The patch fixes all these issues pretty simply: let's mark outgoing bio-s with FLUSH|FUA based on those flags in the *corresponding* incoming bio-s. Signed-off-by: Maxim Patlasov --- drivers/block/ploop/dev.c |1 - drivers/block/ploop/io_direct.c | 47 --- 2 files changed, 15 insertions(+), 33 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 2ef1449..6b5702f 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -498,7 +498,6 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio, preq->req_sector = bio->bi_sector; preq->req_size = bio->bi_size >> 9; preq->req_rw = bio->bi_rw; - bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); preq->eng_state = PLOOP_E_ENTRY; preq->state = 0; preq->error = 0; diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index 6ef9cd8..84c9a48 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -92,7 +92,6 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq, int preflush; int postfua = 0; int write = !!(rw & REQ_WRITE); - int bio_num; trace_submit(preq); @@ -233,13 +232,13 @@ flush_bio: goto flush_bio; } + bio->bi_rw |= bw.cur->bi_rw & (REQ_FLUSH | REQ_FUA); bw.bv_off += copy; size -= copy >> 9; sec += copy >> 9; } ploop_extent_put(em); - bio_num = 0; while (bl.head) { struct bio * b = bl.head; unsigned long rw2 = rw; @@ -255,11 +254,10 @@ flush_bio: preflush = 0; } if (unlikely(postfua && !bl.head)) - rw2 |= (REQ_FUA | ((bio_num) ?
REQ_FLUSH : 0)); + rw2 |= REQ_FUA; ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw); - submit_bio(rw2, b); - bio_num++; + submit_bio(rw2 | b->bi_rw, b); } ploop_complete_io_request(preq); @@ -567,7 +565,6 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request * preq, sector_t sec, end_sec, nsec, start, end; struct bio_list_walk bw; int err; - int preflush = !!(preq->req_rw & REQ_FLUSH); bio_list_init(&bl); @@ -598,14 +595,17 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request * preq, while (sec < end_sec) { struct page * page; unsigned int poff, plen; + bool zero_page; if (sec < start) { + zero_page = true; page = ZERO_PAGE(0); poff = 0; plen = start - sec; if (plen > (PAGE_SIZE>>9)) plen = (PAGE_SIZE>>9); } else if (sec >= end) { + zero_page = true; page = ZERO_PAGE(0); poff = 0; plen = end_sec - sec; @@ -614,6 +614,7 @@ dio_submit_pad(struct ploop_io *io, struct ploop_request * preq, } else { /* sec >= start && sec < end */ struct bio_vec * bv; + zero_page = false; if (sec == start) { bw.cur = sbl->head; @@ -672,6 +673,10 @@ flush_bio: goto flush_bio; } + /* Handle FLUSH here, dio_post_submit will handle FUA */ + if (!zero_page) + bio->bi_rw |= bw.cur->bi_rw & REQ_FLUSH; + bw.bv_off += (plen<<9); BUG_ON(plen == 0); sec += plen; @@ -688,13 +693,9 @@ flush_bio: b->bi_private = preq; b->bi_end_io = dio_endio_async; - rw = sbl->head->bi_rw | WRITE; - if (unlikely(preflush)) { - rw |= REQ_FLUSH; - preflush = 0; - } + rw = preq->req_rw & ~(REQ_FLUSH | REQ_FUA); ploop_acc_ff_out(preq->plo, rw | b->bi_rw); - submit_bio(rw, b); + submit_bio(rw | b->bi_rw, b); } ploop_complete_io_request(preq); @@ -1422,13 +1423,6 @@ dio_io_page(struct ploop_io * io, unsigned long rw, sector_t nsec; int err; int off; - int postfua; - int bio_num; - in
Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3
Dima, After more thinking I realized that the whole idea of PLOOP_REQ_DELAYED_FLUSH might be bogus: it is possible that we simply do not have enough incoming FUA-s to make delaying worthwhile. This patch actually mixes three things: 1) fix barriers for RELOC_A|S requests, 2) fix barriers for ordinary requests, 3) DELAYED_FLUSH optimization. So, please, split the patch into three and make some measurements demonstrating that applying the "DELAYED_FLUSH optimization" patch on top of the previous patches improves performance. I have an idea about how to fix barriers for ordinary requests -- please see the patch I'll send soon. The key point is that handling FLUSH-es is broken the same way as FUA: if you observe (rw & REQ_FLUSH) and send the first bio marked as REQ_FLUSH, it guarantees nothing unless you wait for its completion before submitting further bio-s! And ploop simply does not have the logic of waiting for the first before sending the others. And, to make things worse, not only dio_submit is affected: dio_submit_pad and dio_io_page need to be fixed too. There are also some inline comments below... On 06/21/2016 06:55 AM, Dmitry Monakhov wrote: barrier code is broken in many ways: Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. 
But request also can goes though ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes) So in case of grow_dev we have following sequance: E_RELOC_DATA_READ: ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->delta->allocate ->io->submit_allloc: dio_submit_alloc ->dio_submit_pad E_DATA_WBI : data written, time to update index ->delta->allocate_complete:ploop_index_update ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->write_page ->ploop_map_wb_complete ->ploop_wb_complete_post_process ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); E_RELOC_NULLIFY: ->submit() BUG#2: currecntly kaio write_page silently ignores REQ_FLUSH BUG#3: io_direct:dio_submit if fua_delay is not possible we MUST tag all bios via REQ_FUA not just latest one. This patch unify barrier handling like follows: - Get rid of FORCE_{FLUSH,FUA} - Introduce DELAYED_FLUSH - fix fua handling for dio_submit - BUG_ON for REQ_FLUSH in kaio_page_write This makes reloc sequence optimal: io_direct RELOC_S: R1, W2, WBI:FLUSH|FUA RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA io_kaio RELOC_S: R1, W2:FUA, WBI:FUA RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA https://jira.sw.ru/browse/PSBM-47107 Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/dev.c | 8 +--- drivers/block/ploop/io_direct.c | 30 ++- drivers/block/ploop/io_kaio.c | 23 + drivers/block/ploop/map.c | 45 ++--- include/linux/ploop/ploop.h | 19 + 5 files changed, 60 insertions(+), 65 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 96f7850..fbc5f2f 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * preq) __TRACE("Z %p %u\n", preq, preq->req_cluster); + if (!preq->error) { + WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state)); + } while (preq->bl.head) { struct bio * bio = preq->bl.head; preq->bl.head = bio->bi_next; @@ -2530,9 +2533,8 @@ restart: top_delta = ploop_top_delta(plo); sbl.head = sbl.tail = 
preq->aux_bio; - /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - + /* Relocated data write required sync before BAT updatee +* this will happen inside index_update */ if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; plo->st.bio_out++; diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index a6d83fe..303eb70 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq, int err; struct bio_list_walk bw; int preflush; - int postfua = 0; + int fua = 0; int write = !!(rw & REQ_WRITE); int bio_num; Your patch obsoletes bio_num. Please remove it. trace_submit(preq); preflush = !!(rw & REQ_FLUSH); - - if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state)) - preflush = 1; - - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) - postfua = 1; - - if (!postfua && ploop_req_delay_fua_possible(rw, preq)) { - + fua =
[Devel] [PATCH rh7] mm: memcontrol: reclaim when shrinking memory.high below usage
From: Johannes Weiner When setting memory.high below usage, nothing happens until the next charge comes along, and then it will only reclaim its own charge and not the now potentially huge excess of the new memory.high. This can cause groups to stay in excess of their memory.high indefinitely. To fix that, when shrinking memory.high, kick off a reclaim cycle that goes after the delta. https://jira.sw.ru/browse/PSBM-48546 Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 588083bb37a3cea8533c392370a554417c8f29cb) Signed-off-by: Vladimir Davydov Conflicts: mm/memcontrol.c --- mm/memcontrol.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index de7c36295515..1f525f27e481 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5314,7 +5314,7 @@ static int mem_cgroup_high_write(struct cgroup *cont, struct cftype *cft, const char *buffer) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); - unsigned long long val; + unsigned long long val, usage; int ret; ret = res_counter_memparse_write_strategy(buffer, &val); @@ -5322,6 +5322,12 @@ static int mem_cgroup_high_write(struct cgroup *cont, struct cftype *cft, return ret; memcg->high = val; + + usage = res_counter_read_u64(&memcg->res, RES_USAGE); + if (usage > val) + try_to_free_mem_cgroup_pages(memcg, +(usage - val) >> PAGE_SHIFT, +GFP_KERNEL, false); return 0; } -- 2.1.4 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v2
On 06/21/2016 12:25 AM, Dmitry Monakhov wrote: Maxim Patlasov writes: Dima, I agree with general approach of this patch, but there are some (easy-to-fix) issues. See, please, inline comments below... On 06/20/2016 11:58 AM, Dmitry Monakhov wrote: barrier code is broken in many ways: Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. But request also can goes though ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes) So in case of grow_dev we have following sequance: E_RELOC_DATA_READ: ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->delta->allocate ->io->submit_allloc: dio_submit_alloc ->dio_submit_pad E_DATA_WBI : data written, time to update index ->delta->allocate_complete:ploop_index_update ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->write_page ->ploop_map_wb_complete ->ploop_wb_complete_post_process ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); E_RELOC_NULLIFY: ->submit() BUG#2: currecntly kaio write_page silently ignores REQ_FUA Sorry, I can't agree, it actually does not ignore: I've misstyped. I ment to say REQ_FLUSH. static void kaio_write_page(struct ploop_io * io, struct ploop_request * preq, struct page * page, sector_t sec, int fua) { /* No FUA in kaio, convert it to fsync */ if (fua) set_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state); BUG#3: io_direct:dio_submit if fua_delay is not possible we MUST tag all bios via REQ_FUA not just latest one. No need to tag *all*. See inline comments below. 
This patch unify barrier handling like follows: - Get rid of FORCE_{FLUSH,FUA} - Introduce DELAYED_FLUSH, currecntly it supported only by io_direct - fix up fua handling for dio_submit This makes reloc sequence optimal: io_direct RELOC_S: R1, W2, WBI:FLUSH|FUA RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA io_kaio RELOC_S: R1, W2:FUA, WBI:FUA RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA https://jira.sw.ru/browse/PSBM-47107 Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/dev.c | 8 +--- drivers/block/ploop/io_direct.c | 29 +- drivers/block/ploop/io_kaio.c | 17 ++-- drivers/block/ploop/map.c | 45 ++--- include/linux/ploop/ploop.h | 8 5 files changed, 54 insertions(+), 53 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 96f7850..fbc5f2f 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * preq) __TRACE("Z %p %u\n", preq, preq->req_cluster); + if (!preq->error) { + WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state)); + } while (preq->bl.head) { struct bio * bio = preq->bl.head; preq->bl.head = bio->bi_next; @@ -2530,9 +2533,8 @@ restart: top_delta = ploop_top_delta(plo); sbl.head = sbl.tail = preq->aux_bio; - /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - + /* Relocated data write required sync before BAT updatee +* this will happen inside index_update */ if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; plo->st.bio_out++; diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index a6d83fe..d7ecd4a 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -90,21 +90,12 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq, trace_submit(preq); preflush = !!(rw & REQ_FLUSH); - - if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state)) - preflush = 1; - - if 
(test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) - postfua = 1; - - if (!postfua && ploop_req_delay_fua_possible(rw, preq)) { - + postfua = !!(rw & REQ_FUA); + if (ploop_req_delay_fua_possible(rw, preq)) { /* Mark req that delayed flush required */ - set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state); - } else if (rw & REQ_FUA) { - postfua = 1; + set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state); + postfua = 0; } "postfua" is a horrible name, let us see if we can get rid of it completely. Also, the way how ploop_req_delay_fua_possible implemented is prone to errors (see below an issue in kaio_complete_io_state). Let's rework it like this: Let it be postflush. static inline bool ploop_req_delay_fua_possible(struct ploop_request *preq) { return preq->eng_state ==
[Devel] [PATCH 1/3] ploop: skip redundant fsync for REQ_FUA in post_submit
Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/io_direct.c | 24 ++-- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index b844a80..58d7580 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -517,27 +517,31 @@ dio_post_submit(struct ploop_io *io, struct ploop_request * preq) struct ploop_device *plo = preq->plo; sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log; loff_t clu_siz = 1 << (preq->plo->cluster_log + 9); + int force_sync = preq->req_rw & REQ_FUA; int err; file_start_write(io->files.file); - /* Here io->io_count is even ... */ - spin_lock_irq(&plo->lock); - io->io_count++; - set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state); - spin_unlock_irq(&plo->lock); - + if (!force_sync) { + /* Here io->io_count is even ... */ + spin_lock_irq(&plo->lock); + io->io_count++; + set_bit(PLOOP_IO_FSYNC_DELAYED, &io->io_state); + spin_unlock_irq(&plo->lock); + } err = io->files.file->f_op->fallocate(io->files.file, FALLOC_FL_CONVERT_UNWRITTEN, (loff_t)sec << 9, clu_siz); /* highly unlikely case: FUA coming to a block not provisioned yet */ - if (!err && (preq->req_rw & REQ_FUA)) + if (!err && force_sync) err = io->ops->sync(io); - spin_lock_irq(&plo->lock); - io->io_count++; - spin_unlock_irq(&plo->lock); + if (!force_sync) { + spin_lock_irq(&plo->lock); + io->io_count++; + spin_unlock_irq(&plo->lock); + } /* and here io->io_count is even (+2) again. */ file_end_write(io->files.file); -- 1.8.3.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v3
barrier code is broken in many ways: Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. But request also can goes though ->dio_submit_alloc()->dio_submit_pad and write_page (for indexes) So in case of grow_dev we have following sequance: E_RELOC_DATA_READ: ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->delta->allocate ->io->submit_allloc: dio_submit_alloc ->dio_submit_pad E_DATA_WBI : data written, time to update index ->delta->allocate_complete:ploop_index_update ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); ->write_page ->ploop_map_wb_complete ->ploop_wb_complete_post_process ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); E_RELOC_NULLIFY: ->submit() BUG#2: currecntly kaio write_page silently ignores REQ_FLUSH BUG#3: io_direct:dio_submit if fua_delay is not possible we MUST tag all bios via REQ_FUA not just latest one. This patch unify barrier handling like follows: - Get rid of FORCE_{FLUSH,FUA} - Introduce DELAYED_FLUSH - fix fua handling for dio_submit - BUG_ON for REQ_FLUSH in kaio_page_write This makes reloc sequence optimal: io_direct RELOC_S: R1, W2, WBI:FLUSH|FUA RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA io_kaio RELOC_S: R1, W2:FUA, WBI:FUA RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA https://jira.sw.ru/browse/PSBM-47107 Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/dev.c | 8 +--- drivers/block/ploop/io_direct.c | 30 ++- drivers/block/ploop/io_kaio.c | 23 + drivers/block/ploop/map.c | 45 ++--- include/linux/ploop/ploop.h | 19 + 5 files changed, 60 insertions(+), 65 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 96f7850..fbc5f2f 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct ploop_request * preq) __TRACE("Z %p %u\n", preq, preq->req_cluster); + if (!preq->error) { + WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state)); + } while (preq->bl.head) { struct bio * bio = preq->bl.head; 
preq->bl.head = bio->bi_next; @@ -2530,9 +2533,8 @@ restart: top_delta = ploop_top_delta(plo); sbl.head = sbl.tail = preq->aux_bio; - /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); - + /* Relocated data write required sync before BAT updatee +* this will happen inside index_update */ if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; plo->st.bio_out++; diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index a6d83fe..303eb70 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -83,28 +83,19 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq, int err; struct bio_list_walk bw; int preflush; - int postfua = 0; + int fua = 0; int write = !!(rw & REQ_WRITE); int bio_num; trace_submit(preq); preflush = !!(rw & REQ_FLUSH); - - if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state)) - preflush = 1; - - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) - postfua = 1; - - if (!postfua && ploop_req_delay_fua_possible(rw, preq)) { - + fua = !!(rw & REQ_FUA); + if (fua && ploop_req_delay_fua_possible(rw, preq)) { /* Mark req that delayed flush required */ - set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state); - } else if (rw & REQ_FUA) { - postfua = 1; + set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state); + fua = 0; } - rw &= ~(REQ_FLUSH | REQ_FUA); @@ -238,8 +229,10 @@ flush_bio: rw2 |= REQ_FLUSH; preflush = 0; } - if (unlikely(postfua && !bl.head)) - rw2 |= (REQ_FUA | ((bio_num) ? REQ_FLUSH : 0)); + /* Very unlikely, but correct. +* TODO: Optimize postfua via DELAY_FLUSH for any req state */ + if (unlikely(fua)) + rw2 |= REQ_FUA; ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw); submit_bio(rw2, b); @@ -1520,15 +1513,14 @@ dio_read_page(struct ploop_io * io, struct ploop_request * preq, static void dio_write_page(struct ploop_io * io, struct ploop_request * preq, - struct page * page, sector_t sec, int fua) + st
[Devel] [PATCH 2/3] ploop: deadcode cleanup
The (rw & REQ_FUA) branch is unreachable because REQ_FUA was cleared on the line above. The logic was moved to ploop_req_delay_fua_possible() a long time ago. Signed-off-by: Dmitry Monakhov --- drivers/block/ploop/io_direct.c | 9 - 1 file changed, 9 deletions(-) diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index 58d7580..a6d83fe 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -108,15 +108,6 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq, rw &= ~(REQ_FLUSH | REQ_FUA); - /* In case of eng_state != COMPLETE, we'll do FUA in -* ploop_index_update(). Otherwise, we should mark -* last bio as FUA here. */ - if (rw & REQ_FUA) { - rw &= ~REQ_FUA; - if (preq->eng_state == PLOOP_E_COMPLETE) - postfua = 1; - } - bio_list_init(&bl); if (iblk == PLOOP_ZERO_INDEX) -- 1.8.3.1 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH] ve: mark DEF_PERMS feature deprecated
On 21.06.2016 14:23, Evgenii Shatokhin wrote: > "def_perms" is not mentioned in the man pages for prlctl and vzctl. > > VE_FEATURE_DEF_PERMS is only used in the kernel code as a part of > VE_FEATURES_DEF ("ve->features = VE_FEATURES_DEF;" in ve_create()). > No code checks if the bit for this feature is set in ve->features. > > Let us mark this feature deprecated, similar to SYSFS and IPGRE > features. > > https://jira.sw.ru/browse/PSBM-40280 > > Signed-off-by: Evgenii Shatokhin Reviewed-by: Kirill Tkhai > --- > include/uapi/linux/vzcalluser.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/include/uapi/linux/vzcalluser.h b/include/uapi/linux/vzcalluser.h > index 2b340cf..bc55bb3 100644 > --- a/include/uapi/linux/vzcalluser.h > +++ b/include/uapi/linux/vzcalluser.h > @@ -115,7 +115,7 @@ struct env_create_param3 { > > #define VE_FEATURE_SYSFS (1ULL << 0) /* deprecated */ > #define VE_FEATURE_NFS (1ULL << 1) > -#define VE_FEATURE_DEF_PERMS (1ULL << 2) > +#define VE_FEATURE_DEF_PERMS (1ULL << 2) /* deprecated */ > #define VE_FEATURE_SIT (1ULL << 3) > #define VE_FEATURE_IPIP (1ULL << 4) > #define VE_FEATURE_PPP (1ULL << 5) > ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH] ve: mark DEF_PERMS feature deprecated
"def_perms" is not mentioned in the man pages for prlctl and vzctl. VE_FEATURE_DEF_PERMS is only used in the kernel code as a part of VE_FEATURES_DEF ("ve->features = VE_FEATURES_DEF;" in ve_create()). No code checks if the bit for this feature is set in ve->features. Let us mark this feature deprecated, similar to SYSFS and IPGRE features. https://jira.sw.ru/browse/PSBM-40280 Signed-off-by: Evgenii Shatokhin --- include/uapi/linux/vzcalluser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/vzcalluser.h b/include/uapi/linux/vzcalluser.h index 2b340cf..bc55bb3 100644 --- a/include/uapi/linux/vzcalluser.h +++ b/include/uapi/linux/vzcalluser.h @@ -115,7 +115,7 @@ struct env_create_param3 { #define VE_FEATURE_SYSFS (1ULL << 0) /* deprecated */ #define VE_FEATURE_NFS (1ULL << 1) -#define VE_FEATURE_DEF_PERMS (1ULL << 2) +#define VE_FEATURE_DEF_PERMS (1ULL << 2) /* deprecated */ #define VE_FEATURE_SIT (1ULL << 3) #define VE_FEATURE_IPIP (1ULL << 4) #define VE_FEATURE_PPP (1ULL << 5) -- 2.7.3 ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [TRD] Autofs migration
Disclaimer: not sending this to us...@openvz.org because the external jira issue has not been created for an unknown reason. One more known issue: * "browse" autofs config option is not supported https://jira.sw.ru/browse/PSBM-48659 -- Best regards, Konstantin Khorenko, Virtuozzo Linux Kernel Team On 04/19/2016 12:03 PM, Stanislav Kinsburskiy wrote: 1. Feature Autofs mount points migration via CRIU https://jira.sw.ru/browse/PSBM-41217 2. Description CRIU now supports autofs file system migration, including direct, indirect and offset mount types. 3. Products Virtuozzo 7 Packages: criu-2.1.0.4.vz7 libvzctl-7.0.199 4. Testing 4.1 Basics ** Install criu and libvzctl rpm packages ** Create a container, and check ** Check, that autofs is listed in /proc/filesystems in the container ** Check, that /dev/autofs is accessible ** Install autofs package inside the container ** Follow autofs guide to create an autofs _direct_ mount point with some file system, mounted on top (tmpfs, for example). Command "man autofs" might help ** Follow autofs guide to create an autofs _indirect_ mount point with some file system, mounted on top (tmpfs, for example). ** Follow autofs guide to create an autofs _offset_ mount point with some file system, mounted on top (tmpfs, for example). ** Suspend and restore container ** Check, that autofs mounts and nested mounts were migrated successfully (via /proc, for example). 4.2 Systemd autofs services ** Start any systemd autofs service (for example, proc-sys-fs-binfmt_misc.automount) in the container ** Check, that service started successfully ** Suspend and restore container ** Check, that autofs and nested mount points were migrated successfully. 
** Check, that systemd service has active status ** Unmount nested file system manually ** Access systemd autofs mount point and check, that nested file system is re-mounted 4.3 Automount expiration ** setup autofs mount with short timeout (10 seconds, for example) in a container via any master: automount, systemd or other ** Activate autofs mount point (nested mount point should be mounted by autofs master) ** Migrate (or suspend/resume) the container. ** Check, that nested mount point is unmounted after restore within timeout. 5. Known issues Autofs migration has an issue, related to systemd-controlled autofs mount points. Systemd saves the autofs mount point device number in its internals and compares this number to the actual one, taken from the mount point, on each autofs request from the kernel (mount, umount, expire, etc). The problem is that after migration all mount points are created manually and have _another_ device id, which causes systemd to ignore kernel requests. This problem can't be solved without some kind of "device namespaces" abstraction. However, some of the systemd services like proc-sys-fs-binfmt_misc.automount can be painlessly restarted after restore, thus eliminating this issue. Restart of proc-sys-fs-binfmt_misc.automount service is done by CRIU via action script, provided by vzctl. 6. What was checked by developer Both 4.1 and 4.2 test sequences 7. Feature owners skinsbur...@virtuozzo.com . ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [NEW KERNEL] 3.10.0-327.18.2.vz7.14.17 (rhel7)
Changelog: OpenVZ kernel rh7-3.10.0-327.18.2.vz7.14.17 * fix race between memory cgroup destruction and uncharge that might lead to kernel panic (OVZ-6756) * fix timer bug leading to long delays in userspace apps (PSBM-48475) * support for containers in machine.slice (PSBM-48629) Generated changelog: * Tue Jun 21 2016 Vladimir Davydov [3.10.0-327.18.2.vz7.14.17] - cgroup: fix path mangling for ve cgroups (Vladimir Davydov) [PSBM-48629] - cgroup: use cgroup_path_ve helper in cgroup_show_path (Vladimir Davydov) - Drop fairsched syscalls (Vladimir Davydov) - Use ve init task's css instead of opening cgroup via vfs (Vladimir Davydov) [PSBM-48629] - Drop VZCTL_ENV_CREATE (Vladimir Davydov) - Drop vz_compat boot param (Vladimir Davydov) - mm: memcontrol: fix race between kmem uncharge and charge reparenting (Vladimir Davydov) [OVZ-6756] - timers should not get negative argument (Vasily Averin) [PSBM-48475] Built packages: http://kojistorage.eng.sw.ru/packages/vzkernel/3.10.0/327.18.2.vz7.14.17/ ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
[Devel] [PATCH rh7] ve/fs: namespace -- Ignore device permissions during restore
To support several storage backends (ploops) inside a container we have hacks in libvzctl which set up "old" permissions when the restore procedure is initiated. But the original idea was simply to allow CRIU to do all the work and restore ploop mounts on its own (since CRIU fetches all mount options and such). For this purpose we provisionally turn off mount options filtering if @is_pseudosuper is set, and CRIU restores the mounts as regular ones. https://jira.sw.ru/browse/PSBM-48188 Signed-off-by: Cyrill Gorcunov CC: Igor Sukhih CC: Vladimir Davydov CC: Konstantin Khorenko --- fs/namespace.c |7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) Index: linux-pcs7.git/fs/namespace.c === --- linux-pcs7.git.orig/fs/namespace.c +++ linux-pcs7.git/fs/namespace.c @@ -1933,7 +1933,12 @@ again: if (devmnt->dev == dev) { err = ve_devmnt_check(data, devmnt->allowed_options); - if (!err && !remount) + /* +* In case of @is_pseudouser set, ie restore procedure, +* we don't check for allowed options filtering, since +* restore mode is special. +*/ + if ((ve->is_pseudosuper || !err) && !remount) err = ve_devmnt_insert(data, devmnt->hidden_options); break; ___ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel
Re: [Devel] [PATCH 3/3] ploop: fixup FORCE_{FLUSH,FUA} handling v2
Maxim Patlasov writes: > Dima, > > I agree with general approach of this patch, but there are some > (easy-to-fix) issues. See, please, inline comments below... > > On 06/20/2016 11:58 AM, Dmitry Monakhov wrote: >> barrier code is broken in many ways: >> Currently only ->dio_submit() handles PLOOP_REQ_FORCE_{FLUSH,FUA} correctly. >> But request also can goes though ->dio_submit_alloc()->dio_submit_pad and >> write_page (for indexes) >> So in case of grow_dev we have following sequance: >> >> E_RELOC_DATA_READ: >> ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); >>->delta->allocate >> ->io->submit_allloc: dio_submit_alloc >> ->dio_submit_pad >> E_DATA_WBI : data written, time to update index >>->delta->allocate_complete:ploop_index_update >> ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); >> ->write_page >> ->ploop_map_wb_complete >>->ploop_wb_complete_post_process >> ->set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); >> E_RELOC_NULLIFY: >> >> ->submit() >> >> BUG#2: currecntly kaio write_page silently ignores REQ_FUA > > Sorry, I can't agree, it actually does not ignore: I've misstyped. I ment to say REQ_FLUSH. > >> static void >> kaio_write_page(struct ploop_io * io, struct ploop_request * preq, >> struct page * page, sector_t sec, int fua) >> { >> /* No FUA in kaio, convert it to fsync */ >> if (fua) >> set_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state); > > >> BUG#3: io_direct:dio_submit if fua_delay is not possible we MUST tag all >> bios via REQ_FUA >> not just latest one. > > No need to tag *all*. See inline comments below. 
> >> This patch unify barrier handling like follows: >> - Get rid of FORCE_{FLUSH,FUA} >> - Introduce DELAYED_FLUSH, currecntly it supported only by io_direct >> - fix up fua handling for dio_submit >> >> This makes reloc sequence optimal: >> io_direct >> RELOC_S: R1, W2, WBI:FLUSH|FUA >> RELOC_A: R1, W2, WBI:FLUSH|FUA, W1:NULLIFY|FUA >> io_kaio >> RELOC_S: R1, W2:FUA, WBI:FUA >> RELOC_A: R1, W2:FUA, WBI:FUA, W1:NULLIFY|FUA >> >> https://jira.sw.ru/browse/PSBM-47107 >> Signed-off-by: Dmitry Monakhov >> --- >> drivers/block/ploop/dev.c | 8 +--- >> drivers/block/ploop/io_direct.c | 29 +- >> drivers/block/ploop/io_kaio.c | 17 ++-- >> drivers/block/ploop/map.c | 45 >> ++--- >> include/linux/ploop/ploop.h | 8 >> 5 files changed, 54 insertions(+), 53 deletions(-) >> >> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c >> index 96f7850..fbc5f2f 100644 >> --- a/drivers/block/ploop/dev.c >> +++ b/drivers/block/ploop/dev.c >> @@ -1224,6 +1224,9 @@ static void ploop_complete_request(struct >> ploop_request * preq) >> >> __TRACE("Z %p %u\n", preq, preq->req_cluster); >> >> +if (!preq->error) { >> +WARN_ON(test_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state)); >> +} >> while (preq->bl.head) { >> struct bio * bio = preq->bl.head; >> preq->bl.head = bio->bi_next; >> @@ -2530,9 +2533,8 @@ restart: >> top_delta = ploop_top_delta(plo); >> sbl.head = sbl.tail = preq->aux_bio; >> >> -/* Relocated data write required sync before BAT updatee */ >> -set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); >> - >> +/* Relocated data write required sync before BAT updatee >> + * this will happen inside index_update */ >> if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { >> preq->eng_state = PLOOP_E_DATA_WBI; >> plo->st.bio_out++; >> diff --git a/drivers/block/ploop/io_direct.c >> b/drivers/block/ploop/io_direct.c >> index a6d83fe..d7ecd4a 100644 >> --- a/drivers/block/ploop/io_direct.c >> +++ b/drivers/block/ploop/io_direct.c >> @@ -90,21 +90,12 @@ dio_submit(struct ploop_io *io, struct 
ploop_request * >> preq, >> trace_submit(preq); >> >> preflush = !!(rw & REQ_FLUSH); >> - >> -if (test_and_clear_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state)) >> -preflush = 1; >> - >> -if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) >> -postfua = 1; >> - >> -if (!postfua && ploop_req_delay_fua_possible(rw, preq)) { >> - >> +postfua = !!(rw & REQ_FUA); >> +if (ploop_req_delay_fua_possible(rw, preq)) { >> /* Mark req that delayed flush required */ >> -set_bit(PLOOP_REQ_FORCE_FLUSH, &preq->state); >> -} else if (rw & REQ_FUA) { >> -postfua = 1; >> +set_bit(PLOOP_REQ_DELAYED_FLUSH, &preq->state); >> +postfua = 0; >> } > > "postfua" is a horrible name, let us see if we can get rid of it > completely. Also, the way how ploop_req_delay_fua_possible implemented > is prone to err