Re: [PATCH] nilfs2: add tracepoints for analyzing reading and writing metadata files
On Sun, Oct 4, 2015 at 10:33 PM, Ryusuke Konishi <konishi.ryus...@lab.ntt.co.jp> wrote: > On Sun, 4 Oct 2015 01:02:42 +0900, Mitake Hitoshi wrote: >> This patch adds tracepoints for analyzing requests of reading and >> writing metadata files. The tracepoints cover every in-place mdt files >> (cpfile, sufile, and datfile). >> >> Example of tracing mdt_insert_new_block(): >> cp-14635 [000] ...1 30598.199309: nilfs2_mdt_insert_new_block: >> inode = 88022a8d0178 ino = 3 block = 155 >> cp-14635 [000] ...1 30598.199520: nilfs2_mdt_insert_new_block: >> inode = 88022a8d0178 ino = 3 block = 5 >> cp-14635 [000] ...1 30598.200828: nilfs2_mdt_insert_new_block: >> inode = 88022a8d0178 ino = 3 block = 253 >> >> Cc: TK Kato <tk.k...@wdc.com> >> Signed-off-by: Hitoshi Mitake <mitake.hito...@lab.ntt.co.jp> > > Applied to the tracepoints branch. Thanks. Thanks, could you send the patches in the tracepoints branch to upstream? Thanks, Hitoshi > > Ryusuke Konishi > >> --- >> fs/nilfs2/mdt.c | 6 + >> include/trace/events/nilfs2.h | 54 >> +++ >> 2 files changed, 60 insertions(+) >> >> diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c >> index dee34d9..1125f40 100644 >> --- a/fs/nilfs2/mdt.c >> +++ b/fs/nilfs2/mdt.c >> @@ -33,6 +33,7 @@ >> #include "page.h" >> #include "mdt.h" >> >> +#include >> >> #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) >> >> @@ -68,6 +69,9 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned >> long block, >> set_buffer_uptodate(bh); >> mark_buffer_dirty(bh); >> nilfs_mdt_mark_dirty(inode); >> + >> + trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); >> + >> return 0; >> } >> >> @@ -158,6 +162,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned >> long blkoff, >> get_bh(bh); >> submit_bh(mode, bh); >> ret = 0; >> + >> + trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); >> out: >> get_bh(bh); >> *out_bh = bh; >> diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h >> index 1b65ba6..c780581 100644 >> --- a/include/trace/events/nilfs2.h >> +++ b/include/trace/events/nilfs2.h >> @@ -162,6 +162,60 @@ TRACE_EVENT(nilfs2_segment_usage_freed, >> __entry->segnum) >> ); >> >> +TRACE_EVENT(nilfs2_mdt_insert_new_block, >> + TP_PROTO(struct inode *inode, >> + unsigned long ino, >> + unsigned long block), >> + >> + TP_ARGS(inode, ino, block), >> + >> + TP_STRUCT__entry( >> + __field(struct inode *, inode) >> + __field(unsigned long, ino) >> + __field(unsigned long, block) >> + ), >> + >> + TP_fast_assign( >> + __entry->inode = inode; >> + __entry->ino = ino; >> + __entry->block = block; >> + ), >> + >> + TP_printk("inode = %p ino = %lu block = %lu", >> + __entry->inode, >> + __entry->ino, >> + __entry->block) >> +); >> + >> +TRACE_EVENT(nilfs2_mdt_submit_block, >> + TP_PROTO(struct inode *inode, >> + unsigned long ino, >> + unsigned long blkoff, >> + int mode), >> + >> + TP_ARGS(inode, ino, blkoff, mode), >> + >> + TP_STRUCT__entry( >> + __field(struct inode *, inode) >> + __field(unsigned long, ino) >> + __field(unsigned long, blkoff) >> + __field(int, mode) >> + ), >> + >> + TP_fast_assign( >> + __entry->inode = inode; >> + __entry->ino = ino; >> + __entry->blkoff = blkoff; >> + __entry->mode = mode; >> + ), >> + >> + TP_printk("inode = %p ino = %lu blkoff = %lu mode = %x", >> + __entry->inode, >> + __entry->ino, >> + __entry->blkoff, >> + __entry->mode) >> +); >> + >> #endif /* _TRACE_NILFS2_H */ >> >> /* This part must be outside protection */ >> -- >> 1.9.1 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in >> the body of a message to majord...@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] nilfs2: add tracepoints for analyzing reading and writing metadata files
This patch adds tracepoints for analyzing requests of reading and writing metadata files. The tracepoints cover every in-place mdt files (cpfile, sufile, and datfile). Example of tracing mdt_insert_new_block(): cp-14635 [000] ...1 30598.199309: nilfs2_mdt_insert_new_block: inode = 88022a8d0178 ino = 3 block = 155 cp-14635 [000] ...1 30598.199520: nilfs2_mdt_insert_new_block: inode = 88022a8d0178 ino = 3 block = 5 cp-14635 [000] ...1 30598.200828: nilfs2_mdt_insert_new_block: inode = 88022a8d0178 ino = 3 block = 253 Cc: TK Kato <tk.k...@wdc.com> Signed-off-by: Hitoshi Mitake <mitake.hito...@lab.ntt.co.jp> --- fs/nilfs2/mdt.c | 6 + include/trace/events/nilfs2.h | 54 +++ 2 files changed, 60 insertions(+) diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index dee34d9..1125f40 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -33,6 +33,7 @@ #include "page.h" #include "mdt.h" +#include #define NILFS_MDT_MAX_RA_BLOCKS(16 - 1) @@ -68,6 +69,9 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_uptodate(bh); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); + + trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); + return 0; } @@ -158,6 +162,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, get_bh(bh); submit_bh(mode, bh); ret = 0; + + trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); out: get_bh(bh); *out_bh = bh; diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index 1b65ba6..c780581 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -162,6 +162,60 @@ TRACE_EVENT(nilfs2_segment_usage_freed, __entry->segnum) ); +TRACE_EVENT(nilfs2_mdt_insert_new_block, + TP_PROTO(struct inode *inode, +unsigned long ino, +unsigned long block), + + TP_ARGS(inode, ino, block), + + TP_STRUCT__entry( + __field(struct inode *, inode) + __field(unsigned long, ino) + __field(unsigned long, block) + ), + + TP_fast_assign( + __entry->inode = inode; + __entry->ino = ino; + __entry->block = block; + ), + + TP_printk("inode = %p ino = %lu block = %lu", + __entry->inode, + __entry->ino, + __entry->block) +); + +TRACE_EVENT(nilfs2_mdt_submit_block, + TP_PROTO(struct inode *inode, +unsigned long ino, +unsigned long blkoff, +int mode), + + TP_ARGS(inode, ino, blkoff, mode), + + TP_STRUCT__entry( + __field(struct inode *, inode) + __field(unsigned long, ino) + __field(unsigned long, blkoff) + __field(int, mode) + ), + + TP_fast_assign( + __entry->inode = inode; + __entry->ino = ino; + __entry->blkoff = blkoff; + __entry->mode = mode; + ), + + TP_printk("inode = %p ino = %lu blkoff = %lu mode = %x", + __entry->inode, + __entry->ino, + __entry->blkoff, + __entry->mode) +); + #endif /* _TRACE_NILFS2_H */ /* This part must be outside protection */ -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH tracepoints 0/2] refine coding style of the tracepoints branch
This patchset refines the tracepoints branch of nilfs tree (https://github.com/konis/nilfs2/tree/tracepoints). The change unifies TP_printk() style. The new style doesn't put ',' between fields. Ryusuke-san, could you replace the tracepoints branch with this patchset? Hitoshi Mitake (2): nilfs2: add a tracepoint for tracking stage transition of segment construction nilfs2: add a tracepoint for transaction events fs/nilfs2/segment.c | 104 +- fs/nilfs2/segment.h | 3 +- include/trace/events/nilfs2.h | 103 + 3 files changed, 188 insertions(+), 22 deletions(-) create mode 100644 include/trace/events/nilfs2.h -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH tracepoints 2/2] nilfs2: add a tracepoint for transaction events
This patch adds a tracepoint for transaction events of nilfs. With the tracepoint, these events can be tracked: begin, abort, commit, trylock, lock, and unlock. Basically, these events have corresponding functions e.g. begin event corresponds nilfs_transaction_begin(). The unlock event is an exception. It corresponds to the iteration in nilfs_transaction_lock(). Only one tracepoint is introcued: nilfs2_transaction_transition. The above events are distinguished with newly introduced enum. With this tracepoint, we can analyse a critical section of segment constructoin. Sample output by tpoint of perf-tools: cp-4457 [000] ...163.266220: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800bf5ccc58 count = 1 flags = 9 state = BEGIN cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800bf5ccc58 count = 0 flags = 9 state = COMMIT cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800bf5ccc58 count = 0 flags = 9 state = COMMIT segctord-4371 [001] ...168.261196: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 10 state = TRYLOCK segctord-4371 [001] ...168.261280: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 10 state = LOCK segctord-4371 [001] ...168.261877: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 1 flags = 10 state = BEGIN segctord-4371 [001] ...168.262116: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 18 state = COMMIT segctord-4371 [001] ...168.265032: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 18 state = UNLOCK segctord-4371 [001] ...1 132.376847: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 10 state = TRYLOCK This patch also does trivial cleaning of comma usage in collection stage transition event for consistent coding style. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 33 ++- include/trace/events/nilfs2.h | 53 +++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0fcf8e7..1dd9330 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -213,11 +213,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret 0)) return ret; - if (ret 0) + if (ret 0) { + trace_ti = current-journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti-ti_count, trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -228,6 +235,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current-journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti-ti_count, + trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -260,6 +272,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti-ti_flags |= NILFS_TI_COMMIT; if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs-ns_writer) { @@ -271,6 +285,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current-journal_info = ti-ti_save; if (ti-ti_flags NILFS_TI_SYNC) @@ -289,10 +306,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti-ti_magic != NILFS_TI_MAGIC); if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT
[PATCH tracepoints 1/2] nilfs2: add a tracepoint for tracking stage transition of segment construction
This patch adds a tracepoint for tracking stage transition of block collection in segment construction. With the tracepoint, we can analysis the behavior of segment construction in depth. It would be useful for bottleneck detection and debugging, etc. The tracepoint is created with the standard trace API of linux (like ext3, ext4, f2fs and btrfs). So we can analysis with existing tools easily. Of course, more detailed analysis will be possible if we can create nilfs specific analysis tools. Below is an example of event dump with Brendan Gregg's perf-tools (https://github.com/brendangregg/perf-tools). Time consumption between each stage can be obtained. $ sudo bin/tpoint nilfs2:nilfs2_collection_stage_transition Tracing nilfs2:nilfs2_collection_stage_transition. Ctrl-C to end. segctord-14875 [003] ...1 28311.067794: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_INIT segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_GC segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_FILE segctord-14875 [003] ...1 28311.068486: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_IFILE segctord-14875 [003] ...1 28311.068540: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_CPFILE segctord-14875 [003] ...1 28311.068561: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_SUFILE segctord-14875 [003] ...1 28311.068565: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_DAT segctord-14875 [003] ...1 28311.068573: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_SR segctord-14875 [003] ...1 28311.068574: nilfs2_collection_stage_transition: sci = 8800ce6de000 stage = ST_DONE For capturing transition correctly, this patch adds wrappers for the member scnt of nilfs_cstage. With this change, every transition of the stage can produce trace event in a correct manner. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 71 +++ fs/nilfs2/segment.h | 3 +- include/trace/events/nilfs2.h | 50 ++ 3 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 include/trace/events/nilfs2.h diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a1a1916..0fcf8e7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -76,6 +76,36 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include trace/events/nilfs2.h + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info-sc_stage.scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce tracepoint events. It is provided just for making the intention + * clear. + */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci-sc_stage.scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci-sc_stage.scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci) +{ + return sci-sc_stage.scnt; +} + /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ @@ -1055,7 +1085,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) size_t ndone; int err = 0; - switch (sci-sc_stage.scnt) { + switch (nilfs_sc_cstage_get(sci)) { case NILFS_ST_INIT: /* Pre-processes */ sci-sc_stage.flags = 0; @@ -1064,7 +1094,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci-sc_nblk_inc = 0; sci-sc_curseg-sb_sum.flags = NILFS_SS_LOGBGN; if (mode == SC_LSEG_DSYNC) { - sci-sc_stage.scnt = NILFS_ST_DSYNC; + nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); goto dsync_mode; } } @@ -1072,10 +1102,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci-sc_stage.dirty_file_ptr = NULL; sci-sc_stage.gc_inode_ptr = NULL; if (mode == SC_FLUSH_DAT) { - sci-sc_stage.scnt = NILFS_ST_DAT; + nilfs_sc_cstage_set(sci
Re: [PATCH tracepoints v2] nilfs2: add a tracepoint for transaction events
On Fri, Sep 26, 2014 at 12:55 AM, Ryusuke Konishi konishi.ryus...@lab.ntt.co.jp wrote: Hi, On Thu, 25 Sep 2014 22:20:32 +0900, Mitake Hitoshi wrote: This patch adds a tracepoint for transaction events of nilfs. With the tracepoint, these events can be tracked: begin, abort, commit, trylock, lock, and unlock. Basically, these events have corresponding functions e.g. begin event corresponds nilfs_transaction_begin(). The unlock event is an exception. It corresponds to the iteration in nilfs_transaction_lock(). Only one tracepoint is introcued: nilfs2_transaction_transition. The above events are distinguished with newly introduced enum. With this tracepoint, we can analyse a critical section of segment constructoin. Sample output by tpoint of perf-tools: cp-4457 [000] ...163.266220: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 1, flags = 9 state = BEGIN cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 0, flags = 9 state = COMMIT cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 0, flags = 9 state = COMMIT segctord-4371 [001] ...168.261196: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = TRYLOCK segctord-4371 [001] ...168.261280: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = LOCK segctord-4371 [001] ...168.261877: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 1, flags = 10 state = BEGIN segctord-4371 [001] ...168.262116: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 18 state = COMMIT segctord-4371 [001] ...168.265032: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 18 state = UNLOCK segctord-4371 [001] ...1 132.376847: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = TRYLOCK Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 33 ++- include/trace/events/nilfs2.h | 53 +++ 2 files changed, 85 insertions(+), 1 deletion(-) v2: fix a style problem diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0fcf8e7..1dd9330 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -213,11 +213,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret 0)) return ret; - if (ret 0) + if (ret 0) { + trace_ti = current-journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti-ti_count, trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -228,6 +235,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current-journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti-ti_count, + trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -260,6 +272,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti-ti_flags |= NILFS_TI_COMMIT; if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs-ns_writer) { @@ -271,6 +285,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current-journal_info = ti-ti_save; if (ti-ti_flags NILFS_TI_SYNC) @@ -289,10 +306,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti-ti_magic != NILFS_TI_MAGIC); if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition
[PATCH tracepoints v3] nilfs2: add a tracepoint for transaction events
This patch adds a tracepoint for transaction events of nilfs. With the tracepoint, these events can be tracked: begin, abort, commit, trylock, lock, and unlock. Basically, these events have corresponding functions e.g. begin event corresponds nilfs_transaction_begin(). The unlock event is an exception. It corresponds to the iteration in nilfs_transaction_lock(). Only one tracepoint is introcued: nilfs2_transaction_transition. The above events are distinguished with newly introduced enum. With this tracepoint, we can analyse a critical section of segment constructoin. Sample output by tpoint of perf-tools: cp-4457 [000] ...163.266220: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800bf5ccc58 count = 1 flags = 9 state = BEGIN cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800bf5ccc58 count = 0 flags = 9 state = COMMIT cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800bf5ccc58 count = 0 flags = 9 state = COMMIT segctord-4371 [001] ...168.261196: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 10 state = TRYLOCK segctord-4371 [001] ...168.261280: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 10 state = LOCK segctord-4371 [001] ...168.261877: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 1 flags = 10 state = BEGIN segctord-4371 [001] ...168.262116: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 18 state = COMMIT segctord-4371 [001] ...168.265032: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 18 state = UNLOCK segctord-4371 [001] ...1 132.376847: nilfs2_transaction_transition: sb = 8802112b8800 ti = 8800b889bdf8 count = 0 flags = 10 state = TRYLOCK This patch also does trivial cleaning of comma usage in collection stage transition event for consistent coding style. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- v3: fix a style problem in TP_printk() v2: fix a style problem fs/nilfs2/segment.c | 33 +- include/trace/events/nilfs2.h | 55 ++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0fcf8e7..1dd9330 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -213,11 +213,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret 0)) return ret; - if (ret 0) + if (ret 0) { + trace_ti = current-journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti-ti_count, trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -228,6 +235,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current-journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti-ti_count, + trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -260,6 +272,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti-ti_flags |= NILFS_TI_COMMIT; if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs-ns_writer) { @@ -271,6 +285,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current-journal_info = ti-ti_save; if (ti-ti_flags NILFS_TI_SYNC) @@ -289,10 +306,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti-ti_magic != NILFS_TI_MAGIC); if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count
[PATCH tracepoints v2] nilfs2: add a tracepoint for transaction events
This patch adds a tracepoint for transaction events of nilfs. With the tracepoint, these events can be tracked: begin, abort, commit, trylock, lock, and unlock. Basically, these events have corresponding functions e.g. begin event corresponds nilfs_transaction_begin(). The unlock event is an exception. It corresponds to the iteration in nilfs_transaction_lock(). Only one tracepoint is introcued: nilfs2_transaction_transition. The above events are distinguished with newly introduced enum. With this tracepoint, we can analyse a critical section of segment constructoin. Sample output by tpoint of perf-tools: cp-4457 [000] ...163.266220: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 1, flags = 9 state = BEGIN cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 0, flags = 9 state = COMMIT cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 0, flags = 9 state = COMMIT segctord-4371 [001] ...168.261196: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = TRYLOCK segctord-4371 [001] ...168.261280: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = LOCK segctord-4371 [001] ...168.261877: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 1, flags = 10 state = BEGIN segctord-4371 [001] ...168.262116: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 18 state = COMMIT segctord-4371 [001] ...168.265032: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 18 state = UNLOCK segctord-4371 [001] ...1 132.376847: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = TRYLOCK Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 33 ++- include/trace/events/nilfs2.h | 53 +++ 2 files changed, 85 insertions(+), 1 deletion(-) v2: fix a style problem diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0fcf8e7..1dd9330 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -213,11 +213,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret 0)) return ret; - if (ret 0) + if (ret 0) { + trace_ti = current-journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti-ti_count, trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -228,6 +235,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current-journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti-ti_count, + trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -260,6 +272,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti-ti_flags |= NILFS_TI_COMMIT; if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs-ns_writer) { @@ -271,6 +285,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current-journal_info = ti-ti_save; if (ti-ti_flags NILFS_TI_SYNC) @@ -289,10 +306,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti-ti_magic != NILFS_TI_MAGIC); if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); + current-journal_info = ti-ti_save; if (ti-ti_flags
[PATCH tracepoints] nilfs2: add a tracepoint for transaction events
This patch adds a tracepoint for transaction events of nilfs. With the tracepoint, these events can be tracked: begin, abort, commit, trylock, lock, and unlock. Basically, these events have corresponding functions e.g. begin event corresponds nilfs_transaction_begin(). The unlock event is an exception. It corresponds to the iteration in nilfs_transaction_lock(). Only one tracepoint is introcued: nilfs2_transaction_transition. The above events are distinguished with newly introduced enum. With this tracepoint, we can analyse a critical section of segment constructoin. Sample output by tpoint of perf-tools: cp-4457 [000] ...163.266220: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 1, flags = 9 state = BEGIN cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 0, flags = 9 state = COMMIT cp-4457 [000] ...163.266221: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800bf5ccc58, count = 0, flags = 9 state = COMMIT segctord-4371 [001] ...168.261196: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = TRYLOCK segctord-4371 [001] ...168.261280: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = LOCK segctord-4371 [001] ...168.261877: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 1, flags = 10 state = BEGIN segctord-4371 [001] ...168.262116: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 18 state = COMMIT segctord-4371 [001] ...168.265032: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 18 state = UNLOCK segctord-4371 [001] ...1 132.376847: nilfs2_transaction_transition: sb = 8802112b8800, ti = 8800b889bdf8, count = 0, flags = 10 state = TRYLOCK Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 33 ++- include/trace/events/nilfs2.h | 53 +++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0fcf8e7..1dd9330 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -213,11 +213,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret 0)) return ret; - if (ret 0) + if (ret 0) { + trace_ti = current-journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti-ti_count, trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -228,6 +235,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current-journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti-ti_count, + trace_ti-ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -260,6 +272,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti-ti_flags |= NILFS_TI_COMMIT; if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs-ns_writer) { @@ -271,6 +285,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current-journal_info = ti-ti_save; if (ti-ti_flags NILFS_TI_SYNC) @@ -289,10 +306,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti-ti_magic != NILFS_TI_MAGIC); if (ti-ti_count 0) { ti-ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(nilfs-ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti-ti_count, + ti-ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); + current-journal_info = ti-ti_save; if (ti-ti_flags NILFS_TI_DYNAMIC_ALLOC
Re: [PATCH v3] nilfs2: add a tracepoint for tracking stage transition of segment construction
On Sun, Sep 14, 2014 at 1:09 PM, Ryusuke Konishi konishi.ryus...@lab.ntt.co.jp wrote: On Sun, 14 Sep 2014 00:14:36 +0900, Mitake Hitoshi wrote: This patch adds a tracepoint for tracking stage transition of block collection in segment construction. With the tracepoint, we can analysis the behavior of segment construction in depth. It would be useful for bottleneck detection and debugging, etc. The tracepoint is created with the standard trace API of linux (like ext3, ext4, f2fs and btrfs). So we can analysis with existing tools easily. Of course, more detailed analysis will be possible if we can create nilfs specific analysis tools. Below is an example of event dump with Brendan Gregg's perf-tools (https://github.com/brendangregg/perf-tools). Time consumption between each stage can be obtained. $ sudo bin/tpoint nilfs2:nilfs2_collection_stage_transition Tracing nilfs2:nilfs2_collection_stage_transition. Ctrl-C to end. segctord-14875 [003] ...1 28311.067794: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_INIT segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_GC segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_FILE segctord-14875 [003] ...1 28311.068486: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_IFILE segctord-14875 [003] ...1 28311.068540: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_CPFILE segctord-14875 [003] ...1 28311.068561: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SUFILE segctord-14875 [003] ...1 28311.068565: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DAT segctord-14875 [003] ...1 28311.068573: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SR segctord-14875 [003] ...1 28311.068574: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DONE For capturing transition correctly, this patch adds wrappers for the member scnt of nilfs_cstage. With this change, every transition of the stage can produce trace event in a correct manner. Of course the tracepoint added by this patch is very limited, so we need to add more points for detailed analysis. This patch is something like demonstration. If this concept is acceptable for the nilfs community, I'd like to add more tracepoints and prepare analysis tools. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 71 +++ fs/nilfs2/segment.h | 3 +- include/trace/events/nilfs2.h | 50 ++ 3 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 include/trace/events/nilfs2.h v3: undo rename v2: correct the email address of author snip Looks good. I pushed out this patch as tracepoints branch of nilfs2.git[1]. If you hope it to be sent to upstream separately at this time, please let me know. (In that case, the following description should be revised to be ready for mainline merge). Of course the tracepoint added by this patch is very limited, so we need to add more points for detailed analysis. This patch is something like demonstration. If this concept is acceptable for the nilfs community, I'd like to add more tracepoints and prepare analysis tools. Otherwise, I'll keep it in the tracepoints branch for now. Thanks a lot for your review and pushing. The change for tracepoints doesn't need to be sent to upstream immediately. I'll enhance the feature for a while on the tracepoints branch. Thanks, Hitoshi [1] https://github.com/konis/nilfs2.git Thanks, Ryusuke Konishi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] nilfs2: add a tracepoint for tracking stage transition of segment construction
On Fri, Sep 5, 2014 at 3:40 PM, Vyacheslav Dubeyko sl...@dubeyko.com wrote: On Fri, 2014-09-05 at 11:41 +0900, Hitoshi Mitake wrote: [snip] diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 38a1d00..1e9b931 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -66,13 +66,14 @@ struct nilfs_recovery_info { /** * struct nilfs_cstage - Context of collection stage - * @scnt: Stage count + * @__scnt: Stage count, must be accessed via wrappers: + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() * @flags: State flags * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file * @gc_inode_ptr: Pointer on the list of gc-inodes */ struct nilfs_cstage { - int scnt; + int __scnt; It doesn't make sense this change for my taste. As far as I can judge, you've made this change because scnt field should be accessed by means of wrappers. But __scnt name doesn't provide from direct using this field. So, I suppose that extended comment will be enough. OK, I'll rename it in v3. By the way, do you plan to add tracepoints for another NILFS2 subsystems? Yes. Thanks, Hitoshi Thanks, Vyacheslav Dubeyko. -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3] nilfs2: add a tracepoint for tracking stage transition of segment construction
This patch adds a tracepoint for tracking stage transition of block collection in segment construction. With the tracepoint, we can analysis the behavior of segment construction in depth. It would be useful for bottleneck detection and debugging, etc. The tracepoint is created with the standard trace API of linux (like ext3, ext4, f2fs and btrfs). So we can analysis with existing tools easily. Of course, more detailed analysis will be possible if we can create nilfs specific analysis tools. Below is an example of event dump with Brendan Gregg's perf-tools (https://github.com/brendangregg/perf-tools). Time consumption between each stage can be obtained. $ sudo bin/tpoint nilfs2:nilfs2_collection_stage_transition Tracing nilfs2:nilfs2_collection_stage_transition. Ctrl-C to end. segctord-14875 [003] ...1 28311.067794: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_INIT segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_GC segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_FILE segctord-14875 [003] ...1 28311.068486: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_IFILE segctord-14875 [003] ...1 28311.068540: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_CPFILE segctord-14875 [003] ...1 28311.068561: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SUFILE segctord-14875 [003] ...1 28311.068565: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DAT segctord-14875 [003] ...1 28311.068573: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SR segctord-14875 [003] ...1 28311.068574: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DONE For capturing transition correctly, this patch adds wrappers for the member scnt of nilfs_cstage. With this change, every transition of the stage can produce trace event in a correct manner. Of course the tracepoint added by this patch is very limited, so we need to add more points for detailed analysis. This patch is something like demonstration. If this concept is acceptable for the nilfs community, I'd like to add more tracepoints and prepare analysis tools. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 71 +++ fs/nilfs2/segment.h | 3 +- include/trace/events/nilfs2.h | 50 ++ 3 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 include/trace/events/nilfs2.h v3: undo rename v2: correct the email address of author diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a1a1916..0fcf8e7 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -76,6 +76,36 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include trace/events/nilfs2.h + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info-sc_stage.scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce tracepoint events. It is provided just for making the intention + * clear. + */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci-sc_stage.scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci-sc_stage.scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci) +{ + return sci-sc_stage.scnt; +} + /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ @@ -1055,7 +1085,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) size_t ndone; int err = 0; - switch (sci-sc_stage.scnt) { + switch (nilfs_sc_cstage_get(sci)) { case NILFS_ST_INIT: /* Pre-processes */ sci-sc_stage.flags = 0; @@ -1064,7 +1094,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci-sc_nblk_inc = 0; sci-sc_curseg-sb_sum.flags = NILFS_SS_LOGBGN; if (mode == SC_LSEG_DSYNC) { - sci-sc_stage.scnt = NILFS_ST_DSYNC; + nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); goto dsync_mode; } } @@ -1072,10
Re: [PATCH RFC] nilfs2: add a tracepoint for tracking stage transition of segment construction
On Thu, Sep 4, 2014 at 3:23 AM, Ryusuke Konishi konishi.ryus...@lab.ntt.co.jp wrote: Hi Mitake-san, On Tue, 2 Sep 2014 21:19:39 +0900, Mitake Hitoshi wrote: From: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp This patch adds a tracepoint for tracking stage transition of block collection in segment construction. With the tracepoint, we can analysis the behavior of segment construction in depth. It would be useful for bottleneck detection and debugging, etc. The tracepoint is created with the standard trace API of linux (like ext3, ext4, f2fs and btrfs). So we can analysis with existing tools easily. Of course, more detailed analysis will be possible if we can create nilfs specific analysis tools. Below is an example of event dump with Brendan Gregg's perf-tools (https://github.com/brendangregg/perf-tools). Time consumption between each stage can be obtained. $ sudo bin/tpoint nilfs2:nilfs2_collection_stage_transition Tracing nilfs2:nilfs2_collection_stage_transition. Ctrl-C to end. segctord-14875 [003] ...1 28311.067794: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_INIT segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_GC segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_FILE segctord-14875 [003] ...1 28311.068486: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_IFILE segctord-14875 [003] ...1 28311.068540: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_CPFILE segctord-14875 [003] ...1 28311.068561: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SUFILE segctord-14875 [003] ...1 28311.068565: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DAT segctord-14875 [003] ...1 28311.068573: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SR segctord-14875 [003] ...1 28311.068574: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DONE For capturing transition correctly, this patch renames the member scnt of nilfs_cstage and adds wrappers for the member. With this change, every transition of the stage can produce trace event in a correct manner. Of course the tracepoint added by this patch is very limited, so we need to add more points for detailed analysis. This patch is something like demonstration. If this concept is acceptable for the nilfs community, I'd like to add more tracepoints and prepare analysis tools. Great! This tracepoint support looks to be what I wanted to introduce to nilfs2 to help debugging and performance analysis. I felt it's really nice after I tried this patch with the perf-tools though I am not familiar with the manner of the tracepoints. Could you proceed this work from what you think useful ? I will help sending this work to upstream step by step, and would like to extend it learning various tracepoint features. Sure, I'd like to work on improving the tracepoints for better analysis and debugging. By the way, your mail addresses differ between the author line (from line) and the sob line. Can you include a From line so that the mail addresses match between them. Sorry, I'll fix the mismatch in v2. Thanks for your pointing. Thanks, Hitoshi Thanks, Ryusuke Konishi Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 70 ++- fs/nilfs2/segment.h | 5 ++-- include/trace/events/nilfs2.h | 50 +++ 3 files changed, 103 insertions(+), 22 deletions(-) create mode 100644 include/trace/events/nilfs2.h diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a1a1916..e841e22 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -76,6 +76,35 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include trace/events/nilfs2.h + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info-sc_stage.__scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce events. It is provided just for making the intention clear. + */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci-sc_stage.__scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci-sc_stage.__scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci
[PATCH RFC] nilfs2: add a tracepoint for tracking stage transition of segment construction
From: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp This patch adds a tracepoint for tracking stage transition of block collection in segment construction. With the tracepoint, we can analysis the behavior of segment construction in depth. It would be useful for bottleneck detection and debugging, etc. The tracepoint is created with the standard trace API of linux (like ext3, ext4, f2fs and btrfs). So we can analysis with existing tools easily. Of course, more detailed analysis will be possible if we can create nilfs specific analysis tools. Below is an example of event dump with Brendan Gregg's perf-tools (https://github.com/brendangregg/perf-tools). Time consumption between each stage can be obtained. $ sudo bin/tpoint nilfs2:nilfs2_collection_stage_transition Tracing nilfs2:nilfs2_collection_stage_transition. Ctrl-C to end. segctord-14875 [003] ...1 28311.067794: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_INIT segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_GC segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_FILE segctord-14875 [003] ...1 28311.068486: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_IFILE segctord-14875 [003] ...1 28311.068540: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_CPFILE segctord-14875 [003] ...1 28311.068561: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SUFILE segctord-14875 [003] ...1 28311.068565: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DAT segctord-14875 [003] ...1 28311.068573: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_SR segctord-14875 [003] ...1 28311.068574: nilfs2_collection_stage_transition: sci = 8800ce6de000, stage = ST_DONE For capturing transition correctly, this patch renames the member scnt of nilfs_cstage and adds wrappers for the member. With this change, every transition of the stage can produce trace event in a correct manner. Of course the tracepoint added by this patch is very limited, so we need to add more points for detailed analysis. This patch is something like demonstration. If this concept is acceptable for the nilfs community, I'd like to add more tracepoints and prepare analysis tools. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- fs/nilfs2/segment.c | 70 ++- fs/nilfs2/segment.h | 5 ++-- include/trace/events/nilfs2.h | 50 +++ 3 files changed, 103 insertions(+), 22 deletions(-) create mode 100644 include/trace/events/nilfs2.h diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a1a1916..e841e22 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -76,6 +76,35 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include trace/events/nilfs2.h + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info-sc_stage.__scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce events. It is provided just for making the intention clear. + */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci-sc_stage.__scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci-sc_stage.__scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci) +{ + return sci-sc_stage.__scnt; +} + /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ @@ -1055,7 +1084,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) size_t ndone; int err = 0; - switch (sci-sc_stage.scnt) { + switch (nilfs_sc_cstage_get(sci)) { case NILFS_ST_INIT: /* Pre-processes */ sci-sc_stage.flags = 0; @@ -1064,7 +1093,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci-sc_nblk_inc = 0; sci-sc_curseg-sb_sum.flags = NILFS_SS_LOGBGN; if (mode == SC_LSEG_DSYNC) { - sci-sc_stage.scnt = NILFS_ST_DSYNC; + nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); goto dsync_mode
Re: [PATCH] lib/cleaner_exec.c: get process ID of cleanerd through pipe
At Wed, 22 Jan 2014 22:35:20 +0900, Ryusuke Konishi wrote: After applying commit 8ce4b524a1654ece478f54fce772cc0c16e3c559 cleanerd: call _exit(2) twice for ensuring not being a session leader, the mount helper program of nilfs2 no longer gets proper process ID (pid) of cleaner daemon, and umount.nilfs2 fails to kill cleanerd. This fixes the issue by letting cleanerd print out the pid of spawned daemon and letting nilfs_launch_cleanerd() function read it through pipe. Signed-off-by: Ryusuke Konishi konishi.ryus...@lab.ntt.co.jp Cc: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp Looks good to me and sorry for my bug! Reviewed-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp Thanks, Hitoshi --- lib/cleaner_exec.c | 84 ++ man/nilfs_cleanerd.8 |3 ++ sbin/cleanerd/cleanerd.c |3 ++ 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/lib/cleaner_exec.c b/lib/cleaner_exec.c index edc55e3..5336c32 100644 --- a/lib/cleaner_exec.c +++ b/lib/cleaner_exec.c @@ -114,6 +114,43 @@ static inline int process_is_alive(pid_t pid) return (kill(pid, 0) == 0); } +static int receive_pid(int fd, pid_t *ppid) +{ + char buf[100]; + unsigned long pid; + FILE *fp; + int ret; + + fp = fdopen(fd, r); + if (!fp) { + nilfs_cleaner_logger( + LOG_ERR, _(Error: fdopen failed: %m)); + close(fd); + goto fail; + } + + /* read process ID of cleanerd through the given fd */ + while (fgets(buf, sizeof(buf), fp) != NULL) { + /* + * fgets() is blocked during no child processes + * respond, but it will escape returning a NULL value + * and terminate the loop when all child processes + * close the given pipe (fd) including call of exit(). + */ + ret = sscanf(buf, NILFS_CLEANERD_PID=%lu, pid); + if (ret == 1) { + *ppid = pid; + fclose(fp); /* this also closes fd */ + return 0; + } + } + fclose(fp); +fail: + nilfs_cleaner_logger( + LOG_WARNING, _(Warning: cannot get pid of cleanerd)); + return -1; +} + int nilfs_launch_cleanerd(const char *device, const char *mntdir, unsigned long protperiod, pid_t *ppid) { @@ -123,6 +160,7 @@ int nilfs_launch_cleanerd(const char *device, const char *mntdir, int i = 0; int ret; char buf[256]; + int pipes[2]; if (stat(cleanerd, statbuf) != 0) { nilfs_cleaner_logger(LOG_ERR, _(Error: %s not found), @@ -130,8 +168,16 @@ int nilfs_launch_cleanerd(const char *device, const char *mntdir, return -1; } + ret = pipe(pipes); + if (ret 0) { + nilfs_cleaner_logger( + LOG_ERR, _(Error: failed to create pipe: %m)); + return -1; + } + ret = fork(); if (ret == 0) { + /* child */ if (setgid(getgid()) 0) { nilfs_cleaner_logger( LOG_ERR, @@ -159,16 +205,40 @@ int nilfs_launch_cleanerd(const char *device, const char *mntdir, sigdelset(sigs, SIGSEGV); sigprocmask(SIG_UNBLOCK, sigs, NULL); + close(pipes[0]); + ret = dup2(pipes[1], STDOUT_FILENO); + if (ret 0) { + nilfs_cleaner_logger( + LOG_ERR, + _(Error: failed to duplicate pipe: %m)); + exit(1); + } + close(pipes[1]); + execv(cleanerd, (char **)dargs); exit(1); /* reach only if failed */ - } else if (ret != -1) { - *ppid = ret; - return 0; /* cleanerd started */ + } else if (ret 0) { + /* parent */ + close(pipes[1]); + + /* + * fork() returns a pid of the child process, but we + * cannot use it because cleanerd internally fork()s + * and changes pid. + */ + ret = receive_pid(pipes[0], ppid); + if (ret 0) + *ppid = 0; + /* + * always return a success code because cleanerd has + * already started. + */ + return 0; } else { - int errsv = errno; - nilfs_cleaner_logger(LOG_ERR, - _(Error: could not fork: %s), - strerror(errsv)); + nilfs_cleaner_logger( + LOG_ERR, _(Error: could not fork: %m)); + close(pipes[0]); + close(pipes[1
Re: [PATCH] mkfs: check sizes of important structs at build time
At Sat, 4 Jan 2014 18:39:58 +0300, Vyacheslav Dubeyko wrote: On Jan 4, 2014, at 4:54 PM, Hitoshi Mitake wrote: On Sat, Jan 4, 2014 at 11:52 PM, Vyacheslav Dubeyko sl...@dubeyko.com wrote: On Jan 4, 2014, at 4:29 PM, Hitoshi Mitake wrote: Current nilfs_check_ondisk_sizes() checks sizes of important structs at run time. The checking should be done at build time. This patch adds a new macro, BUILD_BUG_ON(), for this purpose. It is similar to static_assert() of C++11. If an argument is true, the macro causes a bulid error. Below is an example of BUILD_BUG_ON(). When the checked conditions are true like below: /* intentional change for testing BUILD_BUG_ON() */ static __attribute__((used)) void nilfs_check_ondisk_sizes(void) { BUILD_BUG_ON(sizeof(struct nilfs_inode) NILFS_MIN_BLOCKSIZE); So, why do we need to have function for the case of checking on compilation phase? Just for excluding the checking from other part of code and improve readability. I think that we can have only macro instead of the function nilfs_check_ondisk_sizes(). And this macros can be placed in the begin of main() call. I think that it will be enough for the compilation phase check. Ah, I see. I suppose that we need to have some run-time check anyway. Your approach is correct for the current state of the code. But I feel a necessity in run-time check anyway. Maybe it looks like a paranoia. :) Maybe it needs to extend checking in this place. Do you mean both of the build time check and the run time check? If so, I agree with your opinion. I'll send v2 based on this policy. I mean that block size can be different during volume creation and maybe it makes sense to extend a block size related checking for run-time phase. That's all. But right now I haven't any concrete suggestions. Currently, the check is comparison between sizes of important structs and the minimal block size. So we don't need a function for it. I think adding the function for runtime checking when nilfs requires it would be enough. Thanks, Hitoshi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mkfs: check sizes of important structs at build time
At Sun, 05 Jan 2014 01:08:43 +0900 (JST), Ryusuke Konishi wrote: On Sat, 4 Jan 2014 22:29:31 +0900, Hitoshi Mitake wrote: Current nilfs_check_ondisk_sizes() checks sizes of important structs at run time. The checking should be done at build time. This patch adds a new macro, BUILD_BUG_ON(), for this purpose. It is similar to static_assert() of C++11. If an argument is true, the macro causes a bulid error. Below is an example of BUILD_BUG_ON(). When the checked conditions are true like below: /* intentional change for testing BUILD_BUG_ON() */ static __attribute__((used)) void nilfs_check_ondisk_sizes(void) { BUILD_BUG_ON(sizeof(struct nilfs_inode) NILFS_MIN_BLOCKSIZE); ... build process of mkfs.o causes errors like this: gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I../.. -I../../include -Wall -g -O2 -MT mkfs.o -MD -MP -MF .deps/mkfs.Tpo -c -o mkfs.o mkfs.c mkfs.c: In function 'nilfs_check_ondisk_sizes': mkfs.c:429:2: error: negative width in bit-field 'anonymous' mkfs.c:430:2: error: negative width in bit-field 'anonymous' mkfs.c:431:2: error: negative width in bit-field 'anonymous' mkfs.c:432:2: error: negative width in bit-field 'anonymous' mkfs.c:433:2: error: negative width in bit-field 'anonymous' mkfs.c:434:2: error: negative width in bit-field 'anonymous' mkfs.c:435:2: error: negative width in bit-field 'anonymous' Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp This is an interesting patch. I am inclined to apply this since the every test in the nilfs_check_ondisk_sizes function is static. If we will add a new check that depends on block size in a future, we need to add a separate runtime check function as Vyacheslav wrote, but I think you are doing right thing. One my question is why you used bit operator. The BUILD_BUG_ON marcro of kernel is implemented with negative array index. Is there any reason for this ? If I remember correctly, I found in the BUILD_BUG_ON() in the code of Xen. I don't have any opinion about how we implement the check. If you like the way of array with negative length, I will employ it in v2. BTW, there is another approach of the implementation. #define static_assert(a, b) do { switch (0) case 0: case (a): ; } while (0) # from xv6: http://pdos.csail.mit.edu/6.828/2012/xv6.html This duplicated case of switch statement can be used to implement BUILD_BUG_ON() (the b can be used as an error message). Which one do you like? Thanks, Hitoshi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH nilfs-utils 1/2] cleanerd: call _exit(2) twice for ensuring not being a session leader
At Sat, 04 Jan 2014 23:28:06 +0900 (JST), Ryusuke Konishi wrote: Hi, On Sat, 4 Jan 2014 22:18:00 +0900, Hitoshi Mitake wrote: On Wed, Jan 1, 2014 at 6:30 PM, Ryusuke Konishi konishi.ryus...@lab.ntt.co.jp wrote: On Wed, 1 Jan 2014 16:30:48 +0900, Hitoshi Mitake wrote: Current daemonize() function of cleanerd call _exit(2) only once during its process of becoming a daemon process. But in the linux environment, a daemon process should call _exit(2) twice for ensuring not being a session leader. If a process don't do that, unexpected SIGHUP can be sent to the process (though it happens rarely). The signal would be confusing event for cleanerd of nilfs. This patch removes this potential problem. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/cleanerd/cleanerd.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/sbin/cleanerd/cleanerd.c b/sbin/cleanerd/cleanerd.c index 26067bd..edfa083 100644 --- a/sbin/cleanerd/cleanerd.c +++ b/sbin/cleanerd/cleanerd.c @@ -676,6 +676,16 @@ static int daemonize(int nochdir, int noclose, int nofork) /* umask(0); */ + /* for ensuring I'm not a session leader */ + if (!nofork) { + pid = fork(); + if (pid 0) + return -1; + else if (pid != 0) + /* parent */ + _exit(0); + } + I tried your patch, but the cleaner daemon still was a session leader. Thanks for your review and testing. This turned out because nilfs_cleanerd is usually executed by mount.nilfs2 program with the nofork option (-n). To fix this problem, it looks like the above !nofork check of the second fork() should be removed even though it becomes confusing. In that case, we may need to add some explanation why fork() should be called even if nofork is specified. For ensuring not being a session leader, fork() should be called twice. Removing the second condition of !nofork is not enough. For this purpose, we need to remove both of the conditions of !nofork. Yes, I supposed here that the caller (the mount helper program) already did a fork() call when -n option is specified. But, anyway, removing only the latter check of !nofork isn't a good idea. It's a hacky. BTW, what is an intention of -n option of cleanerd? I read the code of nilfs_launch_cleanerd() but couldn't understand the reason of this option. This is an option just to avoid fork doubly when mount.nilfs2 already did a fork(). If this option is aiming to reduce calling of fork(), I think this can be eliminated. Calling 3 fork()s (1 in mount.nilfs2, 2 in cleanerd) would be acceptable. Okay, accepting 3 forks()s seems reasonable. So, how about changing both programs as follow? 1) Change cleanerd to simply ignore -n option as a historical option (remove the existing !nofork check). 2) Change cleanerd always fork twice to ensure that it will not be a session leader. 3) Change cleaner_exec.c not to add -n option. I agree with the above 3 policies. I'll send v2 based on them later. Thanks, Hitoshi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nilfs-utils v2 1/4] cleanerd: ignore nofork option
The nofork option aims to reduce call of fork(), but it is not effective. This patch lets cleanerd ignore the option simply even if it is passed. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/cleanerd/cleanerd.c | 28 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/sbin/cleanerd/cleanerd.c b/sbin/cleanerd/cleanerd.c index e1f6a04..0b5bb70 100644 --- a/sbin/cleanerd/cleanerd.c +++ b/sbin/cleanerd/cleanerd.c @@ -113,7 +113,7 @@ do {\ const static struct option long_option[] = { {conffile, required_argument, NULL, 'c'}, {help, no_argument, NULL, 'h'}, - /* internal option for mount.nilfs2 only */ + /* nofork option is obsolete. It does nothing even if passed */ {nofork, no_argument, NULL, 'n'}, {protection-period, required_argument, NULL, 'p'}, {version, no_argument, NULL, 'V'}, @@ -691,20 +691,18 @@ static int oom_adjust(void) #define DEVNULL/dev/null #define ROOTDIR/ -static int daemonize(int nochdir, int noclose, int nofork) +static int daemonize(int nochdir, int noclose) { pid_t pid; - if (!nofork) { - pid = fork(); - if (pid 0) - return -1; - else if (pid != 0) - /* parent */ - _exit(0); - } + pid = fork(); + if (pid 0) + return -1; + else if (pid != 0) + /* parent */ + _exit(0); - /* child or nofork */ + /* child */ if (setsid() 0) return -1; @@ -1491,7 +1489,7 @@ int main(int argc, char *argv[]) char canonical[PATH_MAX + 2]; const char *dev, *dir; char *endptr; - int status, nofork, c; + int status, c; #ifdef _GNU_SOURCE int option_index; #endif /* _GNU_SOURCE */ @@ -1499,7 +1497,6 @@ int main(int argc, char *argv[]) progname = (strrchr(argv[0], '/') != NULL) ? strrchr(argv[0], '/') + 1 : argv[0]; conffile = NILFS_CLEANERD_CONFFILE; - nofork = 0; status = 0; protection_period = ULONG_MAX; dev = NULL; @@ -1520,8 +1517,7 @@ int main(int argc, char *argv[]) nilfs_cleanerd_usage(progname); exit(0); case 'n': - /* internal option for mount.nilfs2 only */ - nofork = 1; + /* ignore nofork option, do nothing */ break; case 'p': protection_period = strtoul(optarg, endptr, 10); @@ -1568,7 +1564,7 @@ int main(int argc, char *argv[]) } } - if (daemonize(0, 0, nofork) 0) { + if (daemonize(0, 0) 0) { fprintf(stderr, %s: %s\n, progname, strerror(errno)); exit(1); } -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nilfs-utils v2 3/4] mount: don't pass -n option to cleanerd
This patch lets mount.nilfs2 not pass the -n option to cleanerd, because it is obsoleted. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- lib/cleaner_exec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/cleaner_exec.c b/lib/cleaner_exec.c index 299451c..edc55e3 100644 --- a/lib/cleaner_exec.c +++ b/lib/cleaner_exec.c @@ -75,7 +75,6 @@ #define WAIT_CLEANERD_RETRY_TIMEOUT8 /* in seconds */ static const char cleanerd[] = /sbin/ NILFS_CLEANERD_NAME; -static const char cleanerd_nofork_opt[] = -n; static const char cleanerd_protperiod_opt[] = -p; static void default_logger(int priority, const char *fmt, ...) @@ -118,7 +117,7 @@ static inline int process_is_alive(pid_t pid) int nilfs_launch_cleanerd(const char *device, const char *mntdir, unsigned long protperiod, pid_t *ppid) { - const char *dargs[7]; + const char *dargs[6]; struct stat statbuf; sigset_t sigs; int i = 0; @@ -146,7 +145,6 @@ int nilfs_launch_cleanerd(const char *device, const char *mntdir, exit(1); } dargs[i++] = cleanerd; - dargs[i++] = cleanerd_nofork_opt; if (protperiod != ULONG_MAX) { dargs[i++] = cleanerd_protperiod_opt; snprintf(buf, sizeof(buf), %lu, protperiod); -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nilfs-utils v2 2/4] cleanerd: call _exit(2) twice for ensuring not being a session leader
Current daemonize() function of cleanerd call _exit(2) only once during its process of becoming a daemon process. But in the linux environment, a daemon should call _exit(2) twice for ensuring not being a session leader. If a process don't do that, unexpected SIGHUP can be sent to the process (though it happens rarely). The signal would be confusing event for cleanerd of nilfs. This patch removes this potential problem. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/cleanerd/cleanerd.c | 8 1 file changed, 8 insertions(+) diff --git a/sbin/cleanerd/cleanerd.c b/sbin/cleanerd/cleanerd.c index 0b5bb70..86dfcf7 100644 --- a/sbin/cleanerd/cleanerd.c +++ b/sbin/cleanerd/cleanerd.c @@ -708,6 +708,14 @@ static int daemonize(int nochdir, int noclose) /* umask(0); */ + /* for ensuring I'm not a session leader */ + pid = fork(); + if (pid 0) + return -1; + else if (pid != 0) + /* parent */ + _exit(0); + if (!nochdir (chdir(ROOTDIR) 0)) return -1; -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH nilfs-utils v2 2/4] cleanerd: call _exit(2) twice for ensuring not being a session leader
At Sun, 5 Jan 2014 22:44:41 +0400, Сергей Александров wrote: [1 text/plain; KOI8-R (quoted-printable)] Hello, May be it makes sense to use pid files in /var/run folder with special names like nilfs_cleanerd_dev.pid (nilfs_cleanerd_sda1.pid for instance) for this purpose? But it looks a little bit over engineered and may also have some issues while mounting from initrd. Regards, Aleksandrov Sergey OKay, I'll try to solve the problem with a way of pidfile or other methods. If we can't find a suitable way, let's revert the series. Thanks, Hitoshi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH nilfs-utils 1/2] cleanerd: call _exit(2) twice for ensuring not being a session leader
On Wed, Jan 1, 2014 at 6:30 PM, Ryusuke Konishi konishi.ryus...@lab.ntt.co.jp wrote: On Wed, 1 Jan 2014 16:30:48 +0900, Hitoshi Mitake wrote: Current daemonize() function of cleanerd call _exit(2) only once during its process of becoming a daemon process. But in the linux environment, a daemon process should call _exit(2) twice for ensuring not being a session leader. If a process don't do that, unexpected SIGHUP can be sent to the process (though it happens rarely). The signal would be confusing event for cleanerd of nilfs. This patch removes this potential problem. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/cleanerd/cleanerd.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/sbin/cleanerd/cleanerd.c b/sbin/cleanerd/cleanerd.c index 26067bd..edfa083 100644 --- a/sbin/cleanerd/cleanerd.c +++ b/sbin/cleanerd/cleanerd.c @@ -676,6 +676,16 @@ static int daemonize(int nochdir, int noclose, int nofork) /* umask(0); */ + /* for ensuring I'm not a session leader */ + if (!nofork) { + pid = fork(); + if (pid 0) + return -1; + else if (pid != 0) + /* parent */ + _exit(0); + } + I tried your patch, but the cleaner daemon still was a session leader. Thanks for your review and testing. This turned out because nilfs_cleanerd is usually executed by mount.nilfs2 program with the nofork option (-n). To fix this problem, it looks like the above !nofork check of the second fork() should be removed even though it becomes confusing. In that case, we may need to add some explanation why fork() should be called even if nofork is specified. For ensuring not being a session leader, fork() should be called twice. Removing the second condition of !nofork is not enough. For this purpose, we need to remove both of the conditions of !nofork. BTW, what is an intention of -n option of cleanerd? I read the code of nilfs_launch_cleanerd() but couldn't understand the reason of this option. If this option is aiming to reduce calling of fork(), I think this can be eliminated. Calling 3 fork()s (1 in mount.nilfs2, 2 in cleanerd) would be acceptable. Thanks, Hitoshi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] mkfs: check sizes of important structs at build time
Current nilfs_check_ondisk_sizes() checks sizes of important structs at run time. The checking should be done at build time. This patch adds a new macro, BUILD_BUG_ON(), for this purpose. It is similar to static_assert() of C++11. If an argument is true, the macro causes a bulid error. Below is an example of BUILD_BUG_ON(). When the checked conditions are true like below: /* intentional change for testing BUILD_BUG_ON() */ static __attribute__((used)) void nilfs_check_ondisk_sizes(void) { BUILD_BUG_ON(sizeof(struct nilfs_inode) NILFS_MIN_BLOCKSIZE); ... build process of mkfs.o causes errors like this: gcc -std=gnu99 -DHAVE_CONFIG_H -I. -I../.. -I../../include -Wall -g -O2 -MT mkfs.o -MD -MP -MF .deps/mkfs.Tpo -c -o mkfs.o mkfs.c mkfs.c: In function 'nilfs_check_ondisk_sizes': mkfs.c:429:2: error: negative width in bit-field 'anonymous' mkfs.c:430:2: error: negative width in bit-field 'anonymous' mkfs.c:431:2: error: negative width in bit-field 'anonymous' mkfs.c:432:2: error: negative width in bit-field 'anonymous' mkfs.c:433:2: error: negative width in bit-field 'anonymous' mkfs.c:434:2: error: negative width in bit-field 'anonymous' mkfs.c:435:2: error: negative width in bit-field 'anonymous' Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/mkfs/mkfs.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sbin/mkfs/mkfs.c b/sbin/mkfs/mkfs.c index 4e153ce..8eb00bf 100644 --- a/sbin/mkfs/mkfs.c +++ b/sbin/mkfs/mkfs.c @@ -85,6 +85,9 @@ typedef __u64 blocknr_t; #define BUG_ON(x) assert(!(x)) +/* Force a compilation error if the condition is true */ +#define BUILD_BUG_ON(condition) ((void)sizeof(struct { int: -!!(condition); })) + #define ROUNDUP_DIV(n, m) (((n) - 1) / (m) + 1) #define max_t(type, x, y) \ ({ type __x = (x); type __y = (y); __x __y ? __x : __y; }) @@ -417,16 +420,15 @@ static unsigned count_dat_blocks(unsigned nr_dat_entries) return nblocks; } -static void nilfs_check_ondisk_sizes(void) +static __attribute__((used)) void nilfs_check_ondisk_sizes(void) { - if (sizeof(struct nilfs_inode) NILFS_MIN_BLOCKSIZE || - sizeof(struct nilfs_sufile_header) NILFS_MIN_BLOCKSIZE || - sizeof(struct nilfs_segment_usage) NILFS_MIN_BLOCKSIZE || - sizeof(struct nilfs_cpfile_header) NILFS_MIN_BLOCKSIZE || - sizeof(struct nilfs_checkpoint) NILFS_MIN_BLOCKSIZE || - sizeof(struct nilfs_dat_entry) NILFS_MIN_BLOCKSIZE || - sizeof(struct nilfs_super_root) NILFS_MIN_BLOCKSIZE) - perr(Internal error: too large on-disk structure); + BUILD_BUG_ON(sizeof(struct nilfs_inode) NILFS_MIN_BLOCKSIZE); + BUILD_BUG_ON(sizeof(struct nilfs_sufile_header) NILFS_MIN_BLOCKSIZE); + BUILD_BUG_ON(sizeof(struct nilfs_segment_usage) NILFS_MIN_BLOCKSIZE); + BUILD_BUG_ON(sizeof(struct nilfs_cpfile_header) NILFS_MIN_BLOCKSIZE); + BUILD_BUG_ON(sizeof(struct nilfs_checkpoint) NILFS_MIN_BLOCKSIZE); + BUILD_BUG_ON(sizeof(struct nilfs_dat_entry) NILFS_MIN_BLOCKSIZE); + BUILD_BUG_ON(sizeof(struct nilfs_super_root) NILFS_MIN_BLOCKSIZE); } static unsigned long @@ -523,8 +525,6 @@ static void init_disk_layout(struct nilfs_disk_info *di, int fd, or shorten segments with -B option., dev_size, (unsigned long long)segment_size * min_nsegments); di-nseginfo = 0; - - nilfs_check_ondisk_sizes(); } static struct nilfs_segment_info *new_segment(struct nilfs_disk_info *di) -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mkfs: check sizes of important structs at build time
On Sat, Jan 4, 2014 at 11:52 PM, Vyacheslav Dubeyko sl...@dubeyko.com wrote: On Jan 4, 2014, at 4:29 PM, Hitoshi Mitake wrote: Current nilfs_check_ondisk_sizes() checks sizes of important structs at run time. The checking should be done at build time. This patch adds a new macro, BUILD_BUG_ON(), for this purpose. It is similar to static_assert() of C++11. If an argument is true, the macro causes a bulid error. Below is an example of BUILD_BUG_ON(). When the checked conditions are true like below: /* intentional change for testing BUILD_BUG_ON() */ static __attribute__((used)) void nilfs_check_ondisk_sizes(void) { BUILD_BUG_ON(sizeof(struct nilfs_inode) NILFS_MIN_BLOCKSIZE); So, why do we need to have function for the case of checking on compilation phase? Just for excluding the checking from other part of code and improve readability. I suppose that we need to have some run-time check anyway. Your approach is correct for the current state of the code. But I feel a necessity in run-time check anyway. Maybe it looks like a paranoia. :) Maybe it needs to extend checking in this place. Do you mean both of the build time check and the run time check? If so, I agree with your opinion. I'll send v2 based on this policy. Thanks, Hitoshi -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nilfs-utils 2/2] cleanerd: adjust the OOM killer
cleanerd is a very important process in a system which uses nilfs. So it should adjust the OOM killer for reducing possibility of the killing as much as possible. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/cleanerd/cleanerd.c | 39 +++ 1 file changed, 39 insertions(+) diff --git a/sbin/cleanerd/cleanerd.c b/sbin/cleanerd/cleanerd.c index edfa083..3494a9a 100644 --- a/sbin/cleanerd/cleanerd.c +++ b/sbin/cleanerd/cleanerd.c @@ -654,6 +654,40 @@ nilfs_cleanerd_select_segments(struct nilfs_cleanerd *cleanerd, return nssegs; } +static int oom_adjust(void) +{ + int fd, err; + const char *path, *score; + struct stat st; + + /* Avoid oom-killer */ + path = /proc/self/oom_score_adj; + score = -1000\n; + + if (stat(path, st)) { + /* oom_score_adj cannot be used, try oom_adj */ + path = /proc/self/oom_adj; + score = -17\n; + } + + fd = open(path, O_WRONLY); + if (fd 0) { + fprintf(stderr, can't adjust oom-killer's pardon %s, %m\n, + path); + return errno; + } + + err = write(fd, score, strlen(score)); + if (err 0) { + fprintf(stderr, can't adjust oom-killer's pardon %s, %m\n, + path); + close(fd); + return errno; + } + close(fd); + return 0; +} + #define DEVNULL/dev/null #define ROOTDIR/ @@ -1549,6 +1583,11 @@ int main(int argc, char *argv[]) exit(1); } + if (oom_adjust() 0) { + fprintf(stderr, adjusting the OOM killer falied: %m\n); + exit(1); + } + openlog(progname, LOG_PID, LOG_DAEMON); syslog(LOG_INFO, start); -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH nilfs-utils 1/2] cleanerd: call _exit(2) twice for ensuring not being a session leader
Current daemonize() function of cleanerd call _exit(2) only once during its process of becoming a daemon process. But in the linux environment, a daemon process should call _exit(2) twice for ensuring not being a session leader. If a process don't do that, unexpected SIGHUP can be sent to the process (though it happens rarely). The signal would be confusing event for cleanerd of nilfs. This patch removes this potential problem. Signed-off-by: Hitoshi Mitake mitake.hito...@lab.ntt.co.jp --- sbin/cleanerd/cleanerd.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/sbin/cleanerd/cleanerd.c b/sbin/cleanerd/cleanerd.c index 26067bd..edfa083 100644 --- a/sbin/cleanerd/cleanerd.c +++ b/sbin/cleanerd/cleanerd.c @@ -676,6 +676,16 @@ static int daemonize(int nochdir, int noclose, int nofork) /* umask(0); */ + /* for ensuring I'm not a session leader */ + if (!nofork) { + pid = fork(); + if (pid 0) + return -1; + else if (pid != 0) + /* parent */ + _exit(0); + } + if (!nochdir (chdir(ROOTDIR) 0)) return -1; -- 1.8.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-nilfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html