From: Rémi Palancher <[email protected]> Homogenize memory management after error detection in _parse_options() with exit_code and breaks only. Always delay memory free of structure file_opts at the end of the function when exit_code is checked. This avoids double free() and segfaults with certain errors. For instance, with a line0 with 2 options w/o '=':
# cat slurmdbd.dump Cluster - test:Fairshare=1:QOS='normal' Parent - root Account - name1:name2:Description='none':Organization='none':Fairshare=1 # sacctmgr load slurmdbd.dump For cluster test Bad format on name2: End your option with an '=' sign Segmentation fault Here is the backtrace according to gdb (with slurm 2.5.7 but code did not change on relevant parts except line numbers): Program received signal SIGSEGV, Segmentation fault. __pthread_mutex_lock (mutex=0xd8) at pthread_mutex_lock.c:50 50 pthread_mutex_lock.c: No such file or directory. (gdb) bt #0 __pthread_mutex_lock (mutex=0xd8) at pthread_mutex_lock.c:50 #1 0x000000000046716a in list_destroy (l=0xb0) at list.c:306 #2 0x0000000000438bd0 in _destroy_sacctmgr_file_opts (object=0x79b3d8) at file_functions.c:227 #3 0x0000000000439bc1 in _parse_options (options=0x7ffffff7e98a "name1:name2:Description='none':Organization='none':Fairshare=1\n") at file_functions.c:542 #4 0x000000000043e5d1 in load_sacctmgr_cfg_file (argc=1, argv=0x78f0b0) at file_functions.c:2240 #5 0x0000000000440b81 in _process_command (argc=2, argv=0x78f0a8) at sacctmgr.c:395 #6 0x0000000000440649 in main (argc=3, argv=0x7fffffffec88) at sacctmgr.c:217 After this commit, the result is better: # sacctmgr load slurmdbd.dump For cluster test Bad format on name2: End your option with an '=' sign Problem with line(3) Problem with requests: Unspecified error All these _destroy() calls removed should not leak memory. Here are the valgrind summary as a proof: Before: # valgrind sacctmgr load slurmdbd.dump ... ==21552== Invalid free() / delete / delete[] ==21552== at 0x4C240FD: free (vg_replace_malloc.c:366) ==21552== by 0x463A27: slurm_xfree (xmalloc.c:270) ==21552== by 0x438BF9: _destroy_sacctmgr_file_opts (file_functions.c:230) ==21552== by 0x439C05: _parse_options (file_functions.c:548) ==21552== by 0x43E5F5: load_sacctmgr_cfg_file (file_functions.c:2243) ==21552== by 0x440BA4: _process_command (sacctmgr.c:395) ==21552== by 0x44066C: main (sacctmgr.c:217) ==21552== Address 0x5cd8360 is 0 bytes inside a block of size 168 free'd ==21552== at 0x4C240FD: free (vg_replace_malloc.c:366) ==21552== by 0x463A27: slurm_xfree (xmalloc.c:270) ==21552== by 0x438BF9: _destroy_sacctmgr_file_opts (file_functions.c:230) ==21552== by 0x438E5D: _parse_options (file_functions.c:291) ==21552== by 0x43E5F5: load_sacctmgr_cfg_file (file_functions.c:2243) ==21552== by 0x440BA4: _process_command (sacctmgr.c:395) ==21552== by 0x44066C: main (sacctmgr.c:217) ==21552== Problem with line(3) Problem with requests: Unspecified error ==21552== ==21552== HEAP SUMMARY: ==21552== in use at exit: 66,471 bytes in 819 blocks ==21552== total heap usage: 3,819 allocs, 3,001 frees, 363,156 bytes allocated ==21552== ==21552== LEAK SUMMARY: ==21552== definitely lost: 60 bytes in 1 blocks ==21552== indirectly lost: 240 bytes in 10 blocks ==21552== possibly lost: 35,300 bytes in 516 blocks ==21552== still reachable: 30,871 bytes in 292 blocks ==21552== suppressed: 0 bytes in 0 blocks ==21552== Rerun with --leak-check=full to see details of leaked memory ==21552== ==21552== For counts of detected and suppressed errors, rerun with: -v ==21552== ERROR SUMMARY: 11 errors from 11 contexts (suppressed: 4 from 4) After: # valgrind sacctmgr load slurmdbd.dump ==26228== Memcheck, a memory error detector ==26228== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al. ==26228== Using Valgrind-3.6.0.SVN-Debian and LibVEX; rerun with -h for copyright info ==26228== Command: sacctmgr load slurmdbd.dump ==26228== For cluster test Bad format on name2: End your option with an '=' sign Problem with line(3) Problem with requests: Unspecified error ==26228== ==26228== HEAP SUMMARY: ==26228== in use at exit: 66,471 bytes in 819 blocks ==26228== total heap usage: 3,819 allocs, 3,000 frees, 363,156 bytes allocated ==26228== ==26228== LEAK SUMMARY: ==26228== definitely lost: 60 bytes in 1 blocks ==26228== indirectly lost: 240 bytes in 10 blocks ==26228== possibly lost: 35,300 bytes in 516 blocks ==26228== still reachable: 30,871 bytes in 292 blocks ==26228== suppressed: 0 bytes in 0 blocks ==26228== Rerun with --leak-check=full to see details of leaked memory ==26228== ==26228== For counts of detected and suppressed errors, rerun with: -v ==26228== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 4 from 4) Exact same numbers before and after. Signed-off-by: Rémi Palancher <[email protected]> --- src/sacctmgr/file_functions.c | 18 ++---------------- 1 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index 7359dd1..249e89c 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -287,7 +287,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) fprintf(stderr, " Bad format on %s: " "End your option with " "an '=' sign\n", sub); - _destroy_sacctmgr_file_opts(file_opts); break; } file_opts->name = xstrdup(option); @@ -320,12 +319,12 @@ static sacctmgr_file_opts_t *_parse_options(char *options) g_qos_list, option); if (file_opts->def_qos_id == NO_VAL) { + exit_code=1; fprintf(stderr, "You gave a bad qos '%s'. " "Use 'list qos' to get " "complete list.\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "DefaultWCKey", @@ -347,7 +346,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad FairShare value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpCPUMins", @@ -357,7 +355,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad GrpCPUMins value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpCPUs", MAX(command_len, 7))) { @@ -366,7 +363,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad GrpCPUs value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpJobs", MAX(command_len, 4))) { @@ -375,7 +371,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad GrpJobs value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpMemory", @@ -385,7 +380,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad GrpMemory value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpNodes", @@ -395,7 +389,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad GrpNodes value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpSubmitJobs", @@ -405,7 +398,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad GrpJobs value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "GrpWall", MAX(command_len, 4))) { @@ -420,7 +412,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) fprintf(stderr, " Bad GrpWall time format: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "MaxCPUMinsPerJob", @@ -432,7 +423,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad MaxCPUMins value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "MaxCPUsPerJob", @@ -442,7 +432,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad MaxCPUs value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "MaxJobs", MAX(command_len, 4))) { @@ -451,7 +440,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad MaxJobs value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "MaxNodesPerJob", @@ -461,7 +449,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad MaxNodes value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "MaxSubmitJobs", @@ -471,7 +458,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) exit_code=1; fprintf(stderr, " Bad MaxJobs value: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "MaxWallDurationPerJob", @@ -487,7 +473,6 @@ static sacctmgr_file_opts_t *_parse_options(char *options) fprintf(stderr, " Bad MaxWall time format: %s\n", option); - _destroy_sacctmgr_file_opts(file_opts); break; } } else if (!strncasecmp (sub, "Organization", @@ -521,6 +506,7 @@ static sacctmgr_file_opts_t *_parse_options(char *options) } else { exit_code=1; fprintf(stderr, " Unknown option: %s\n", sub); + break; } xfree(sub); -- 1.7.2.5
