EBernhardson has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/386320 )
Change subject: Delete training output dir if empty on exception ...................................................................... Delete training output dir if empty on exception It's a bit annoying to have to manually delete this directory if some error happens when running the first training. Delete it when an exception is thrown and it's still empty. Change-Id: I1e425fc7b809a8c5e7cdb6ce2f0a1c8744a2c7b5 --- M mjolnir/cli/training_pipeline.py 1 file changed, 20 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/search/MjoLniR refs/changes/20/386320/1 diff --git a/mjolnir/cli/training_pipeline.py b/mjolnir/cli/training_pipeline.py index dc32bd7..0e9ae75 100644 --- a/mjolnir/cli/training_pipeline.py +++ b/mjolnir/cli/training_pipeline.py @@ -11,6 +11,7 @@ from __future__ import absolute_import import argparse +import glob import logging import mjolnir.training.xgboost import os @@ -23,16 +24,6 @@ def main(sc, sqlContext, input_dir, output_dir, wikis, initial_num_trees, final_num_trees, num_workers, num_cv_jobs, num_folds, test_dir, zero_features): - - if os.path.exists(output_dir): - logging.error('Output directory (%s) already exists' % (output_dir)) - sys.exit(1) - - # Maybe this is a bit early to create the path ... but should be fine. - # The annoyance might be that an error in training requires deleting - # this directory to try again. - os.mkdir(output_dir) - for wiki in wikis: print 'Training wiki: %s' % (wiki) df_hits_with_features = ( @@ -174,4 +165,22 @@ sc = SparkContext(appName="MLR: training pipeline") sc.setLogLevel('WARN') sqlContext = HiveContext(sc) - main(sc, sqlContext, **args) + + output_dir = args['output_dir'] + if os.path.exists(output_dir): + logging.error('Output directory (%s) already exists' % (output_dir)) + sys.exit(1) + + # Maybe this is a bit early to create the path ... but should be fine. + # The annoyance might be that an error in training requires deleting + # this directory to try again. 
+ os.mkdir(output_dir) + + try: + main(sc, sqlContext, **args) + except: + # If the directory we created is still empty delete it + # so it doesn't need to be manually deleted + if not len(glob.glob(os.path.join(output_dir, '*'))): + os.rmdir(output_dir) + raise -- To view, visit https://gerrit.wikimedia.org/r/386320 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1e425fc7b809a8c5e7cdb6ce2f0a1c8744a2c7b5 Gerrit-PatchSet: 1 Gerrit-Project: search/MjoLniR Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits