Repository: beam Updated Branches: refs/heads/master 44472c76c -> 07274bbfe
Fix Python Dataflow default job name Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/021e2a07 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/021e2a07 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/021e2a07 Branch: refs/heads/master Commit: 021e2a075df4832cb43e678d203fb7c56711032b Parents: 44472c7 Author: Pablo <pabl...@google.com> Authored: Fri Mar 24 14:31:33 2017 -0700 Committer: Ahmet Altay <al...@google.com> Committed: Mon Mar 27 15:15:16 2017 -0700 ---------------------------------------------------------------------- .../runners/dataflow/internal/apiclient.py | 21 ++++++++++++++++---- .../runners/dataflow/internal/apiclient_test.py | 12 ++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/021e2a07/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py index e980b14..f7daed0 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py @@ -285,12 +285,25 @@ class Job(object): indent=2, sort_keys=True) @staticmethod + def _build_default_job_name(user_name): + """Generates a default name for a job. + + user_name is lowercased, and any characters outside of [-a-z0-9] + are removed. If necessary, the user_name is truncated to shorten + the job name to 63 characters.""" + user_name = re.sub('[^-a-z0-9]', '', user_name.lower()) + date_component = datetime.utcnow().strftime('%m%d%H%M%S-%f') + app_user_name = 'beamapp-{}'.format(user_name) + job_name = '{}-{}'.format(app_user_name, date_component) + if len(job_name) > 63: + job_name = '{}-{}'.format(app_user_name[:-(len(job_name) - 63)], + date_component) + return job_name + + @staticmethod def default_job_name(job_name): if job_name is None: - user_name = getpass.getuser().lower() - date_component = datetime.utcnow().strftime('%m%d%H%M%S-%f') - app_name = 'beamapp' - job_name = '{}-{}-{}'.format(app_name, user_name, date_component) + job_name = Job._build_default_job_name(getpass.getuser()) return job_name def __init__(self, options): http://git-wip-us.apache.org/repos/asf/beam/blob/021e2a07/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py index d60c7a5..e9aaacb 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py @@ -15,7 +15,6 @@ # limitations under the License. # """Unit tests for the apiclient module.""" - import unittest from mock import Mock @@ -45,6 +44,17 @@ class UtilTest(unittest.TestCase): pipeline_options, DataflowRunner.BATCH_ENVIRONMENT_MAJOR_VERSION) + def test_invalid_default_job_name(self): + # Regexp for job names in dataflow. + regexp = '^[a-z]([-a-z0-9]{0,61}[a-z0-9])?$' + + job_name = apiclient.Job._build_default_job_name('invalid.-_user_n*/ame') + self.assertRegexpMatches(job_name, regexp) + + job_name = apiclient.Job._build_default_job_name( + 'invalid-extremely-long.username_that_shouldbeshortened_or_is_invalid') + self.assertRegexpMatches(job_name, regexp) + def test_default_job_name(self): job_name = apiclient.Job.default_job_name(None) regexp = 'beamapp-.*-[0-9]{10}-[0-9]{6}'