Repository: beam
Updated Branches:
  refs/heads/master 44472c76c -> 07274bbfe


Fix Python Dataflow default job name


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/021e2a07
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/021e2a07
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/021e2a07

Branch: refs/heads/master
Commit: 021e2a075df4832cb43e678d203fb7c56711032b
Parents: 44472c7
Author: Pablo <pabl...@google.com>
Authored: Fri Mar 24 14:31:33 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Mon Mar 27 15:15:16 2017 -0700

----------------------------------------------------------------------
 .../runners/dataflow/internal/apiclient.py      | 21 ++++++++++++++++----
 .../runners/dataflow/internal/apiclient_test.py | 12 ++++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/021e2a07/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index e980b14..f7daed0 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -285,12 +285,25 @@ class Job(object):
         indent=2, sort_keys=True)
 
   @staticmethod
+  def _build_default_job_name(user_name):
+    """Generates a default name for a job.
+
+    user_name is lowercased, and any characters outside of [-a-z0-9]
+    are removed. If necessary, the user_name is truncated to shorten
+    the job name to 63 characters."""
+    user_name = re.sub('[^-a-z0-9]', '', user_name.lower())
+    date_component = datetime.utcnow().strftime('%m%d%H%M%S-%f')
+    app_user_name = 'beamapp-{}'.format(user_name)
+    job_name = '{}-{}'.format(app_user_name, date_component)
+    if len(job_name) > 63:
+      job_name = '{}-{}'.format(app_user_name[:-(len(job_name) - 63)],
+                                date_component)
+    return job_name
+
+  @staticmethod
   def default_job_name(job_name):
     if job_name is None:
-      user_name = getpass.getuser().lower()
-      date_component = datetime.utcnow().strftime('%m%d%H%M%S-%f')
-      app_name = 'beamapp'
-      job_name = '{}-{}-{}'.format(app_name, user_name, date_component)
+      job_name = Job._build_default_job_name(getpass.getuser())
     return job_name
 
   def __init__(self, options):

http://git-wip-us.apache.org/repos/asf/beam/blob/021e2a07/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
index d60c7a5..e9aaacb 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 #
 """Unit tests for the apiclient module."""
-
 import unittest
 
 from mock import Mock
@@ -45,6 +44,17 @@ class UtilTest(unittest.TestCase):
         pipeline_options,
         DataflowRunner.BATCH_ENVIRONMENT_MAJOR_VERSION)
 
+  def test_invalid_default_job_name(self):
+    # Regexp for job names in dataflow.
+    regexp = '^[a-z]([-a-z0-9]{0,61}[a-z0-9])?$'
+
+    job_name = apiclient.Job._build_default_job_name('invalid.-_user_n*/ame')
+    self.assertRegexpMatches(job_name, regexp)
+
+    job_name = apiclient.Job._build_default_job_name(
+        'invalid-extremely-long.username_that_shouldbeshortened_or_is_invalid')
+    self.assertRegexpMatches(job_name, regexp)
+
   def test_default_job_name(self):
     job_name = apiclient.Job.default_job_name(None)
     regexp = 'beamapp-.*-[0-9]{10}-[0-9]{6}'

Reply via email to