Polber commented on code in PR #28335:
URL: https://github.com/apache/beam/pull/28335#discussion_r1319076514


##########
sdks/python/apache_beam/yaml/yaml_provider.py:
##########
@@ -473,23 +490,57 @@ def __init__(self, packages, base_python=sys.executable):
     self._packages = packages
     self._base_python = base_python
 
-  def _key(self):
-    return json.dumps({'binary': self._base_python, 'packages': 
self._packages})
+  @classmethod
+  def _key(cls, base_python, packages):
+    return json.dumps({
+        'binary': base_python, 'packages': sorted(packages)
+    },
+                      sort_keys=True)
 
-  def _venv(self):
-    venv = os.path.join(
-        self.VENV_CACHE,
-        hashlib.sha256(self._key().encode('utf-8')).hexdigest())
+  @classmethod
+  def _path(cls, base_python, packages):
+    return os.path.join(
+        cls.VENV_CACHE,
+        hashlib.sha256(cls._key(base_python,
+                                packages).encode('utf-8')).hexdigest())
+
+  @classmethod
+  def _create_venv_from_scratch(cls, base_python, packages):
+    venv = cls._path(base_python, packages)
     if not os.path.exists(venv):
-      python_binary = os.path.join(venv, 'bin', 'python')
-      subprocess.run([self._base_python, '-m', 'venv', venv], check=True)
-      subprocess.run([python_binary, '-m', 'ensurepip'], check=True)
-      subprocess.run([python_binary, '-m', 'pip', 'install'] + self._packages,
+      subprocess.run([base_python, '-m', 'venv', venv], check=True)
+      venv_python = os.path.join(venv, 'bin', 'python')
+      subprocess.run([venv_python, '-m', 'ensurepip'], check=True)
+      subprocess.run([venv_python, '-m', 'pip', 'install'] + packages,
                      check=True)
       with open(venv + '-requirements.txt', 'w') as fout:
-        fout.write('\n'.join(self._packages))
+        fout.write('\n'.join(packages))
     return venv

Review Comment:
   Is there a way to copy the local beam installation into the venv? The `pip 
install apache_beam` step is slow, so skipping this step when there are PyPi 
transforms would speedup startup time of the expansion service.
   
   Also, if it is going to be installed using `pip`, does it not need the 
`[gcp]` option?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to