Devin Boyer created SPARK-34100:
-----------------------------------

             Summary: pyspark 2.4 packages can't be installed via pip on Amazon 
Linux 2
                 Key: SPARK-34100
                 URL: https://issues.apache.org/jira/browse/SPARK-34100
             Project: Spark
          Issue Type: Bug
          Components: Deploy, PySpark
    Affects Versions: 2.4.7
         Environment: Amazon Linux 2, with Python 3.7.9 and pip 9.0.3 (also 
tested with pip 20.3.3), using Docker or EMR 5.32.0

 

Example Dockerfile to reproduce:

{{FROM amazonlinux:2}}
{{RUN yum install -y python3}}
{{RUN pip3 install pyspark==2.4.7}}

 
            Reporter: Devin Boyer


I'm unable to install the pyspark Python package on Amazon Linux 2, whether in 
a Docker image or an EMR cluster. Amazon Linux 2 currently ships with Python 
3.7 and pip 9.0.3, but upgrading pip yields the same result.

 

When installing the package, the installation fails with the error 
"ValueError: bad marshal data (unknown type code)". Full example stack trace 
below.

 

This bug prevents using pyspark in simple testing environments, and prevents 
using tools that depend on the pyspark package, such as 
[https://github.com/awslabs/python-deequ].

 

Stack Trace:

{{Step 3/3 : RUN pip3 install pyspark==2.4.7}}
{{ ---> Running in 2c6e1c1de62f}}
{{WARNING: Running pip install with root privileges is generally not a good 
idea. Try `pip3 install --user` instead.}}
{{Collecting pyspark==2.4.7}}
{{ Downloading 
https://files.pythonhosted.org/packages/e2/06/29f80e5a464033432eedf89924e7aa6ebbc47ce4dcd956853a73627f2c07/pyspark-2.4.7.tar.gz
 (217.9MB)}}
{{ Complete output from command python setup.py egg_info:}}
{{ Could not import pypandoc - required to package PySpark}}
{{ /usr/lib64/python3.7/distutils/dist.py:274: UserWarning: Unknown 
distribution option: 'long_description_content_type'}}
{{ warnings.warn(msg)}}
{{ zip_safe flag not set; analyzing archive contents...}}
{{ Traceback (most recent call last):}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 154, in 
save_modules}}
{{ yield saved}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 195, in 
setup_context}}
{{ yield}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 250, in 
run_setup}}
{{ _execfile(setup_script, ns)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 45, in 
_execfile}}
{{ exec(code, globals, locals)}}
{{ File "/tmp/easy_install-l742j64w/pypandoc-1.5/setup.py", line 111, in 
<module>}}
{{ # using Python imports instead which will be resolved correctly.}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/__init__.py", line 129, in 
setup}}
{{ return distutils.core.setup(**attrs)}}
{{ File "/usr/lib64/python3.7/distutils/core.py", line 148, in setup}}
{{ dist.run_commands()}}
{{ File "/usr/lib64/python3.7/distutils/dist.py", line 966, in run_commands}}
{{ self.run_command(cmd)}}
{{ File "/usr/lib64/python3.7/distutils/dist.py", line 985, in run_command}}
{{ cmd_obj.run()}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 218, in run}}
{{ os.path.join(archive_root, 'EGG-INFO'), self.zip_safe()}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 269, in zip_safe}}
{{ return analyze_egg(self.bdist_dir, self.stubs)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 379, in analyze_egg}}
{{ safe = scan_module(egg_dir, base, name, stubs) and safe}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 416, in scan_module}}
{{ code = marshal.load(f)}}
{{ ValueError: bad marshal data (unknown type code)}}
{{During handling of the above exception, another exception occurred:}}
{{Traceback (most recent call last):}}
{{ File "<string>", line 1, in <module>}}
{{ File "/tmp/pip-build-j3d56a0n/pyspark/setup.py", line 224, in <module>}}
{{ 'Programming Language :: Python :: Implementation :: PyPy']}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/__init__.py", line 128, in 
setup}}
{{ _install_setup_requires(attrs)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/__init__.py", line 123, in 
_install_setup_requires}}
{{ dist.fetch_build_eggs(dist.setup_requires)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/dist.py", line 461, in 
fetch_build_eggs}}
{{ replace_conflicting=True,}}
{{ File "/usr/lib/python3.7/site-packages/pkg_resources/__init__.py", line 866, 
in resolve}}
{{ replace_conflicting=replace_conflicting}}
{{ File "/usr/lib/python3.7/site-packages/pkg_resources/__init__.py", line 
1146, in best_match}}
{{ return self.obtain(req, installer)}}
{{ File "/usr/lib/python3.7/site-packages/pkg_resources/__init__.py", line 
1158, in obtain}}
{{ return installer(requirement)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/dist.py", line 528, in 
fetch_build_egg}}
{{ return cmd.easy_install(req)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/easy_install.py", 
line 672, in easy_install}}
{{ return self.install_item(spec, dist.location, tmpdir, deps)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/easy_install.py", 
line 698, in install_item}}
{{ dists = self.install_eggs(spec, download, tmpdir)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/easy_install.py", 
line 881, in install_eggs}}
{{ return self.build_and_install(setup_script, setup_base)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/easy_install.py", 
line 1149, in build_and_install}}
{{ self.run_setup(setup_script, setup_base, args)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/easy_install.py", 
line 1135, in run_setup}}
{{ run_setup(setup_script, args)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 253, in 
run_setup}}
{{ raise}}
{{ File "/usr/lib64/python3.7/contextlib.py", line 130, in __exit__}}
{{ self.gen.throw(type, value, traceback)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 195, in 
setup_context}}
{{ yield}}
{{ File "/usr/lib64/python3.7/contextlib.py", line 130, in __exit__}}
{{ self.gen.throw(type, value, traceback)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 166, in 
save_modules}}
{{ saved_exc.resume()}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 141, in 
resume}}
{{ six.reraise(type, exc, self._tb)}}
{{ File "/usr/lib/python3.7/site-packages/pkg_resources/_vendor/six.py", line 
685, in reraise}}
{{ raise value.with_traceback(tb)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 154, in 
save_modules}}
{{ yield saved}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 195, in 
setup_context}}
{{ yield}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 250, in 
run_setup}}
{{ _execfile(setup_script, ns)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/sandbox.py", line 45, in 
_execfile}}
{{ exec(code, globals, locals)}}
{{ File "/tmp/easy_install-l742j64w/pypandoc-1.5/setup.py", line 111, in 
<module>}}
{{ # using Python imports instead which will be resolved correctly.}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/__init__.py", line 129, in 
setup}}
{{ return distutils.core.setup(**attrs)}}
{{ File "/usr/lib64/python3.7/distutils/core.py", line 148, in setup}}
{{ dist.run_commands()}}
{{ File "/usr/lib64/python3.7/distutils/dist.py", line 966, in run_commands}}
{{ self.run_command(cmd)}}
{{ File "/usr/lib64/python3.7/distutils/dist.py", line 985, in run_command}}
{{ cmd_obj.run()}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 218, in run}}
{{ os.path.join(archive_root, 'EGG-INFO'), self.zip_safe()}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 269, in zip_safe}}
{{ return analyze_egg(self.bdist_dir, self.stubs)}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 379, in analyze_egg}}
{{ safe = scan_module(egg_dir, base, name, stubs) and safe}}
{{ File "/usr/lib/python3.7/site-packages/setuptools/command/bdist_egg.py", 
line 416, in scan_module}}
{{ code = marshal.load(f)}}
{{ ValueError: bad marshal data (unknown type code)}}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to