[jira] [Commented] (SPARK-11085) Add support for HTTP proxy
[ https://issues.apache.org/jira/browse/SPARK-11085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15437727#comment-15437727 ] SURESH CHAGANTI commented on SPARK-11085: - Hi All, I have made the code changes to accept the HTTP proxy as a run-time argument and use it for outbound calls. Below is the pull request: https://github.com/SureshChaganti/spark-ec2/commit/cfd4bf727bdf46b9456f8f4d89221d1377d9c221 > Add support for HTTP proxy > --- > > Key: SPARK-11085 > URL: https://issues.apache.org/jira/browse/SPARK-11085 > Project: Spark > Issue Type: Improvement > Components: Spark Shell, Spark Submit >Reporter: Dustin Cote >Priority: Minor > > Add a way to update ivysettings.xml for the spark-shell and spark-submit to > support proxy settings for clusters that need to access a remote repository > through an http proxy. Typically this would be done like: > JAVA_OPTS="$JAVA_OPTS -Dhttp.proxyHost=proxy.host -Dhttp.proxyPort=8080 > -Dhttps.proxyHost=proxy.host.secure -Dhttps.proxyPort=8080" > Directly in the ivysettings.xml would look like: > <ivysettings> > <setproxy proxyhost="proxy.host" > proxyport="8080" > nonproxyhosts="nonproxy.host"/> > </ivysettings> > > Even better would be a way to customize the ivysettings.xml with command > options. -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Issue Comment Deleted] (SPARK-11085) Add support for HTTP proxy
[ https://issues.apache.org/jira/browse/SPARK-11085?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] SURESH CHAGANTI updated SPARK-11085: Comment: was deleted (was: The following script accepts the "--proxy_host_port" argument: from __future__ import division, print_function, with_statement import codecs import hashlib import itertools import logging import os import os.path import pipes import random import shutil import string from stat import S_IRUSR import subprocess import sys import tarfile import tempfile import textwrap import time import warnings from datetime import datetime from optparse import OptionParser from sys import stderr if sys.version < "3": from urllib2 import urlopen, Request, HTTPError else: from urllib.request import urlopen, Request from urllib.error import HTTPError raw_input = input xrange = range SPARK_EC2_VERSION = "1.6.2" SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ "0.7.3", "0.8.0", "0.8.1", "0.9.0", "0.9.1", "0.9.2", "1.0.0", "1.0.1", "1.0.2", "1.1.0", "1.1.1", "1.2.0", "1.2.1", "1.3.0", "1.3.1", "1.4.0", "1.4.1", "1.5.0", "1.5.1", "1.5.2", "1.6.0", "1.6.1", "1.6.2", ]) SPARK_TACHYON_MAP = { "1.0.0": "0.4.1", "1.0.1": "0.4.1", "1.0.2": "0.4.1", "1.1.0": "0.5.0", "1.1.1": "0.5.0", "1.2.0": "0.5.0", "1.2.1": "0.5.0", "1.3.0": "0.5.0", "1.3.1": "0.5.0", "1.4.0": "0.6.4", "1.4.1": "0.6.4", "1.5.0": "0.7.1", "1.5.1": "0.7.1", "1.5.2": "0.7.1", "1.6.0": "0.8.2", "1.6.1": "0.8.2", "1.6.2": "0.8.2", } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark" # Default location to get the spark-ec2 scripts (and ami-list) from DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2" DEFAULT_SPARK_EC2_BRANCH = "branch-1.6" def setup_external_libs(libs): """ Download external libraries from PyPI to SPARK_EC2_DIR/lib/ and prepend them to our PATH. 
""" PYPI_URL_PREFIX = "https://pypi.python.org/packages/source" SPARK_EC2_LIB_DIR = os.path.join(SPARK_EC2_DIR, "lib") if not os.path.exists(SPARK_EC2_LIB_DIR): print("Downloading external libraries that spark-ec2 needs from PyPI to {path}...".format( path=SPARK_EC2_LIB_DIR )) print("This should be a one-time operation.") os.mkdir(SPARK_EC2_LIB_DIR) for lib in libs: versioned_lib_name = "{n}-{v}".format(n=lib["name"], v=lib["version"]) lib_dir = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name) if not os.path.isdir(lib_dir): tgz_file_path = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name + ".tar.gz") print(" - Downloading {lib}...".format(lib=lib["name"])) download_stream = urlopen( "{prefix}/{first_letter}/{lib_name}/{lib_name}-{lib_version}.tar.gz".format( prefix=PYPI_URL_PREFIX, first_letter=lib["name"][:1], lib_name=lib["name"], lib_version=lib["version"] ) ) with open(tgz_file_path, "wb") as tgz_file: tgz_file.write(download_stream.read()) with open(tgz_file_path, "rb") as tar: if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr) sys.exit(1) tar = tarfile.open(tgz_file_path) tar.extractall(path=SPARK_EC2_LIB_DIR) tar.close() os.remove(tgz_file_path) print(" - Finished downloading {lib}.".format(lib=lib["name"])) sys.path.insert(1, lib_dir) # Only PyPI libraries are supported. 
external_libs = [ { "name": "boto", "version": "2.34.0", "md5": "5556223d2d0cc4d06dd4829e671dcecd" } ] setup_external_libs(external_libs) import boto from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBSBlockDeviceType from boto import ec2 class UsageError(Exception): pass # Configure and parse our command-line arguments def parse_args(): parser = OptionParser( prog="spark-ec2", version="%prog {v}".format(v=SPARK_EC2_VERSION), usage="%prog [options] <action> <cluster_name>\n\n" + "<action> can be: launch, destroy, login, stop, start, get-master, reboot-slaves") parser.add_option( "-s", "--slaves", type="int", default=1, help="Number of slaves to launch (default: %default)") parser.add_option( "-w", "--wait", type="int", help="DEPRECATED (no longer necessary) - Seconds to wait for nodes to start") parser.add_option( "-k", "--key-pair", help="Key pair to use on
[jira] [Commented] (SPARK-11085) Add support for HTTP proxy
[ https://issues.apache.org/jira/browse/SPARK-11085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15437505#comment-15437505 ] SURESH CHAGANTI commented on SPARK-11085: - The following script accepts the "--proxy_host_port" argument: from __future__ import division, print_function, with_statement import codecs import hashlib import itertools import logging import os import os.path import pipes import random import shutil import string from stat import S_IRUSR import subprocess import sys import tarfile import tempfile import textwrap import time import warnings from datetime import datetime from optparse import OptionParser from sys import stderr if sys.version < "3": from urllib2 import urlopen, Request, HTTPError else: from urllib.request import urlopen, Request from urllib.error import HTTPError raw_input = input xrange = range SPARK_EC2_VERSION = "1.6.2" SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ "0.7.3", "0.8.0", "0.8.1", "0.9.0", "0.9.1", "0.9.2", "1.0.0", "1.0.1", "1.0.2", "1.1.0", "1.1.1", "1.2.0", "1.2.1", "1.3.0", "1.3.1", "1.4.0", "1.4.1", "1.5.0", "1.5.1", "1.5.2", "1.6.0", "1.6.1", "1.6.2", ]) SPARK_TACHYON_MAP = { "1.0.0": "0.4.1", "1.0.1": "0.4.1", "1.0.2": "0.4.1", "1.1.0": "0.5.0", "1.1.1": "0.5.0", "1.2.0": "0.5.0", "1.2.1": "0.5.0", "1.3.0": "0.5.0", "1.3.1": "0.5.0", "1.4.0": "0.6.4", "1.4.1": "0.6.4", "1.5.0": "0.7.1", "1.5.1": "0.7.1", "1.5.2": "0.7.1", "1.6.0": "0.8.2", "1.6.1": "0.8.2", "1.6.2": "0.8.2", } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark" # Default location to get the spark-ec2 scripts (and ami-list) from DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2" DEFAULT_SPARK_EC2_BRANCH = "branch-1.6" def setup_external_libs(libs): """ Download external libraries from PyPI to SPARK_EC2_DIR/lib/ and prepend them to our PATH. 
""" PYPI_URL_PREFIX = "https://pypi.python.org/packages/source" SPARK_EC2_LIB_DIR = os.path.join(SPARK_EC2_DIR, "lib") if not os.path.exists(SPARK_EC2_LIB_DIR): print("Downloading external libraries that spark-ec2 needs from PyPI to {path}...".format( path=SPARK_EC2_LIB_DIR )) print("This should be a one-time operation.") os.mkdir(SPARK_EC2_LIB_DIR) for lib in libs: versioned_lib_name = "{n}-{v}".format(n=lib["name"], v=lib["version"]) lib_dir = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name) if not os.path.isdir(lib_dir): tgz_file_path = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name + ".tar.gz") print(" - Downloading {lib}...".format(lib=lib["name"])) download_stream = urlopen( "{prefix}/{first_letter}/{lib_name}/{lib_name}-{lib_version}.tar.gz".format( prefix=PYPI_URL_PREFIX, first_letter=lib["name"][:1], lib_name=lib["name"], lib_version=lib["version"] ) ) with open(tgz_file_path, "wb") as tgz_file: tgz_file.write(download_stream.read()) with open(tgz_file_path, "rb") as tar: if hashlib.md5(tar.read()).hexdigest() != lib["md5"]: print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr) sys.exit(1) tar = tarfile.open(tgz_file_path) tar.extractall(path=SPARK_EC2_LIB_DIR) tar.close() os.remove(tgz_file_path) print(" - Finished downloading {lib}.".format(lib=lib["name"])) sys.path.insert(1, lib_dir) # Only PyPI libraries are supported. 
external_libs = [ { "name": "boto", "version": "2.34.0", "md5": "5556223d2d0cc4d06dd4829e671dcecd" } ] setup_external_libs(external_libs) import boto from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, EBSBlockDeviceType from boto import ec2 class UsageError(Exception): pass # Configure and parse our command-line arguments def parse_args(): parser = OptionParser( prog="spark-ec2", version="%prog {v}".format(v=SPARK_EC2_VERSION), usage="%prog [options] <action> <cluster_name>\n\n" + "<action> can be: launch, destroy, login, stop, start, get-master, reboot-slaves") parser.add_option( "-s", "--slaves", type="int", default=1, help="Number of slaves to launch (default: %default)") parser.add_option( "-w", "--wait", type="int", help="DEPRECATED (no longer necessary) - Seconds to wait for nodes to start") parser.add_option( "-k", "--key-pair", help="Key pair to use