[jira] [Commented] (SPARK-11085) Add support for HTTP proxy

2016-08-25 Thread SURESH CHAGANTI (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-11085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15437727#comment-15437727
 ] 

SURESH CHAGANTI commented on SPARK-11085:
-

Hi All,
I have made the code changes to accept the HTTP proxy as a run-time argument 
and use that  for out bound calls

below is the pull request:

https://github.com/SureshChaganti/spark-ec2/commit/cfd4bf727bdf46b9456f8f4d89221d1377d9c221


> Add support for HTTP proxy 
> ---
>
> Key: SPARK-11085
> URL: https://issues.apache.org/jira/browse/SPARK-11085
> Project: Spark
>  Issue Type: Improvement
>  Components: Spark Shell, Spark Submit
>Reporter: Dustin Cote
>Priority: Minor
>
> Add a way to update ivysettings.xml for the spark-shell and spark-submit to 
> support proxy settings for clusters that need to access a remote repository 
> through an http proxy.  Typically this would be done like:
> JAVA_OPTS="$JAVA_OPTS -Dhttp.proxyHost=proxy.host -Dhttp.proxyPort=8080 
> -Dhttps.proxyHost=proxy.host.secure -Dhttps.proxyPort=8080"
> Directly in the ivysettings.xml would look like:
>  
>  proxyport="8080" 
> nonproxyhosts="nonproxy.host"/> 
>  
> Even better would be a way to customize the ivysettings.xml with command 
> options.  



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

-
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org



[jira] [Issue Comment Deleted] (SPARK-11085) Add support for HTTP proxy

2016-08-25 Thread SURESH CHAGANTI (JIRA)

 [ 
https://issues.apache.org/jira/browse/SPARK-11085?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

SURESH CHAGANTI updated SPARK-11085:

Comment: was deleted

(was: The following Script accepts the  "--proxy_host_port" argument

from __future__ import division, print_function, with_statement

import codecs
import hashlib
import itertools
import logging
import os
import os.path
import pipes
import random
import shutil
import string
from stat import S_IRUSR
import subprocess
import sys
import tarfile
import tempfile
import textwrap
import time
import warnings
from datetime import datetime
from optparse import OptionParser
from sys import stderr

if sys.version < "3":
from urllib2 import urlopen, Request, HTTPError
else:
from urllib.request import urlopen, Request
from urllib.error import HTTPError
raw_input = input
xrange = range

SPARK_EC2_VERSION = "1.6.2"
SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

VALID_SPARK_VERSIONS = set([
"0.7.3",
"0.8.0",
"0.8.1",
"0.9.0",
"0.9.1",
"0.9.2",
"1.0.0",
"1.0.1",
"1.0.2",
"1.1.0",
"1.1.1",
"1.2.0",
"1.2.1",
"1.3.0",
"1.3.1",
"1.4.0",
"1.4.1",
"1.5.0",
"1.5.1",
"1.5.2",
"1.6.0",
"1.6.1",
"1.6.2",
])

SPARK_TACHYON_MAP = {
"1.0.0": "0.4.1",
"1.0.1": "0.4.1",
"1.0.2": "0.4.1",
"1.1.0": "0.5.0",
"1.1.1": "0.5.0",
"1.2.0": "0.5.0",
"1.2.1": "0.5.0",
"1.3.0": "0.5.0",
"1.3.1": "0.5.0",
"1.4.0": "0.6.4",
"1.4.1": "0.6.4",
"1.5.0": "0.7.1",
"1.5.1": "0.7.1",
"1.5.2": "0.7.1",
"1.6.0": "0.8.2",
"1.6.1": "0.8.2",
"1.6.2": "0.8.2",
}

DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark;

# Default location to get the spark-ec2 scripts (and ami-list) from
DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2;
DEFAULT_SPARK_EC2_BRANCH = "branch-1.6"


def setup_external_libs(libs):
"""
Download external libraries from PyPI to SPARK_EC2_DIR/lib/ and prepend 
them to our PATH.
"""
PYPI_URL_PREFIX = "https://pypi.python.org/packages/source;
SPARK_EC2_LIB_DIR = os.path.join(SPARK_EC2_DIR, "lib")

if not os.path.exists(SPARK_EC2_LIB_DIR):
print("Downloading external libraries that spark-ec2 needs from PyPI to 
{path}...".format(
path=SPARK_EC2_LIB_DIR
))
print("This should be a one-time operation.")
os.mkdir(SPARK_EC2_LIB_DIR)

for lib in libs:
versioned_lib_name = "{n}-{v}".format(n=lib["name"], v=lib["version"])
lib_dir = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name)

if not os.path.isdir(lib_dir):
tgz_file_path = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name 
+ ".tar.gz")
print(" - Downloading {lib}...".format(lib=lib["name"]))
download_stream = urlopen(

"{prefix}/{first_letter}/{lib_name}/{lib_name}-{lib_version}.tar.gz".format(
prefix=PYPI_URL_PREFIX,
first_letter=lib["name"][:1],
lib_name=lib["name"],
lib_version=lib["version"]
)
)
with open(tgz_file_path, "wb") as tgz_file:
tgz_file.write(download_stream.read())
with open(tgz_file_path, "rb") as tar:
if hashlib.md5(tar.read()).hexdigest() != lib["md5"]:
print("ERROR: Got wrong md5sum for 
{lib}.".format(lib=lib["name"]), file=stderr)
sys.exit(1)
tar = tarfile.open(tgz_file_path)
tar.extractall(path=SPARK_EC2_LIB_DIR)
tar.close()
os.remove(tgz_file_path)
print(" - Finished downloading {lib}.".format(lib=lib["name"]))
sys.path.insert(1, lib_dir)


# Only PyPI libraries are supported.
external_libs = [
{
"name": "boto",
"version": "2.34.0",
"md5": "5556223d2d0cc4d06dd4829e671dcecd"
}
]

setup_external_libs(external_libs)


import boto
from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, 
EBSBlockDeviceType
from boto import ec2


class UsageError(Exception):
pass


# Configure and parse our command-line arguments
def parse_args():
parser = OptionParser(
prog="spark-ec2",
version="%prog {v}".format(v=SPARK_EC2_VERSION),
usage="%prog [options]  \n\n"
+ " can be: launch, destroy, login, stop, start, get-master, 
reboot-slaves")

parser.add_option(
"-s", "--slaves", type="int", default=1,
help="Number of slaves to launch (default: %default)")
parser.add_option(
"-w", "--wait", type="int",
help="DEPRECATED (no longer necessary) - Seconds to wait for nodes to 
start")
parser.add_option(
"-k", "--key-pair",
help="Key pair to use on 

[jira] [Commented] (SPARK-11085) Add support for HTTP proxy

2016-08-25 Thread SURESH CHAGANTI (JIRA)

[ 
https://issues.apache.org/jira/browse/SPARK-11085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15437505#comment-15437505
 ] 

SURESH CHAGANTI commented on SPARK-11085:
-

The following Script accepts the  "--proxy_host_port" argument

from __future__ import division, print_function, with_statement

import codecs
import hashlib
import itertools
import logging
import os
import os.path
import pipes
import random
import shutil
import string
from stat import S_IRUSR
import subprocess
import sys
import tarfile
import tempfile
import textwrap
import time
import warnings
from datetime import datetime
from optparse import OptionParser
from sys import stderr

if sys.version < "3":
from urllib2 import urlopen, Request, HTTPError
else:
from urllib.request import urlopen, Request
from urllib.error import HTTPError
raw_input = input
xrange = range

SPARK_EC2_VERSION = "1.6.2"
SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__))

VALID_SPARK_VERSIONS = set([
"0.7.3",
"0.8.0",
"0.8.1",
"0.9.0",
"0.9.1",
"0.9.2",
"1.0.0",
"1.0.1",
"1.0.2",
"1.1.0",
"1.1.1",
"1.2.0",
"1.2.1",
"1.3.0",
"1.3.1",
"1.4.0",
"1.4.1",
"1.5.0",
"1.5.1",
"1.5.2",
"1.6.0",
"1.6.1",
"1.6.2",
])

SPARK_TACHYON_MAP = {
"1.0.0": "0.4.1",
"1.0.1": "0.4.1",
"1.0.2": "0.4.1",
"1.1.0": "0.5.0",
"1.1.1": "0.5.0",
"1.2.0": "0.5.0",
"1.2.1": "0.5.0",
"1.3.0": "0.5.0",
"1.3.1": "0.5.0",
"1.4.0": "0.6.4",
"1.4.1": "0.6.4",
"1.5.0": "0.7.1",
"1.5.1": "0.7.1",
"1.5.2": "0.7.1",
"1.6.0": "0.8.2",
"1.6.1": "0.8.2",
"1.6.2": "0.8.2",
}

DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark;

# Default location to get the spark-ec2 scripts (and ami-list) from
DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/amplab/spark-ec2;
DEFAULT_SPARK_EC2_BRANCH = "branch-1.6"


def setup_external_libs(libs):
"""
Download external libraries from PyPI to SPARK_EC2_DIR/lib/ and prepend 
them to our PATH.
"""
PYPI_URL_PREFIX = "https://pypi.python.org/packages/source;
SPARK_EC2_LIB_DIR = os.path.join(SPARK_EC2_DIR, "lib")

if not os.path.exists(SPARK_EC2_LIB_DIR):
print("Downloading external libraries that spark-ec2 needs from PyPI to 
{path}...".format(
path=SPARK_EC2_LIB_DIR
))
print("This should be a one-time operation.")
os.mkdir(SPARK_EC2_LIB_DIR)

for lib in libs:
versioned_lib_name = "{n}-{v}".format(n=lib["name"], v=lib["version"])
lib_dir = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name)

if not os.path.isdir(lib_dir):
tgz_file_path = os.path.join(SPARK_EC2_LIB_DIR, versioned_lib_name 
+ ".tar.gz")
print(" - Downloading {lib}...".format(lib=lib["name"]))
download_stream = urlopen(

"{prefix}/{first_letter}/{lib_name}/{lib_name}-{lib_version}.tar.gz".format(
prefix=PYPI_URL_PREFIX,
first_letter=lib["name"][:1],
lib_name=lib["name"],
lib_version=lib["version"]
)
)
with open(tgz_file_path, "wb") as tgz_file:
tgz_file.write(download_stream.read())
with open(tgz_file_path, "rb") as tar:
if hashlib.md5(tar.read()).hexdigest() != lib["md5"]:
print("ERROR: Got wrong md5sum for 
{lib}.".format(lib=lib["name"]), file=stderr)
sys.exit(1)
tar = tarfile.open(tgz_file_path)
tar.extractall(path=SPARK_EC2_LIB_DIR)
tar.close()
os.remove(tgz_file_path)
print(" - Finished downloading {lib}.".format(lib=lib["name"]))
sys.path.insert(1, lib_dir)


# Only PyPI libraries are supported.
external_libs = [
{
"name": "boto",
"version": "2.34.0",
"md5": "5556223d2d0cc4d06dd4829e671dcecd"
}
]

setup_external_libs(external_libs)


import boto
from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType, 
EBSBlockDeviceType
from boto import ec2


class UsageError(Exception):
pass


# Configure and parse our command-line arguments
def parse_args():
parser = OptionParser(
prog="spark-ec2",
version="%prog {v}".format(v=SPARK_EC2_VERSION),
usage="%prog [options]  \n\n"
+ " can be: launch, destroy, login, stop, start, get-master, 
reboot-slaves")

parser.add_option(
"-s", "--slaves", type="int", default=1,
help="Number of slaves to launch (default: %default)")
parser.add_option(
"-w", "--wait", type="int",
help="DEPRECATED (no longer necessary) - Seconds to wait for nodes to 
start")
parser.add_option(
"-k", "--key-pair",
help="Key pair to use