I am using spark-1.6.0-bin-hadoop2.6. I am trying to write a python notebook
that reads a data frame from Cassandra.

I connect to Cassandra using an ssh tunnel running on port 9043. CQLSH works;
however, I cannot figure out how to configure my notebook. I have tried
various hacks. Any idea what I am doing wrong?

: java.io.IOException: Failed to open native connection to Cassandra at
{192.168.1.126}:9042



Thanks in advance

Andy



$ extraPkgs="--packages com.databricks:spark-csv_2.11:1.3.0 \
            --packages datastax:spark-cassandra-connector:1.6.0-M1-s_2.11"

$ export PYSPARK_PYTHON=python3
$ export PYSPARK_DRIVER_PYTHON=python3
$ IPYTHON_OPTS=notebook $SPARK_ROOT/bin/pyspark $extraPkgs $*



In [15]:
1
sqlContext.setConf("spark.cassandra.connection.host","127.0.0.1:9043")
2
df = sqlContext.read\
3
    .format("org.apache.spark.sql.cassandra")\
4
    .options(table="time_series", keyspace="notification")\
5
    .load()
6
​
7
df.printSchema()
8
df.show()
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-15-9d8f6dcf210f> in <module>()
      1 
sqlContext.setConf("spark.cassandra.connection.host","localhost:9043")
----> 2 df = sqlContext.read    .format("org.apache.spark.sql.cassandra")
.options(table="kv", keyspace="notification")    .load()
      3 
      4 df.printSchema()
      5 df.show()

/Users/andrewdavidson/workSpace/spark/spark-1.6.0-bin-hadoop2.6/python/pyspa
rk/sql/readwriter.py in load(self, path, format, schema, **options)
    137                 return self._df(self._jreader.load(path))
    138         else:
--> 139             return self._df(self._jreader.load())
    140 
    141     @since(1.4)

/Users/andrewdavidson/workSpace/spark/spark-1.6.0-bin-hadoop2.6/python/lib/p
y4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
    811         answer = self.gateway_client.send_command(command)
    812         return_value = get_return_value(
--> 813             answer, self.gateway_client, self.target_id, self.name)
    814 
    815         for temp_arg in temp_args:

/Users/andrewdavidson/workSpace/spark/spark-1.6.0-bin-hadoop2.6/python/pyspa
rk/sql/utils.py in deco(*a, **kw)
     43     def deco(*a, **kw):
     44         try:
---> 45             return f(*a, **kw)
     46         except py4j.protocol.Py4JJavaError as e:
     47             s = e.java_exception.toString()

/Users/andrewdavidson/workSpace/spark/spark-1.6.0-bin-hadoop2.6/python/lib/p
y4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client,
target_id, name)
    306                 raise Py4JJavaError(
    307                     "An error occurred while calling {0}{1}{2}.\n".
--> 308                     format(target_id, ".", name), value)
    309             else:
    310                 raise Py4JError(

Py4JJavaError: An error occurred while calling o280.load.
: java.io.IOException: Failed to open native connection to Cassandra at
{192.168.1.126}:9042
        at 
com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$conn
ector$cql$CassandraConnector$$createSession(CassandraConnector.scala:162)
        at 
com.datastax.spark.connector.cql.CassandraConnector$$anonfun$2.apply(Cassand
raConnector.scala:148)
        at 
com.datastax.spark.connector.cql.CassandraConnector$$anonfun$2.apply(Cassand
raConnector.scala:148)
        at 
com.datastax.spark.connector.cql.RefCountedCache.createNewValueAndKeys(RefCo
untedCache.scala:31)
        at 
com.datastax.spark.connector.cql.RefCountedCache.acquire(RefCountedCache.sca
la:56)
        at 
com.datastax.spark.connector.cql.CassandraConnector.openSession(CassandraCon
nector.scala:81)
        at 
com.datastax.spark.connector.cql.CassandraConnector.withSessionDo(CassandraC
onnector.scala:109)
        at 
com.datastax.spark.connector.rdd.partitioner.CassandraRDDPartitioner$.getTok
enFactory(CassandraRDDPartitioner.scala:184)
        at 
org.apache.spark.sql.cassandra.CassandraSourceRelation$.apply(CassandraSourc
eRelation.scala:267)
        at 
org.apache.spark.sql.cassandra.DefaultSource.createRelation(DefaultSource.sc
ala:57)
        at 
org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(Resolve
dDataSource.scala:158)
        at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:119)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62
)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl
.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
        at py4j.Gateway.invoke(Gateway.java:259)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:209)
        at java.lang.Thread.run(Thread.java:745)
Caused by: com.datastax.driver.core.exceptions.NoHostAvailableException: All
host(s) tried for query failed (tried: /192.168.1.126:9042
(com.datastax.driver.core.exceptions.TransportException: [/192.168.1.126]
Cannot connect))
        at 
com.datastax.driver.core.ControlConnection.reconnectInternal(ControlConnecti
on.java:231)
        at 
com.datastax.driver.core.ControlConnection.connect(ControlConnection.java:77
)
        at com.datastax.driver.core.Cluster$Manager.init(Cluster.java:1414)
        at com.datastax.driver.core.Cluster.getMetadata(Cluster.java:393)
        at 
com.datastax.spark.connector.cql.CassandraConnector$.com$datastax$spark$conn
ector$cql$CassandraConnector$$createSession(CassandraConnector.scala:155)
        ... 22 more



Reply via email to