Abhi Basu Wed, 14 Sep 2016 08:05:49 -0700
%pyspark input_file = "hdfs:////tmp/filenname.gz"
raw_rdd = sc.textFile(input_file)