[ https://issues.apache.org/jira/browse/SPARK-15404?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
郭同 updated SPARK-15404: ----------------------- Description: import os import sys from pyspark import SparkContext from pyspark.sql import SQLContext from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerType if __name__ == "__main__": sc = SparkContext(appName="PythonSQL") sqlContext = SQLContext(sc) schema = StructType([StructField("person_name", StringType(), False), StructField("person_age", IntegerType(), False)]) some_rdd = sc.parallelize([Row(person_name="John", person_age=19), Row(person_name="Smith", person_age=23), Row(person_name="Sarah", person_age=18)]) some_df = sqlContext.createDataFrame(some_rdd, schema) some_df.printSchema() some_df.registerAsTable("people") teenagers = sqlContext.sql("SELECT * FROM people ") for each in teenagers.collect(): print(each) sc.stop() was: from __future__ import print_function import os import sys from pyspark import SparkContext from pyspark.sql import SQLContext from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerType if __name__ == "__main__": sc = SparkContext(appName="PythonSQL") sqlContext = SQLContext(sc) schema = StructType([StructField("person_name", StringType(), False), StructField("person_age", IntegerType(), False)]) some_rdd = sc.parallelize([Row(person_name="John", person_age=19), Row(person_name="Smith", person_age=23), Row(person_name="Sarah", person_age=18)]) some_df = sqlContext.createDataFrame(some_rdd, schema) some_df.printSchema() some_df.registerAsTable("people") teenagers = sqlContext.sql("SELECT * FROM people ") for each in teenagers.collect(): print(each) sc.stop() > pyspark sql bug, here is the testcase > ------------------------------------- > > Key: SPARK-15404 > URL: https://issues.apache.org/jira/browse/SPARK-15404 > Project: Spark > Issue Type: Bug > Environment: 1.6 > Reporter: 郭同 > > import os > import sys > from pyspark import SparkContext > from pyspark.sql import SQLContext > from pyspark.sql.types import Row, StructField, 
StructType, StringType, > IntegerType > if __name__ == "__main__": > sc = SparkContext(appName="PythonSQL") > sqlContext = SQLContext(sc) > schema = StructType([StructField("person_name", StringType(), False), > StructField("person_age", IntegerType(), False)]) > some_rdd = sc.parallelize([Row(person_name="John", person_age=19), > Row(person_name="Smith", person_age=23), > Row(person_name="Sarah", person_age=18)]) > some_df = sqlContext.createDataFrame(some_rdd, schema) > some_df.printSchema() > some_df.registerAsTable("people") > teenagers = sqlContext.sql("SELECT * FROM people ") > for each in teenagers.collect(): > print(each) > sc.stop() -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org