[jira] [Updated] (SPARK-9970) SQLContext.createDataFrame failed to properly determine column names

2015-08-18 Thread JIRA

 [ https://issues.apache.org/jira/browse/SPARK-9970?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Maciej Bryński updated SPARK-9970:
--
Description: 
Hi,
I'm trying to do a nested join of tables.
After the first join everything is OK, but the second join loses some of the
column names.

My code is as follows:

{code:python}
def joinTable(tableLeft, tableRight, columnLeft, columnRight, columnNested,
              joinType="left_outer"):
    # Group the right table by the join column, then turn the grouped RDD
    # back into a DataFrame of (key, grouped rows).
    tmpTable = sqlCtx.createDataFrame(
        tableRight.rdd.groupBy(lambda r: r.asDict()[columnRight]))
    tmpTable = tmpTable.select(tmpTable._1.alias(columnRight),
                               tmpTable._2.data.alias(columnNested))
    return tableLeft.join(tmpTable,
                          tableLeft[columnLeft] == tmpTable[columnRight],
                          joinType).drop(columnRight)


user = sqlContext.read.json("user.json")
user.printSchema()
root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)

order = sqlContext.read.json("order.json")
order.printSchema()
root
 |-- id: long (nullable = true)
 |-- price: double (nullable = true)
 |-- userid: long (nullable = true)

lines = sqlContext.read.json("lines.json")
lines.printSchema()
root
 |-- id: long (nullable = true)
 |-- orderid: long (nullable = true)
 |-- product: string (nullable = true)
{code}

And the joining code.
Please look at the result of the 2nd join: the nested columns come out as _1, _2, _3,
but they should be 'id', 'orderid', 'product'.

{code:python}
orders = joinTable(order, lines, "id", "orderid", "lines")
orders.printSchema()

root
 |-- id: long (nullable = true)
 |-- price: double (nullable = true)
 |-- userid: long (nullable = true)
 |-- lines: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- id: long (nullable = true)
 |    |    |-- orderid: long (nullable = true)
 |    |    |-- product: string (nullable = true)

clients = joinTable(user, orders, "id", "userid", "orders")
clients.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- orders: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- id: long (nullable = true)
 |    |    |-- price: double (nullable = true)
 |    |    |-- userid: long (nullable = true)
 |    |    |-- lines: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- _1: long (nullable = true)
 |    |    |    |    |-- _2: long (nullable = true)
 |    |    |    |    |-- _3: string (nullable = true)
{code}

I tried to check whether groupBy is the cause of the problem, but its output looks right:
{code:python}
grouped = orders.rdd.groupBy(lambda r: r.userid)
print grouped.map(lambda x: list(x[1])).collect()

[[Row(id=1, price=202.3, userid=1, lines=[Row(id=1, orderid=1, product=u'XXX'),
  Row(id=2, orderid=1, product=u'YYY')]),
  Row(id=2, price=343.99, userid=1, lines=[Row(id=3, orderid=2, product=u'XXX')])],
 [Row(id=3, price=399.99, userid=2, lines=[Row(id=4, orderid=3, product=u'XXX')])]]
{code}

So I assume that the problem is with createDataFrame.
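For completeness, here is a minimal sketch of what I believe triggers the problem without any joins, assuming the usual PySpark shell objects {{sc}} and {{sqlCtx}} and reusing one of the order rows from the grouped output above:

{code:python}
from pyspark.sql import Row

# One "order" row that already carries nested "lines" rows, i.e. the shape
# produced by the first join (same values as in the grouped output above).
order_row = Row(id=1, price=202.3, userid=1,
                lines=[Row(id=1, orderid=1, product=u'XXX'),
                       Row(id=2, orderid=1, product=u'YYY')])

# Group by userid and rebuild a DataFrame, exactly as joinTable does.
grouped = sc.parallelize([order_row]).groupBy(lambda r: r.userid)
df = sqlCtx.createDataFrame(grouped)

# Expected: the innermost struct keeps id / orderid / product.
# Observed (see the clients schema above): the fields come back as _1, _2, _3.
df.printSchema()
{code}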

[jira] [Updated] (SPARK-9970) SQLContext.createDataFrame failed to properly determine column names

2015-08-14 Thread JIRA

 [ https://issues.apache.org/jira/browse/SPARK-9970?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Maciej Bryński updated SPARK-9970:
--
Priority: Major  (was: Minor)

 SQLContext.createDataFrame failed to properly determine column names
 ---------------------------------------------------------------------

                 Key: SPARK-9970
                 URL: https://issues.apache.org/jira/browse/SPARK-9970
             Project: Spark
          Issue Type: Bug
          Components: PySpark
    Affects Versions: 1.4.0
            Reporter: Maciej Bryński




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

-
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org