The schema is exactly the same, so I'm not sure why it is failing.

root
 |-- booking_id: integer (nullable = true)
 |-- booking_rooms_room_category_id: integer (nullable = true)
 |-- booking_rooms_room_id: integer (nullable = true)
 |-- booking_source: integer (nullable = true)
 |-- booking_status: integer (nullable = true)
 |-- cancellation_reason: integer (nullable = true)
 |-- checkin: string (nullable = true)
 |-- checkout: string (nullable = true)
 |-- city_id: integer (nullable = true)
 |-- cluster_id: integer (nullable = true)
 |-- company_id: integer (nullable = true)
 |-- created_at: string (nullable = true)
 |-- discount: integer (nullable = true)
 |-- feedback_created_at: string (nullable = true)
 |-- feedback_id: integer (nullable = true)
 |-- hotel_id: integer (nullable = true)
 |-- hub_id: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- no_show_reason: integer (nullable = true)
 |-- oyo_rooms: integer (nullable = true)
 |-- selling_amount: integer (nullable = true)
 |-- shifting: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- id: integer (nullable = true)
 |    |    |-- booking_id: integer (nullable = true)
 |    |    |-- shifting_status: integer (nullable = true)
 |    |    |-- shifting_reason: integer (nullable = true)
 |    |    |-- shifting_metadata: integer (nullable = true)
 |-- suggest_oyo: integer (nullable = true)
 |-- tickets: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- ticket_source: integer (nullable = true)
 |    |    |-- ticket_status: string (nullable = true)
 |    |    |-- ticket_instance_source: integer (nullable = true)
 |    |    |-- ticket_category: string (nullable = true)
 |-- updated_at: timestamp (nullable = true)
 |-- year: integer (nullable = true)
 |-- zone_id: integer (nullable = true)

root
 |-- booking_id: integer (nullable = true)
 |-- booking_rooms_room_category_id: integer (nullable = true)
 |-- booking_rooms_room_id: integer (nullable = true)
 |-- booking_source: integer (nullable = true)
 |-- booking_status: integer (nullable = true)
 |-- cancellation_reason: integer (nullable = true)
 |-- checkin: string (nullable = true)
 |-- checkout: string (nullable = true)
 |-- city_id: integer (nullable = true)
 |-- cluster_id: integer (nullable = true)
 |-- company_id: integer (nullable = true)
 |-- created_at: string (nullable = true)
 |-- discount: integer (nullable = true)
 |-- feedback_created_at: string (nullable = true)
 |-- feedback_id: integer (nullable = true)
 |-- hotel_id: integer (nullable = true)
 |-- hub_id: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- no_show_reason: integer (nullable = true)
 |-- oyo_rooms: integer (nullable = true)
 |-- selling_amount: integer (nullable = true)
 |-- shifting: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- id: integer (nullable = true)
 |    |    |-- booking_id: integer (nullable = true)
 |    |    |-- shifting_status: integer (nullable = true)
 |    |    |-- shifting_reason: integer (nullable = true)
 |    |    |-- shifting_metadata: integer (nullable = true)
 |-- suggest_oyo: integer (nullable = true)
 |-- tickets: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- ticket_source: integer (nullable = true)
 |    |    |-- ticket_status: string (nullable = true)
 |    |    |-- ticket_instance_source: integer (nullable = true)
 |    |    |-- ticket_category: string (nullable = true)
 |-- updated_at: timestamp (nullable = false)
 |-- year: integer (nullable = true)
 |-- zone_id: integer (nullable = true)

On Sun, Jun 3, 2018 at 8:05 PM, Alessandro Solimando <
alessandro.solima...@gmail.com> wrote:

> Hi Pranav,
> I don't have an answer to your issue, but what I generally do in these
> cases is first try to simplify the problem to a point where it is easier to
> check what's going on, and then add back "pieces" one by one until I spot
> the error.
>
> In your case, I would suggest the following:
>
> 1) project the dataset to the problematic column only (column 21 from your
> log)
> 2) use the explode function to have one element of the array per row
> 3) flatten the struct
>
> At each step use printSchema() to double check if the types are as you
> expect them to be, and if they are the same for both datasets.
>
> Best regards,
> Alessandro
>
> On 2 June 2018 at 19:48, Pranav Agrawal <pranav.mn...@gmail.com> wrote:
>
>> can't get around this error when performing union of two datasets
>> (ds1.union(ds2)) having complex data type (struct, list),
>>
>>
>> *18/06/02 15:12:00 INFO ApplicationMaster: Final app status: FAILED,
>> exitCode: 15, (reason: User class threw exception:
>> org.apache.spark.sql.AnalysisException: Union can only be performed on
>> tables with the compatible column types.
>> array<struct<id:int,booking_id:int,shifting_status:int,shifting_reason:int,shifting_metadata:string>>
>> <>
>> array<struct<id:int,booking_id:int,shifting_status:int,shifting_reason:int,shifting_metadata:string>>
>> at the 21th column of the second table;;*
>> As far as I can tell, they are the same. What am I doing wrong? Any help
>> / workaround appreciated!
>>
>> spark version: 2.2.1
>>
>> Thanks,
>> Pranav
>>
>
>

Reply via email to