mkk1490 commented on issue #3313: URL: https://github.com/apache/hudi/issues/3313#issuecomment-886010620
@nsivabalan I'm so sorry. That's my mistake. I'm trying to update the field next to src_pri_psbr_id, which is pri_az_cust_id. Please find the dfs below: Insert: df_ins = spark.createDataFrame([ ('10490113453106300000','561295328','2018-02-26 00:00:00','129999','18078735','15889231','OTHER PLACE OF SERVICE','53200900','RESP','201802'), ('10490120407605900000','559017519','2018-02-26 00:00:00','24290619','100','','OTHER PLACE OF SERVICE','53214384','RESP','201802'), ('10490116725506700000','1275017082','2018-02-27 00:00:00','209999','7875173','56874023','INPATIENT HOSPITAL','53201132','RESP','201802'), ('10420113469301100000','1705523399','2018-02-26 00:00:00','20045','7088459','55674640','EMERGENCY ROOM - HOSPITAL','53199046','RESP','201802'), ('10419114183800600000','29764982','2018-02-26 00:00:00','11659999','7786313','56694289','OFFICE','53207492','RESP','201802'), ('10490115064904900000','5007290360','2018-02-27 00:00:00','11720002','6937467','57968295','INPATIENT HOSPITAL','53207546','RESP','201803'), ('10419114186102800000','4968929315','2018-02-27 00:00:00','38930013','7311494','55315237','OFFICE','1002607736','RESP','201803'), ('10236114374904400000','320648979','2018-02-26 00:00:00','35160001','100','','INDEPENDENT LABORATORY','53217687','RESP','201803'), ('10809131289117500000','505439892','2018-02-28 00:00:00','20860001','2492213','52847481','OTHER PLACE OF SERVICE','53212703','RESP','201803'), ('10419123011601800000','3138080815','2018-02-25 00:00:00','32060002','7574212','58923898','INPATIENT HOSPITAL','1002608174','RESP','201803') ], ['claim_id','pat_id','claim_subm_dt','src_plan_id','src_pri_psbr_id','pri_az_cust_id','plac_of_srvc_cd','az_plan_id','src_sys_nm','yr_mth'] ).withColumn('claim_subm_dt', F.expr("cast(claim_subm_dt as timestamp)")).withColumn('yr_mth', F.expr("cast(yr_mth as int)")) Update: df_upd = spark.createDataFrame([ ('10490113453106300000','561295328','2018-02-26 00:00:00','129999','18078735','15889231','OTHER PLACE OF 
SERVICE','53200900','RESP','201802'), ('10490120407605900000','559017519','2018-02-26 00:00:00','24290619','100','32531','OTHER PLACE OF SERVICE','53214384','RESP','201802'), ('10490116725506700000','1275017082','2018-02-27 00:00:00','209999','7875173','56874023','INPATIENT HOSPITAL','53201132','RESP','201802'), ('10420113469301100000','1705523399','2018-02-26 00:00:00','20045','7088459','55674640','EMERGENCY ROOM - HOSPITAL','53199046','RESP','201802'), ('10419114183800600000','29764982','2018-02-26 00:00:00','11659999','7786313','56694289','OFFICE','53207492','RESP','201802'), ('10490115064904900000','5007290360','2018-02-27 00:00:00','11720002','6937467','57968295','INPATIENT HOSPITAL','53207546','RESP','201803'), ('10419114186102800000','4968929315','2018-02-27 00:00:00','38930013','7311494','55315237','OFFICE','1002607736','RESP','201803'), ('10236114374904400000','320648979','2018-02-26 00:00:00','35160001','100','32531','INDEPENDENT LABORATORY','53217687','RESP','201803'), ('10809131289117500000','505439892','2018-02-28 00:00:00','20860001','2492213','52847481','OTHER PLACE OF SERVICE','53212703','RESP','201803'), ('10419123011601800000','3138080815','2018-02-25 00:00:00','32060002','7574212','58923898','INPATIENT HOSPITAL','1002608174','RESP','201803') ], ['claim_id','pat_id','claim_subm_dt','src_plan_id','src_pri_psbr_id','pri_az_cust_id','plac_of_srvc_cd','az_plan_id','src_sys_nm','yr_mth'] ).withColumn('claim_subm_dt', F.expr("cast(claim_subm_dt as timestamp)")).withColumn('yr_mth', F.expr("cast(yr_mth as int)")) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org