BryanCutler commented on a change in pull request #28987: URL: https://github.com/apache/spark/pull/28987#discussion_r449344814
##########
File path: python/pyspark/sql/tests/test_pandas_grouped_map.py
##########
@@ -570,17 +570,21 @@ def test_grouped_over_window_with_key(self):
                     5: (3, expected_window[1]),
                     6: (3, expected_window[2])}
 
-        df = self.spark.createDataFrame(data, ['id', 'group', 'ts', 'result'])
-        df = df.select(col('id'), col('group'), col('ts').cast('timestamp'), col('result'))
+        df = self.spark.createDataFrame(data, ['id', 'group', 'ts'])
+        df = df.select(col('id'), col('group'), col('ts').cast('timestamp'))
 
         @pandas_udf(df.schema, PandasUDFType.GROUPED_MAP)
         def f(key, pdf):
             group = key[0]
             window_range = key[1]
-            # Result will be True if group and window range equal to expected
-            is_expected = pdf.id.apply(lambda id: (expected[id][0] == group and
-                                                   expected[id][1] == window_range))
-            return pdf.assign(result=is_expected)
+
+            # Make sure the key with group and window values are correct
+            for _, i in pdf.id.iteritems():
+                assert expected[i][0] == group, "{} != {}".format(expected[i][0], group)
+                assert expected[i][1] == window_range, \
+                    "{} != {}".format(expected[i][1], window_range)
+
+            return pdf
 
         result = df.groupby('group', window('ts', '5 days')).apply(f).select('result').collect()

Review comment:
Oops, I messed up removing the `result` column; I'll have to fix that up.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org