icexelloss commented on code in PR #13880: URL: https://github.com/apache/arrow/pull/13880#discussion_r965450019
########## cpp/src/arrow/compute/exec/asof_join_node.cc: ########## @@ -222,46 +387,60 @@ class InputState { // latest_time and latest_ref_row to the value that immediately pass the // specified timestamp. // Returns true if updates were made, false if not. - bool AdvanceAndMemoize(int64_t ts) { + Result<bool> AdvanceAndMemoize(OnType ts) { // Advance the right side row index until we reach the latest right row (for each key) // for the given left timestamp. // Check if already updated for TS (or if there is no latest) if (Empty()) return false; // can't advance if empty - auto latest_time = GetLatestTime(); - if (latest_time > ts) return false; // already advanced // Not updated. Try to update and possibly advance. - bool updated = false; + bool advanced, updated = false; do { - latest_time = GetLatestTime(); + auto latest_time = GetLatestTime(); // if Advance() returns true, then the latest_ts must also be valid // Keep advancing right table until we hit the latest row that has // timestamp <= ts. This is because we only need the latest row for the // match given a left ts. - if (latest_time <= ts) { - memo_.Store(GetLatestBatch(), latest_ref_row_, latest_time, GetLatestKey()); - } else { + if (latest_time > ts) { break; // hit a future timestamp -- done updating for now } + auto rb = GetLatestBatch(); + if (may_rehash_ && rb->column_data(key_col_index_[0])->GetNullCount() > 0) { Review Comment: Why are we only looking at `key_col_index_[0]` here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org