nickva commented on code in PR #4958: URL: https://github.com/apache/couchdb/pull/4958#discussion_r1483157782
########## src/fabric/src/fabric_view.erl: ########## @@ -241,11 +246,63 @@ possibly_embed_doc( end; _ -> Row + end; +possibly_embed_doc( + #collector{db_name = DbName, query_args = Args}, + #{key := _Key, id := _Id, value := Value, doc := _Doc} = Row +) -> + #mrargs{include_docs = IncludeDocs} = Args, + case IncludeDocs andalso is_tuple(Value) of + true -> + {Props} = Value, + Rev0 = couch_util:get_value(<<"_rev">>, Props), + case couch_util:get_value(<<"_id">>, Props) of + null -> + Row#{doc => null}; + undefined -> + Row; + IncId -> + % use separate process to call fabric:open_doc + % to not interfere with current call + {Pid, Ref} = spawn_monitor(fun() -> + exit( + case Rev0 of + undefined -> + case fabric:open_doc(DbName, IncId, []) of + {ok, NewDoc} -> + Row#{doc => couch_doc:to_json_obj(NewDoc, [])}; + {not_found, _} -> + Row#{doc => null}; + Else -> + Row#{doc => {error, Else}} + end; + Rev0 -> + Rev = couch_doc:parse_rev(Rev0), + case fabric:open_revs(DbName, IncId, [Rev], []) of + {ok, [{ok, NewDoc}]} -> + Row#{doc => couch_doc:to_json_obj(NewDoc, [])}; + {ok, [{{not_found, _}, Rev}]} -> + Row#{doc => null}; + Else -> + Row#{doc => {error, Else}} + end + end + ) + end), + receive + {'DOWN', Ref, process, Pid, Resp} -> + Resp + end + end; + _ -> + Row end. detach_partition(#view_row{key = {p, _Partition, Key}} = Row) -> Row#view_row{key = Key}; -detach_partition(#view_row{} = Row) -> +detach_partition(#{key := {p, _Partition, Key}} = Row) -> + Row#{key => Key}; +detach_partition(Row) -> Review Comment: We could match `#{key := _ }` here at least indicating we still expect it to be a map with a `key` item. ########## src/fabric/src/fabric_view_all_docs.erl: ########## @@ -187,6 +187,49 @@ shards(Db, Args) -> end, fabric_view:get_shards(Db, NewArgs). +handle_row(Row0, {Worker, _} = Source, State) -> + #collector{query_args = Args, counters = Counters0, rows = Rows0} = State, + #mrargs{extra = Options, direction = Dir} = Args, + Row1 = + case Row0 of + #view_row{} -> Row0#view_row{worker = Source}; + #{} -> Row0#{worker => Source} + end, + % It has to be ensured that rows of the same format are merged in case of + % mixed-version cluster scenarios. + Row = + case couch_util:get_value(execution_stats_rolling, Options, false) of + true -> + case Row1 of + #view_row{} -> + #{ + key => Row1#view_row.key, + id => Row1#view_row.id, + value => Row1#view_row.value, + doc => Row1#view_row.doc, + worker => Row1#view_row.worker, + stats => #{} + }; + #{} -> + Row1 + end; + false -> + case Row1 of + #view_row{} -> + Row1; + #{} -> + #{id := Id, key := Key} = Row1, Review Comment: These could be matched in the `#{}` case directly. ########## src/fabric/src/fabric_view_all_docs.erl: ########## @@ -187,6 +187,49 @@ shards(Db, Args) -> end, fabric_view:get_shards(Db, NewArgs). +handle_row(Row0, {Worker, _} = Source, State) -> + #collector{query_args = Args, counters = Counters0, rows = Rows0} = State, + #mrargs{extra = Options, direction = Dir} = Args, + Row1 = + case Row0 of + #view_row{} -> Row0#view_row{worker = Source}; + #{} -> Row0#{worker => Source} + end, + % It has to be ensured that rows of the same format are merged in case of + % mixed-version cluster scenarios. + Row = + case couch_util:get_value(execution_stats_rolling, Options, false) of + true -> + case Row1 of + #view_row{} -> + #{ + key => Row1#view_row.key, + id => Row1#view_row.id, + value => Row1#view_row.value, + doc => Row1#view_row.doc, + worker => Row1#view_row.worker, + stats => #{} + }; + #{} -> + Row1 + end; + false -> + case Row1 of + #view_row{} -> + Row1; + #{} -> + #{id := Id, key := Key} = Row1, + Value = maps:get(value, Row1, null), + Doc = maps:get(doc, Row1, null), + Worker = maps:get(worker, Row1, null), Review Comment: The default values for the record would be `undefined`, or do the workers explicitly set them to `null`? ########## src/fabric/src/fabric_view_all_docs.erl: ########## @@ -273,10 +313,24 @@ handle_message({execution_stats, _} = Msg, {_, From}, St) -> rexi:stream_ack(From), {Go, St#collector{user_acc = Acc}}. -merge_row(fwd, Row, Rows) -> +insert_row(_Dir, _Key, R, []) -> + [R]; +insert_row(fwd, Key, R, [H | T] = List) -> + V1 = maps:get(Key, R), + V2 = maps:get(Key, H), Review Comment: I think we can just use `id` here and also do a more common KV match `#{id := RowId} = R`and `#{id := HeadId} = H`. With `id` atom those an be matched directly in the head, whichever one looks better to you. ########## src/fabric/src/fabric_view_all_docs.erl: ########## @@ -273,10 +313,24 @@ handle_message({execution_stats, _} = Msg, {_, From}, St) -> rexi:stream_ack(From), {Go, St#collector{user_acc = Acc}}. -merge_row(fwd, Row, Rows) -> +insert_row(_Dir, _Key, R, []) -> + [R]; +insert_row(fwd, Key, R, [H | T] = List) -> + V1 = maps:get(Key, R), + V2 = maps:get(Key, H), + if + V1 =< V2 -> [R | List]; + true -> [H | insert_row(fwd, Key, R, T)] + end; +insert_row(rev, Key, R, List) -> + lists:reverse(insert_row(fwd, Key, R, lists:reverse(List))). + +merge_row(fwd, #view_row{} = Row, Rows) -> lists:keymerge(#view_row.id, [Row], Rows); -merge_row(rev, Row, Rows) -> - lists:rkeymerge(#view_row.id, [Row], Rows). +merge_row(rev, #view_row{} = Row, Rows) -> + lists:rkeymerge(#view_row.id, [Row], Rows); +merge_row(Dir, #{} = Row, Rows) -> + insert_row(Dir, id, Row, Rows). Review Comment: I think only `id` is the key. Might be better to just simply and hard-code the key in insert_row ########## src/fabric/src/fabric_view.erl: ########## @@ -241,11 +246,63 @@ possibly_embed_doc( end; _ -> Row + end; +possibly_embed_doc( + #collector{db_name = DbName, query_args = Args}, + #{key := _Key, id := _Id, value := Value, doc := _Doc} = Row +) -> + #mrargs{include_docs = IncludeDocs} = Args, + case IncludeDocs andalso is_tuple(Value) of + true -> + {Props} = Value, + Rev0 = couch_util:get_value(<<"_rev">>, Props), + case couch_util:get_value(<<"_id">>, Props) of + null -> + Row#{doc => null}; + undefined -> + Row; + IncId -> + % use separate process to call fabric:open_doc + % to not interfere with current call + {Pid, Ref} = spawn_monitor(fun() -> + exit( + case Rev0 of + undefined -> + case fabric:open_doc(DbName, IncId, []) of + {ok, NewDoc} -> + Row#{doc => couch_doc:to_json_obj(NewDoc, [])}; + {not_found, _} -> + Row#{doc => null}; + Else -> + Row#{doc => {error, Else}} + end; + Rev0 -> + Rev = couch_doc:parse_rev(Rev0), + case fabric:open_revs(DbName, IncId, [Rev], []) of + {ok, [{ok, NewDoc}]} -> + Row#{doc => couch_doc:to_json_obj(NewDoc, [])}; + {ok, [{{not_found, _}, Rev}]} -> + Row#{doc => null}; + Else -> + Row#{doc => {error, Else}} + end + end + ) + end), + receive + {'DOWN', Ref, process, Pid, Resp} -> Review Comment: If we have a utilty function to fetch the doc body only, wonder if we could remove some of the duplication between the two cases. ########## src/fabric/src/fabric_view.erl: ########## @@ -241,11 +246,63 @@ possibly_embed_doc( end; _ -> Row + end; +possibly_embed_doc( + #collector{db_name = DbName, query_args = Args}, + #{key := _Key, id := _Id, value := Value, doc := _Doc} = Row Review Comment: Minor nit: if we don't use `_Key`, `_Id`, etc, it's better to just keep them as `_`. We only care about matching on existence of keys (`key`, `id`, ...) only. ########## src/fabric/src/fabric_view_all_docs.erl: ########## @@ -187,6 +187,49 @@ shards(Db, Args) -> end, fabric_view:get_shards(Db, NewArgs). +handle_row(Row0, {Worker, _} = Source, State) -> + #collector{query_args = Args, counters = Counters0, rows = Rows0} = State, + #mrargs{extra = Options, direction = Dir} = Args, + Row1 = + case Row0 of + #view_row{} -> Row0#view_row{worker = Source}; + #{} -> Row0#{worker => Source} + end, + % It has to be ensured that rows of the same format are merged in case of + % mixed-version cluster scenarios. + Row = + case couch_util:get_value(execution_stats_rolling, Options, false) of Review Comment: Seeing this case in fabric_view_all_docs indicates we should probably make a separate map_view_row or similar option. ########## src/fabric/src/fabric_view.erl: ########## @@ -349,7 +410,11 @@ transform_row(#view_row{key = Key, id = Id, value = Value, doc = undefined}) -> transform_row(#view_row{key = Key, id = _Id, value = _Value, doc = {error, Reason}}) -> {row, [{id, error}, {key, Key}, {value, Reason}]}; transform_row(#view_row{key = Key, id = Id, value = Value, doc = Doc}) -> - {row, [{id, Id}, {key, Key}, {value, Value}, {doc, Doc}]}. + {row, [{id, Id}, {key, Key}, {value, Value}, {doc, Doc}]}; +transform_row(#{} = Row) -> + #{key := Key, id := Id, value := Value, doc := Doc, stats := Stats} = Row, + {row, Props} = transform_row(#view_row{key = Key, id = Id, value = Value, doc = Doc}), + {row, Props, Stats}. Review Comment: Thinking out loud: this feels a slightly awkward, that the callback would sometimes be called with 2 or 3 times. What we if added the `stats` field to the row props and accessed them later or removed them later. That's just so that the transform calls always return `{row, Props}` only. ########## src/fabric/src/fabric_view_all_docs.erl: ########## @@ -273,10 +313,24 @@ handle_message({execution_stats, _} = Msg, {_, From}, St) -> rexi:stream_ack(From), {Go, St#collector{user_acc = Acc}}. -merge_row(fwd, Row, Rows) -> +insert_row(_Dir, _Key, R, []) -> + [R]; +insert_row(fwd, Key, R, [H | T] = List) -> + V1 = maps:get(Key, R), + V2 = maps:get(Key, H), + if + V1 =< V2 -> [R | List]; + true -> [H | insert_row(fwd, Key, R, T)] + end; Review Comment: Nit: Avoid using `if` in Erlang as a rule, usually `case` is preferred. `case V1 =< V2 of true -> ... ; false -... end` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@couchdb.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org