This is an automated email from the ASF dual-hosted git repository. rnewson pushed a commit to branch pwrite in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 0c5d6d3b515a69c3f97093bb3e0b30cda5895538 Author: Robert Newson <[email protected]> AuthorDate: Sat May 23 16:32:06 2026 +0100 use pwrite for safety --- src/couch/priv/couch_cfile/couch_cfile.c | 83 +++++++++++++++++++++++++++++++ src/couch/src/couch_cfile.erl | 18 +++++++ src/couch/src/couch_file.erl | 15 ++---- src/couch/test/eunit/couch_file_tests.erl | 6 +-- 4 files changed, 109 insertions(+), 13 deletions(-) diff --git a/src/couch/priv/couch_cfile/couch_cfile.c b/src/couch/priv/couch_cfile/couch_cfile.c index c7e6f16f0..027af3cfd 100644 --- a/src/couch/priv/couch_cfile/couch_cfile.c +++ b/src/couch/priv/couch_cfile/couch_cfile.c @@ -199,6 +199,42 @@ static long efile_writev(int fd, SysIOVec *iov, int iovlen, posix_errno_t* res_e return result; } +// Copied from OTP just like efile_preadv. Differences are: +// - Pass file descriptor as int and errno result as a separate arg +// - Assume pwritev exists +// +long efile_pwritev(int fd, long offset, SysIOVec *iov, int iovlen, posix_errno_t* res_errno) { + + long bytes_written; + ssize_t result; + + bytes_written = 0; + + do { + if(iovlen < 1) { + result = 0; + break; + } + + result = pwritev(fd, (const struct iovec *)iov, MIN(IOV_MAX, iovlen), offset); + + if(result > 0) { + shift_iov(&iov, &iovlen, result); + bytes_written += result; + offset += result; + } + } while(result > 0 || (result < 0 && errno == EINTR)); + + *res_errno = errno; + + if(result == 0 && bytes_written > 0) { + return bytes_written; + } + + return result; +} + + // Copied from OTP. Differences are: // - File descriptor and return error passed in as separate args // - This is for datasync only so don't pass that extra argument in @@ -459,6 +495,52 @@ static ERL_NIF_TERM write_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[ #endif } +// Follows implementation from prim_file_nif.c +// +static ERL_NIF_TERM pwrite_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +#ifdef COUCH_CFILE_SUPPORTED + handle_t* hdl; + ErlNifIOVec vec, *input = &vec; + posix_errno_t res_errno = 0; + long bytes_written, offset; + ERL_NIF_TERM tail; + + if (argc != 3 + || !get_handle(env, argv[0], &hdl) + || !enif_is_number(env, argv[1]) + || !enif_inspect_iovec(env, 64, argv[2], &tail, &input)) { + return badarg(env); + } + + if(!enif_get_int64(env, argv[1], &offset) || offset < 0) { + return err_tup(env, EINVAL); + } + + // ------ Critical section start ------ + READ_LOCK; + if (hdl->fd < 0) { + READ_UNLOCK; + return err_tup(env, EINVAL); + } + bytes_written = efile_pwritev(hdl->fd, offset, input->iov, input->iovcnt, &res_errno); + READ_UNLOCK; + // ------- Critical section end ------ + + if(bytes_written < 0) { + return err_tup(env, res_errno); + } + + if(!enif_is_empty_list(env, tail)) { + return enif_make_tuple3(env, ATOM_CONTINUE, bytes_written, tail); + } + + return ATOM_OK; +#else + return err_tup(env, EINVAL) +#endif +} + + static ERL_NIF_TERM seek_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { #ifdef COUCH_CFILE_SUPPORTED handle_t* hdl; @@ -719,6 +801,7 @@ static ErlNifFunc funcs[] = { {"eof_nif", 1, eof_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"seek_nif", 3, seek_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"write_nif", 2, write_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, + {"pwrite_nif", 3, pwrite_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"datasync_nif", 1, datasync_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"truncate_nif", 1, truncate_nif, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"info_nif", 1, info_nif} diff --git a/src/couch/src/couch_cfile.erl b/src/couch/src/couch_cfile.erl index 3511fc162..431783bf7 100644 --- a/src/couch/src/couch_cfile.erl +++ b/src/couch/src/couch_cfile.erl @@ -31,6 +31,7 @@ position/2, datasync/1, write/2, + pwrite/3, truncate/1, fd/1, advise/4 @@ -53,6 +54,7 @@ eof_nif/1, seek_nif/3, write_nif/2, + pwrite_nif/3, datasync_nif/1, truncate_nif/1 ]). @@ -119,6 +121,9 @@ datasync(_) -> write(#file_descriptor{module = ?MODULE} = Fd, IOData) -> write_1(owner_handle(Fd), erlang:iolist_to_iovec(IOData)). +pwrite(#file_descriptor{module = ?MODULE} = Fd, Pos, IOData) -> + pwrite_1(owner_handle(Fd), Pos, erlang:iolist_to_iovec(IOData)). + truncate(#file_descriptor{module = ?MODULE} = Fd) -> truncate_nif(owner_handle(Fd)). @@ -215,6 +220,16 @@ write_1(Ref, IOVec) -> {error, Reason} end. +pwrite_1(Ref, Pos, IOVec) -> + case pwrite_nif(Ref, Pos, IOVec) of + {continue, BytesWritten, Remainder} -> + pwrite_1(Ref, Pos + BytesWritten, Remainder); + ok -> + ok; + {error, Reason} -> + {error, Reason} + end. + init() -> case os:type() of {unix, _} -> @@ -283,6 +298,9 @@ seek_nif(_, _, _) -> write_nif(_, _) -> {error, einval}. +pwrite_nif(_, _, _) -> + {error, einval}. + datasync_nif(_) -> {error, einval}. diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl index 2d0920f7e..e8e31c3b2 100644 --- a/src/couch/src/couch_file.erl +++ b/src/couch/src/couch_file.erl @@ -514,7 +514,7 @@ file_open_options(Options) -> true -> []; false -> - [append] + [write] end. maybe_track_open_os_files(Options) -> @@ -635,9 +635,9 @@ append_bins(#file{fd = Fd, eof = Pos} = File, Bins) -> Bins ), {AllBlocks, Resps} = lists:unzip(BlockResps), - case file:write(Fd, AllBlocks) of + case file:pwrite(Fd, Pos, AllBlocks) of ok -> {{ok, Resps}, File#file{eof = FinalPos}}; - Error -> {Error, reset_eof(File)} + Error -> {Error, File} end. pread(#file{} = File, PosL) -> @@ -777,9 +777,9 @@ handle_write_header(Bin, #file{fd = Fd, eof = Pos} = File) -> BlockOffset -> Padding = <<0:(8 * (?SIZE_BLOCK - BlockOffset))>> end, FinalBin = [Padding, <<1, BinSize:32/integer>> | make_blocks(5, [Bin])], - case file:write(Fd, FinalBin) of + case file:pwrite(Fd, Pos, FinalBin) of ok -> {ok, File#file{eof = Pos + iolist_size(FinalBin)}}; - {error, Error} -> {{error, Error}, reset_eof(File)} + {error, Error} -> {{error, Error}, File} end. read_multi_raw_iolists_int(#file{fd = Fd, eof = Eof} = File, PosLens) -> @@ -959,11 +959,6 @@ is_idle(#file{is_sys = false}) -> process_info(Pid) -> couch_util:process_dict_get(Pid, couch_file_fd). -%% in event of a partially successful write. -reset_eof(#file{} = File) -> - {ok, Eof} = file:position(File#file.fd, eof), - File#file{eof = Eof}. - -spec generate_checksum(binary()) -> <<_:128>>. generate_checksum(Bin) when is_binary(Bin) -> case generate_xxhash_checksums() of diff --git a/src/couch/test/eunit/couch_file_tests.erl b/src/couch/test/eunit/couch_file_tests.erl index c138a9c0a..b88956205 100644 --- a/src/couch/test/eunit/couch_file_tests.erl +++ b/src/couch/test/eunit/couch_file_tests.erl @@ -1021,11 +1021,11 @@ should_handle_error_of_the_second_sync(Fd) -> should_handle_error_of_the_file_write(Fd) -> meck:expect( file, - write, - ['_', '_'], + pwrite, + ['_', '_', '_'], meck:val({error, terminated}) ), ?assertEqual({error, terminated}, couch_file:write_header(Fd, {<<"some_data">>, 32}, [sync])), ?assertEqual(1, meck:num_calls(file, datasync, ['_'])), - ?assertEqual(1, meck:num_calls(file, write, ['_', '_'])), + ?assertEqual(1, meck:num_calls(file, pwrite, ['_', '_', '_'])), ok.
