On Mon, Sep 01, 2025 at 01:05:16PM +0200, Jann Horn wrote:
> On Thu, Aug 28, 2025 at 11:01 PM Serge E. Hallyn <[email protected]> wrote:
> > On Wed, Aug 27, 2025 at 05:32:02PM -0700, Andy Lutomirski wrote:
> > > On Wed, Aug 27, 2025 at 5:14 PM Aleksa Sarai <[email protected]> wrote:
> > > >
> > > > On 2025-08-26, Mickaël Salaün <[email protected]> wrote:
> > > > > On Tue, Aug 26, 2025 at 11:07:03AM +0200, Christian Brauner wrote:
> > > > > > > Nothing has changed in that regard and I'm not interested in stuffing
> > > > > > > the VFS APIs full of special-purpose behavior to work around the fact
> > > > > > > that this is work that needs to be done in userspace. Change the apps,
> > > > > > > stop pushing more and more cruft into the VFS that has no business
> > > > > > > there.
> > > > >
> > > > > > It would be interesting to know how to patch user space to get the same
> > > > > > guarantees...  Do you think I would propose a kernel patch otherwise?
> > > >
> > > > You could mmap the script file with MAP_PRIVATE. This is the *actual*
> > > > protection the kernel uses against overwriting binaries (yes, ETXTBSY is
> > > > nice but IIRC there are ways to get around it anyway).
> > >
> > > Wait, really?  MAP_PRIVATE prevents writes to the mapping from
> > > affecting the file, but I don't think that writes to the file will
> > > break the MAP_PRIVATE CoW if it's not already broken.
> > >
> > > IPython says:
> > >
> > > In [1]: import mmap, tempfile
> > >
> > > In [2]: f = tempfile.TemporaryFile()
> > >
> > > In [3]: f.write(b'initial contents')
> > > Out[3]: 16
> > >
> > > In [4]: f.flush()
> > >
> > > In [5]: map = mmap.mmap(f.fileno(), f.tell(), flags=mmap.MAP_PRIVATE,
> > > prot=mmap.PROT_READ)
> > >
> > > In [6]: map[:]
> > > Out[6]: b'initial contents'
> > >
> > > In [7]: f.seek(0)
> > > Out[7]: 0
> > >
> > > In [8]: f.write(b'changed')
> > > Out[8]: 7
> > >
> > > In [9]: f.flush()
> > >
> > > In [10]: map[:]
> > > Out[10]: b'changed contents'
> >
> > That was surprising to me, however, if I split the reader
> > and writer into different processes, so
> 
> Testing this in python is a terrible idea because it obfuscates the
> actual syscalls from you.

Hah, I was just trying to fit in :), but of course you're right.
Redoing it in straight C, I'm getting the updates.

-serge

// mmap-w.c (creates and overwrites)
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

#define FIRST "Initial contents"
#define SECOND "updated contents"

/*
 * mmap-w: writer half of the MAP_PRIVATE visibility experiment.
 *
 * Writes FIRST to /tmp/m and fsyncs it, blocks until one byte (or EOF)
 * arrives on stdin, then seeks back to offset 0 and overwrites the start
 * of the file with SECOND through the same descriptor.
 *
 * Returns 0 after printing "done"; on any failure prints a message and
 * returns 1.  Errors return from main() rather than calling _exit() so the
 * stdio buffer holding the message is flushed even when stdout is not a
 * terminal.
 */
int main(void) {
        int fd, rc;
        char c;

        /* No O_TRUNC: an already existing /tmp/m is overwritten in place. */
        fd = open("/tmp/m", O_CREAT | O_RDWR, 0644);
        if (fd < 0) {
                printf("failed to open /tmp/m: %m\n");
                return 1;
        }

        /* sizeof counts the literal's terminating NUL, so it is written too. */
        rc = write(fd, FIRST, sizeof(FIRST));
        if (rc < 0) {
                printf("write failed: %m\n");
                return 1;
        }
        rc = fsync(fd);
        if (rc < 0) {
                printf("flush failed: %m\n");
                return 1;
        }

        /*
         * Pause so the reader can map the file and check the initial
         * contents.  The result is not inspected: a byte, EOF or an error
         * all just mean "carry on".
         */
        read(STDIN_FILENO, &c, 1);

        printf("updating the contents\n");

        rc = lseek(fd, 0, SEEK_SET);
        if (rc < 0) {
                printf("seek failed; %m\n");
                return 1;
        }

        rc = write(fd, SECOND, sizeof(SECOND));
        /* Check the write's result (rc), not the descriptor opened earlier. */
        if (rc < 0) {
                printf("write failed: %m\n");
                return 1;
        }
        rc = close(fd);
        if (rc < 0) {
                printf("close failed: %m\n");
                return 1;
        }
        printf("done\n");
        return 0;
}

// mmap-r.c (checks and re-checks contents)
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <string.h>

#define FIRST "Initial contents"
/*
 * Spelled exactly like the SECOND that mmap-w writes (lower-case 'u');
 * with a capital 'U' the second comparison below could never succeed.
 */
#define SECOND "updated contents"

/* Number of leading bytes that are compared and reported. */
#define PREFIX_LEN 7

/* Print "m is " followed by the first PREFIX_LEN bytes of the mapping. */
static void report_prefix(const char *m) {
        printf("m is ");
        fwrite(m, 1, PREFIX_LEN, stdout);
        putchar('\n');
}

/*
 * mmap-r: reader half of the MAP_PRIVATE visibility experiment.
 *
 * Maps /tmp/m read-only with MAP_PRIVATE and checks that it starts with
 * the first PREFIX_LEN bytes of FIRST.  It then blocks until one byte (or
 * EOF) arrives on stdin and re-checks the same mapping against SECOND.
 *
 * Returns 0 after printing "done" when both checks match.  On a mismatch
 * it prints the bytes actually seen and returns 1; open/mmap failures also
 * return 1.  Errors return from main() rather than calling _exit() so the
 * buffered output is flushed even when stdout is not a terminal.
 */
int main(void) {
        int fd;
        char *m;
        char c;

        fd = open("/tmp/m", O_RDONLY);
        if (fd < 0) {
                printf("failed to open /tmp/m: %m\n");
                return 1;
        }

        /* Only the first PREFIX_LEN bytes of the 40-byte mapping are read. */
        m = mmap(NULL, 40, PROT_READ, MAP_PRIVATE, fd, 0);
        if (m == MAP_FAILED) {
                printf("mmap failed: %m\n");
                return 1;
        }

        if (strncmp(m, FIRST, PREFIX_LEN) != 0) {
                report_prefix(m);
                return 1;
        }

        /*
         * Pause here; the writer is expected to overwrite the file before
         * this returns.  The result is not inspected: a byte, EOF or an
         * error all just mean "carry on".
         */
        read(STDIN_FILENO, &c, 1);

        /* Same mapping, no remap: does it now show the writer's new data? */
        if (strncmp(m, SECOND, PREFIX_LEN) != 0) {
                report_prefix(m);
                return 1;
        }

        printf("done\n");
        return 0;
}

Reply via email to