Thanks all,
I'm attaching an initial, very early patch.

This is very much experimental and incomplete — the current implementation
does *not* yet capture all changes, enforce ordering guarantees, or support
recovery — but I wanted to share it early to gather feedback.

It logs entries like this to /tmp/journal.log:

=== BEGIN TX 504 === [2025-06-27 07:35:05.116]
action: sync
name: (unknown)
parent inode: 0
inode:        49142
mode:         010100666
size:         0 bytes
nlink:        1
blocks:       8
mtime: 1751006105
ctime: 1751006105
=== END TX 504 ===

There is a short header comment at the top of journal.c that describes what it
does (and what it doesn't yet do).

I tried to make it thread-safe and somewhat smart (a linear buffer that is
flushed when full, and so on), but I have only worked on it for short periods of time.

Thanks in advance
Milos


On Thu, Jun 26, 2025 at 4:52 PM Samuel Thibault <samuel.thiba...@gnu.org>
wrote:

> Hello,
>
> Milos Nikic, le mer. 25 juin 2025 14:36:04 -0700, a ecrit:
> > As a learning and exploration effort, I've started working on a toy
> journaling
> > layer inside ext2fs. The goal is to understand how journaling might look
> in a
> > user-space filesystem like Hurd’s, and whether it's feasible to
> implement a
> > basic journaling mechanism incrementally.
>
> Being userspace probably doesn't really have impact on the
> implementation of journaling.
>
> > So far, I’ve added a non-intrusive skeleton that:
> > - Hooks into `diskfs_init_diskfs` (for init) and
> `diskfs_sync_everything` (for
> > flushing),
>
> You'll want to plug at a lower level than diskfs_sync_everything, to
> catch all data and metadata writes and write to the log ahead of them.
>
> For real safety, we will need to introduce write barriers at the device
> RPC layer.
>
> Samuel
>
> > - Buffers log entries in memory and flushes to `/tmp/journal.log`,
> > - Outputs to the screen during boot if writing fails (e.g., due to early
> boot
> > or read-only FS),
> > - Is wrapped in a minimal interface (`journal_log`,
> `flush_journal_to_file`)
> > with guards for safety.
> >
> > The goal is **not** a production journaling layer, but rather to build a
> base
> > to explore correctness, crash safety, and design directions.
> >
> > You can see it show up during boot with messages like:
> >
> > Toy journaling: journal_init() called
> > Toy journaling: flushing journal to disk...
> >
> > I can also verify the presence of one of the init messages in
> /tmp/journal.log
> >
> > Before proceeding further:
> > - I'd appreciate any guidance on whether this is being plugged in the
> right
> > places.
> > - Are there preferred conventions or hooks I should be using instead?
> > - Would you be open to reviewing it as a small patch series while I
> iterate, or
> > should this stay on a branch until it's more mature?
> >
> > Thanks in advance for any input — and for the warm welcome so far!
> >
> > Best,
> > Milos Nikic
>
From d8987badd1ba8cb1b8e4b35126bd93fae4b634f7 Mon Sep 17 00:00:00 2001
From: Milos Nikic <nikic.mi...@gmail.com>
Date: Wed, 25 Jun 2025 19:09:54 +0100
Subject: [PATCH] Initial skeleton journaling, bare bones (but it flushes to a
 file).

---
 ext2fs/inode.c        |   3 +
 ext2fs/pager.c        |   3 +
 libdiskfs/Makefile    |   4 +-
 libdiskfs/init-init.c |   2 +
 libdiskfs/journal.c   | 260 ++++++++++++++++++++++++++++++++++++++++++
 libdiskfs/journal.h   |  19 +++
 6 files changed, 289 insertions(+), 2 deletions(-)
 create mode 100644 libdiskfs/journal.c
 create mode 100644 libdiskfs/journal.h

diff --git a/ext2fs/inode.c b/ext2fs/inode.c
index dc309ac8..a3560630 100644
--- a/ext2fs/inode.c
+++ b/ext2fs/inode.c
@@ -28,6 +28,7 @@
 #include <sys/statfs.h>
 #include <sys/statvfs.h>
 #include <sys/xattr.h>
+#include <libdiskfs/journal.h>
 
 /* these flags aren't actually defined by a header file yet, so temporarily
    disable them if necessary.  */
@@ -524,6 +525,8 @@ write_all_disknodes (void)
 void
 diskfs_write_disknode (struct node *np, int wait)
 {
+
+  journal_log_metadata(np, &(struct journal_entry_info){ .action = "sync" });
   struct ext2_inode *di = write_node (np);
   if (di)
     {
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index c55107a9..9174e3d5 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -25,6 +25,7 @@
 #include <inttypes.h>
 #include <hurd/store.h>
 #include "ext2fs.h"
+#include <libdiskfs/journal.h>
 
 /* XXX */
 #include "../libpager/priv.h"
@@ -1437,6 +1438,8 @@ diskfs_shutdown_pager (void)
 void
 diskfs_sync_everything (int wait)
 {
+  flush_journal_to_file();
+
   error_t sync_one (void *v_p)
     {
       struct pager *p = v_p;
diff --git a/libdiskfs/Makefile b/libdiskfs/Makefile
index aa6b24a4..9a025a92 100644
--- a/libdiskfs/Makefile
+++ b/libdiskfs/Makefile
@@ -32,7 +32,7 @@ IOSRCS= io-async-icky.c io-async.c io-duplicate.c io-get-conch.c io-revoke.c \
 	io-modes-on.c io-modes-set.c io-owner-mod.c io-owner-get.c \
 	io-pathconf.c io-prenotify.c io-read.c io-readable.c io-identity.c \
 	io-reauthenticate.c io-rel-conch.c io-restrict-auth.c io-seek.c \
-	io-select.c io-stat.c io-stubs.c io-write.c io-version.c io-sigio.c
+	io-select.c io-stat.c io-stubs.c io-write.c io-version.c io-sigio.c journal.c
 FSYSSRCS=fsys-getroot.c fsys-goaway.c fsys-startup.c fsys-getfile.c \
 	fsys-options.c fsys-syncfs.c fsys-forward.c \
 	fsys-get-children.c fsys-get-source.c
@@ -54,7 +54,7 @@ OTHERSRCS = conch-fetch.c conch-set.c dir-clear.c dir-init.c dir-renamed.c \
 	validate-mode.c validate-group.c validate-author.c validate-flags.c \
 	validate-rdev.c validate-owner.c priv.c get-source.c
 SRCS = $(OTHERSRCS) $(FSSRCS) $(IOSRCS) $(FSYSSRCS) $(IFSOCKSRCS)
-installhdrs = diskfs.h diskfs-pager.h
+installhdrs = diskfs.h diskfs-pager.h journal.h
 
 MIGSTUBS = fsServer.o ioServer.o fsysServer.o exec_startupServer.o \
 	fsys_replyUser.o fs_notifyUser.o ifsockServer.o \
diff --git a/libdiskfs/init-init.c b/libdiskfs/init-init.c
index f9b12f6f..8bc5914a 100644
--- a/libdiskfs/init-init.c
+++ b/libdiskfs/init-init.c
@@ -24,6 +24,7 @@
 #include <hurd/fsys.h>
 #include <stdio.h>
 #include <maptime.h>
+#include <libdiskfs/journal.h>
 
 /* For safe inlining of diskfs_node_disknode and
    diskfs_disknode_node.  */
@@ -98,6 +99,7 @@ diskfs_init_diskfs (void)
 
   _hurd_port_init (&_diskfs_exec_portcell, MACH_PORT_NULL);
 
+  journal_init();
   return 0;
 }
 
diff --git a/libdiskfs/journal.c b/libdiskfs/journal.c
new file mode 100644
index 00000000..4c8681dc
--- /dev/null
+++ b/libdiskfs/journal.c
@@ -0,0 +1,260 @@
+/*
+ * journal.c - Experimental journaling layer for Hurd's ext2fs/libdiskfs
+ *
+ * This is a work-in-progress implementation of a toy journaling layer
+ * intended for exploration and learning purposes. It logs basic metadata
+ * about file changes into a shared in-memory buffer, which is periodically
+ * flushed to a file (/tmp/journal.log).
+ *
+ * Features:
+ *   - Logs inode metadata (mode, size, nlink, mtime, ctime, etc.)
+ *   - Each log entry is wrapped in a transaction with a unique ID and timestamp
+ *   - Uses a fixed-size in-memory buffer with auto-flushing on overflow
+ *   - Timestamp includes millisecond precision
+ *   - Thread-safe using a mutex
+ *
+ * Missing / Not Implemented Yet:
+ *   - Write barriers or guarantees of ordering with actual FS operations
+ *   - Integration at a lower level to capture all metadata changes (not just sync hooks)
+ *   - Actual recovery mechanisms or replays from the journal
+ *   - Logging of inode or block bitmap changes
+ *   - File name resolution (only available if passed manually)
+ *   - UID/GID or finer-grained permission changes
+ *   - Disk-backed circular journal buffer for continuous logging
+ *   - Atomicity guarantees across flush boundaries (currently only soft protection)
+ *
+ * Warning:
+ *   This code is experimental and not suitable for production.
+ *   It is designed to support incremental development and learning.
+ *
+ * Author: Milos Nikic, 2025
+ */
+#include <libdiskfs/journal.h>
+
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <diskfs.h>
+
+#define JOURNAL_DIR_PATH "/tmp"  /* directory checked with stat() before each flush */
+#define JOURNAL_LOG_PATH JOURNAL_DIR_PATH "/journal.log"  /* file the buffer is appended to */
+#define JOURNAL_BUF_SIZE (64 * 1024)  /* capacity of journal_buf, in bytes */
+#define MAX_REASONABLE_TIME 4102444800  /* Jan 1, 2100 */
+#define MIN_REASONABLE_TIME 946684800   /* Jan 1, 2000 */
+
+static pthread_mutex_t journal_lock = PTHREAD_MUTEX_INITIALIZER;  /* held by journal_log_tx while it assigns a tx ID and appends */
+static char journal_buf[JOURNAL_BUF_SIZE];  /* pending log text, not NUL-terminated */
+static size_t journal_buf_used = 0;  /* number of bytes of journal_buf currently in use */
+static uint64_t journal_tx_id = 1;  /* ID that the next transaction will receive */
+
+/* Format the current local time as "YYYY-MM-DD HH:MM:SS.mmm" into BUF,
+ * writing at most BUFSIZE bytes including the terminating NUL.
+ *
+ * If the clock cannot be read or the time cannot be broken down, the text
+ * "unknown time" is stored instead, so that an uninitialized struct tm is
+ * never formatted.  */
+static void get_current_time_string(char *buf, size_t bufsize)
+{
+    struct timeval tv;
+    struct tm tm;
+
+    if (gettimeofday(&tv, NULL) != 0 || localtime_r(&tv.tv_sec, &tm) == NULL) {
+        snprintf(buf, bufsize, "unknown time");
+        return;
+    }
+
+    snprintf(buf, bufsize, "%04d-%02d-%02d %02d:%02d:%02d.%03ld",
+             tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+             tm.tm_hour, tm.tm_min, tm.tm_sec,
+             (long)(tv.tv_usec / 1000)); /* microseconds -> milliseconds */
+}
+
+/* Append MSG_LEN bytes of MSG plus a trailing '\n' to journal_buf.
+ *
+ * Returns true when the data was stored.  Returns false, leaving the buffer
+ * untouched, unless the message and its newline fit with at least one byte
+ * of the buffer left over.  This helper performs no locking of its own.  */
+static inline bool try_add_to_buffer(const char *msg, size_t msg_len)
+{
+    if (journal_buf_used + msg_len + 1 >= JOURNAL_BUF_SIZE)
+        return false;
+
+    memcpy(journal_buf + journal_buf_used, msg, msg_len);
+    journal_buf[journal_buf_used + msg_len] = '\n';
+    journal_buf_used += msg_len + 1;
+    return true;
+}
+
+/* Append one transaction to the in-memory journal: a "BEGIN TX" header line
+ * carrying the transaction ID and the current time, then BODY, then an
+ * "END TX" footer line.  BODY must be a NUL-terminated string.
+ *
+ * If the buffer lacks room it is flushed once, with journal_lock released,
+ * and the space check is repeated.  The transaction is dropped, with a
+ * message on stderr, when it can never fit or still does not fit afterwards.
+ * The ID is consumed only when the entry is actually stored, so IDs reach
+ * the buffer in increasing order and dropped entries do not use one up.  */
+static void journal_log_tx(const char *body)
+{
+    char time_str[128];
+    char header[128];
+    char footer[64];
+    size_t header_len, footer_len, total_len;
+    size_t body_len = strlen(body);
+    bool flush_tried = false;
+    bool flush_ok = false;
+
+    get_current_time_string(time_str, sizeof(time_str));
+
+    pthread_mutex_lock(&journal_lock);
+    for (;;) {
+        /* Format with the ID this entry would get if stored right now; the
+         * ID can move on while the lock is released for a flush, which is
+         * why the lines are rebuilt on every pass.  */
+        header_len = (size_t)snprintf(header, sizeof(header),
+                                      "=== BEGIN TX %" PRIu64 " === [%s]",
+                                      journal_tx_id, time_str);
+        if (header_len >= sizeof(header))
+            header_len = sizeof(header) - 1;  /* snprintf truncated or failed */
+        footer_len = (size_t)snprintf(footer, sizeof(footer),
+                                      "=== END TX %" PRIu64 " ===", journal_tx_id);
+        total_len = header_len + 1 + body_len + 1 + footer_len + 1; /* +1 per newline */
+
+        /* A transaction larger than the whole buffer can never be stored. */
+        if (total_len >= JOURNAL_BUF_SIZE) {
+            pthread_mutex_unlock(&journal_lock);
+            fprintf(stderr, "Toy journaling: transaction too large, dropping (%zu bytes)\n", total_len);
+            return;
+        }
+
+        if (journal_buf_used + total_len < JOURNAL_BUF_SIZE)
+            break;
+
+        if (flush_tried) {
+            pthread_mutex_unlock(&journal_lock);
+            if (flush_ok)
+                fprintf(stderr, "Toy journaling: still no space after flush, dropping transaction\n");
+            else
+                fprintf(stderr, "Toy journaling: flush failed, dropping transaction\n");
+            return;
+        }
+
+        /* A false return also covers "another thread already emptied the
+         * buffer", so it must not cause a drop by itself: the space check
+         * at the top of the loop decides.  */
+        pthread_mutex_unlock(&journal_lock);
+        flush_ok = flush_journal_to_file();
+        flush_tried = true;
+        pthread_mutex_lock(&journal_lock);
+    }
+
+    journal_tx_id++;
+    try_add_to_buffer(header, header_len);
+    try_add_to_buffer(body, body_len);
+    try_add_to_buffer(footer, footer_len);
+    pthread_mutex_unlock(&journal_lock);
+}
+
+/* Append the buffered journal text to JOURNAL_LOG_PATH and empty the buffer.
+ *
+ * Returns true only if every pending byte was written and the file was
+ * closed without error.  Returns false if there was nothing to flush, if
+ * another flush is already running, or if the directory or file is unusable
+ * (in those cases the buffer is left as it was).
+ *
+ * The pending text is copied out and the buffer reset while journal_lock is
+ * held, and the file is written with the lock released, so threads that log
+ * during the write neither race with it nor wait for it.  A second mutex,
+ * taken with trylock, keeps flushes from overlapping without ever blocking a
+ * caller.  */
+bool flush_journal_to_file(void)
+{
+    static pthread_mutex_t flush_lock = PTHREAD_MUTEX_INITIALIZER;
+    static char snapshot[JOURNAL_BUF_SIZE];  /* guarded by flush_lock */
+    struct stat st;
+    FILE *f = NULL;
+    size_t pending;
+    bool success = false;
+
+    if (pthread_mutex_trylock(&flush_lock) != 0)
+        return false;  /* a flush is already in progress */
+
+    pthread_mutex_lock(&journal_lock);
+    pending = journal_buf_used;
+    pthread_mutex_unlock(&journal_lock);
+    if (pending == 0) {
+        fprintf(stderr, "Toy journaling: Nothing to flush. Skipping.\n");
+        goto out;
+    }
+
+    if (stat(JOURNAL_DIR_PATH, &st) != 0 || !S_ISDIR(st.st_mode)) {
+        fprintf(stderr, "Toy journaling: %s not accessible or not a directory. Skipping flush.\n", JOURNAL_DIR_PATH);
+        goto out;
+    }
+
+    f = fopen(JOURNAL_LOG_PATH, "a");
+    if (!f) {
+        fprintf(stderr, "Toy journaling: Failed to open %s: %s. Skipping flush.\n",
+                JOURNAL_LOG_PATH, strerror(errno));
+        goto out;
+    }
+
+    /* Take the contents only once the file is known to be writable, so a
+     * failed open does not throw buffered entries away.  */
+    pthread_mutex_lock(&journal_lock);
+    pending = journal_buf_used;
+    memcpy(snapshot, journal_buf, pending);
+    journal_buf_used = 0;
+    pthread_mutex_unlock(&journal_lock);
+
+    fprintf(stderr, "Toy journaling: Writing to %zu chars to %s file.\n", pending, JOURNAL_LOG_PATH);
+    success = fwrite(snapshot, 1, pending, f) == pending;
+    if (!success) {
+        fprintf(stderr, "Toy journaling: fwrite to %s failed: %s\n", JOURNAL_LOG_PATH, strerror(errno));
+    }
+    /* fclose performs the final write-out of stdio's buffer, so its failure
+     * means the data may not have reached the file.  */
+    if (fclose(f) != 0) {
+        fprintf(stderr, "Toy journaling: fclose failed: %s\n", strerror(errno));
+        success = false;
+    }
+
+out:
+    pthread_mutex_unlock(&flush_lock);
+    return success;
+}
+
+/* Start-up hook for the journal.  At present it only announces on stderr
+ * that it was called; no state is set up here.  */
+void journal_init(void)
+{
+    fputs("Toy journaling: journal_init() called\n", stderr);
+}
+
+/* Shutdown hook for the journal.  At present it only announces on stderr
+ * that it was called; it does not flush the buffer.  */
+void journal_shutdown(void)
+{
+    fputs("Toy journaling: journal_shutdown() called\n", stderr);
+}
+
+struct tx_buffer {  /* scratch area in which the body of one transaction is assembled */
+    char buf[2048];  /* text appended so far by tx_printf */
+    size_t used;  /* number of bytes of buf already filled */
+};
+
+
+/* Append printf-style formatted text to TX, keeping tx->buf NUL-terminated.
+ *
+ * TX->used must be smaller than sizeof(tx->buf) on entry.  Output that does
+ * not fit is cut off at the end of the buffer and reported on stderr; from
+ * then on TX is full and later appends are cut off as well.  A formatting
+ * error leaves the existing contents as they were.  */
+static void tx_printf(struct tx_buffer *tx, const char *fmt, ...)
+{
+    size_t available = sizeof(tx->buf) - tx->used;
+    va_list ap;
+
+    va_start(ap, fmt);
+    int written = vsnprintf(tx->buf + tx->used, available, fmt, ap);
+    va_end(ap);
+
+    if (written < 0) {
+        /* Output error: discard this append but keep the text terminated. */
+        tx->buf[tx->used] = '\0';
+        fprintf(stderr, "Toy journaling: tx_printf formatting failed\n");
+    } else if ((size_t)written < available) {
+        /* Everything fit; this also covers a legitimate empty result. */
+        tx->used += (size_t)written;
+    } else {
+        fprintf(stderr, "Toy journaling: tx_printf truncated output (wanted %d bytes, had %zu)\n",
+                written, available);
+        tx->used = sizeof(tx->buf) - 1;
+    }
+}
+
+/* Append "LABEL: <seconds>" to TX, or a bracketed warning when VALUE lies
+ * outside the open interval (MIN_REASONABLE_TIME, MAX_REASONABLE_TIME).
+ *
+ * The value is printed through intmax_t so that a time_t wider than long is
+ * not truncated.  */
+static void
+tx_log_time_field(struct tx_buffer *tx, const char *label, time_t value)
+{
+    bool plausible = value > MIN_REASONABLE_TIME && value < MAX_REASONABLE_TIME;
+
+    if (plausible)
+        tx_printf(tx, "%s: %" PRIdMAX "\n", label, (intmax_t)value);
+    else
+        tx_printf(tx, "%s: [invalid or uninitialized: %" PRIdMAX "]\n", label, (intmax_t)value);
+}
+
+/* Log the stat fields of a node as one journal transaction.
+ *
+ * NODE_PTR is treated as a struct node *; a NULL pointer is reported on
+ * stderr and ignored.  INFO may be NULL, and so may its action and name
+ * members; "unknown", "(unknown)" and parent inode 0 are logged in their
+ * place.  For a node whose link count is 0 the entry ends after the nlink
+ * line.  */
+void
+journal_log_metadata(void *node_ptr, const struct journal_entry_info *info)
+{
+    struct node *np = node_ptr;
+
+    if (!np) {
+        fprintf(stderr, "Toy journaling: Null node passed. Skipping.\n");
+        return;
+    }
+
+    const struct stat *st = &np->dn_stat;
+    struct tx_buffer tx = { .used = 0 };
+
+    tx_printf(&tx, "action: %s\n", info && info->action ? info->action : "unknown");
+    tx_printf(&tx, "name: %s\n", info && info->name ? info->name : "(unknown)");
+    tx_printf(&tx, "parent inode: %" PRIuMAX "\n", (uintmax_t)(info ? info->parent_ino : 0));
+    tx_printf(&tx, "inode:        %" PRIuMAX "\n", (uintmax_t)st->st_ino);
+
+    if (st->st_mode == 0)
+        tx_printf(&tx, "mode:         (unset)\n");
+    else
+        tx_printf(&tx, "mode:         0%o\n", (unsigned int)st->st_mode);
+
+    /* Test the st_size value itself: narrowing it to ssize_t first would
+     * make large files look negative wherever ssize_t is the narrower type. */
+    if (st->st_size < 0)
+        tx_printf(&tx, "size:         (invalid: negative)\n");
+    else
+        tx_printf(&tx, "size:         %" PRIdMAX " bytes\n", (intmax_t)st->st_size);
+
+    if (st->st_nlink == 0) {
+        tx_printf(&tx, "nlink:        0 (file may have been unlinked, skipping rest)\n");
+    } else {
+        tx_printf(&tx, "nlink:        %" PRIuMAX "\n", (uintmax_t)st->st_nlink);
+        tx_printf(&tx, "blocks:       %" PRIuMAX "\n", (uintmax_t)st->st_blocks);
+        tx_log_time_field(&tx, "mtime", st->st_mtime);
+        tx_log_time_field(&tx, "ctime", st->st_ctime);
+    }
+
+    journal_log_tx(tx.buf);
+}
diff --git a/libdiskfs/journal.h b/libdiskfs/journal.h
new file mode 100644
index 00000000..8ae0a575
--- /dev/null
+++ b/libdiskfs/journal.h
@@ -0,0 +1,19 @@
+#ifndef JOURNAL_H
+#define JOURNAL_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct journal_entry_info {  /* optional context for journal_log_metadata; members may be left NULL/0 */
+    const char *action;      // "sync", "create", "unlink", etc.; logged as "unknown" when NULL
+    const char *name;        // filename if available; logged as "(unknown)" when NULL
+    ino_t parent_ino;        // parent inode if known
+    // Future: uid, gid, device, flags, etc.
+};
+
+void journal_init(void);  /* currently only prints a notice on stderr */
+void journal_shutdown(void);  /* currently only prints a notice on stderr */
+bool flush_journal_to_file(void);  /* appends buffered entries to the log file; true only if all bytes were written */
+void journal_log_metadata(void *node_ptr, const struct journal_entry_info *info);  /* logs the node's stat data as one transaction; info may be NULL */
+
+#endif /* JOURNAL_H */
-- 
2.40.1

Reply via email to