Thanks all, I'm attaching the initial, very early patch here. This is very much experimental and incomplete — the current implementation does *not* yet capture all changes, enforce ordering guarantees, or support recovery — but I wanted to share it early to gather feedback.
It logs things like this into /tmp/journal.log: === BEGIN TX 504 === [2025-06-27 07:35:05.116] action: sync name: (unknown) parent inode: 0 inode: 49142 mode: 010100666 size: 0 bytes nlink: 1 blocks: 8 mtime: 1751006105 ctime: 1751006105 === END TX 504 === There is a small header comment in journal.c that says what it does (and what it doesn't yet do). I tried to make it thread-safe, and somewhat smart (linear buffer, flushes when full and so on), but I have only been on it for short periods of time. Thanks in advance Milos On Thu, Jun 26, 2025 at 4:52 PM Samuel Thibault <samuel.thiba...@gnu.org> wrote: > Hello, > > Milos Nikic, le mer. 25 juin 2025 14:36:04 -0700, a ecrit: > > As a learning and exploration effort, I've started working on a toy > journaling > > layer inside ext2fs. The goal is to understand how journaling might look > in a > > user-space filesystem like Hurd's, and whether it's feasible to > implement a > > basic journaling mechanism incrementally. > > Being userspace probably doesn't really have impact on the > implementation of journaling. > > > So far, I've added a non-intrusive skeleton that: > > - Hooks into `diskfs_init_diskfs` (for init) and > `diskfs_sync_everything` (for > > flushing), > > You'll want to plug at a lower level than diskfs_sync_everything, to > catch all data and metadata writes and write to the log ahead of them. > > For real safety, we will need to introduce write barriers at the device > RPC layer. > > Samuel > > > - Buffers log entries in memory and flushes to `/tmp/journal.log`, > > - Outputs to the screen during boot if writing fails (e.g., due to early > boot > > or read-only FS), > > - Is wrapped in a minimal interface (`journal_log`, > `flush_journal_to_file`) > > with guards for safety. > > > > The goal is **not** a production journaling layer, but rather to build a > base > > to explore correctness, crash safety, and design directions. 
> > > > You can see it show up during boot with messages like: > > > > Toy journaling: journal_init() called > > Toy journaling: flushing journal to disk... > > > > I can also verify the presence of one of the init messages in > /tmp/journal.log > > > > Before proceeding further: > > - I'd appreciate any guidance on whether this is being plugged in the > right > > places. > > - Are there preferred conventions or hooks I should be using instead? > > - Would you be open to reviewing it as a small patch series while I > iterate, or > > should this stay on a branch until it's more mature? > > > > Thanks in advance for any input — and for the warm welcome so far! > > > > Best, > > Milos Nikic >
From d8987badd1ba8cb1b8e4b35126bd93fae4b634f7 Mon Sep 17 00:00:00 2001 From: Milos Nikic <nikic.mi...@gmail.com> Date: Wed, 25 Jun 2025 19:09:54 +0100 Subject: [PATCH] Initial skeleton journaling, bare bones (but it flushes to a file). --- ext2fs/inode.c | 3 + ext2fs/pager.c | 3 + libdiskfs/Makefile | 4 +- libdiskfs/init-init.c | 2 + libdiskfs/journal.c | 260 ++++++++++++++++++++++++++++++++++++++++++ libdiskfs/journal.h | 19 +++ 6 files changed, 289 insertions(+), 2 deletions(-) create mode 100644 libdiskfs/journal.c create mode 100644 libdiskfs/journal.h diff --git a/ext2fs/inode.c b/ext2fs/inode.c index dc309ac8..a3560630 100644 --- a/ext2fs/inode.c +++ b/ext2fs/inode.c @@ -28,6 +28,7 @@ #include <sys/statfs.h> #include <sys/statvfs.h> #include <sys/xattr.h> +#include <libdiskfs/journal.h> /* these flags aren't actually defined by a header file yet, so temporarily disable them if necessary. */ @@ -524,6 +525,8 @@ write_all_disknodes (void) void diskfs_write_disknode (struct node *np, int wait) { + + journal_log_metadata(np, &(struct journal_entry_info){ .action = "sync" }); struct ext2_inode *di = write_node (np); if (di) { diff --git a/ext2fs/pager.c b/ext2fs/pager.c index c55107a9..9174e3d5 100644 --- a/ext2fs/pager.c +++ b/ext2fs/pager.c @@ -25,6 +25,7 @@ #include <inttypes.h> #include <hurd/store.h> #include "ext2fs.h" +#include <libdiskfs/journal.h> /* XXX */ #include "../libpager/priv.h" @@ -1437,6 +1438,8 @@ diskfs_shutdown_pager (void) void diskfs_sync_everything (int wait) { + flush_journal_to_file(); + error_t sync_one (void *v_p) { struct pager *p = v_p; diff --git a/libdiskfs/Makefile b/libdiskfs/Makefile index aa6b24a4..9a025a92 100644 --- a/libdiskfs/Makefile +++ b/libdiskfs/Makefile @@ -32,7 +32,7 @@ IOSRCS= io-async-icky.c io-async.c io-duplicate.c io-get-conch.c io-revoke.c \ io-modes-on.c io-modes-set.c io-owner-mod.c io-owner-get.c \ io-pathconf.c io-prenotify.c io-read.c io-readable.c io-identity.c \ io-reauthenticate.c io-rel-conch.c 
io-restrict-auth.c io-seek.c \ - io-select.c io-stat.c io-stubs.c io-write.c io-version.c io-sigio.c + io-select.c io-stat.c io-stubs.c io-write.c io-version.c io-sigio.c journal.c FSYSSRCS=fsys-getroot.c fsys-goaway.c fsys-startup.c fsys-getfile.c \ fsys-options.c fsys-syncfs.c fsys-forward.c \ fsys-get-children.c fsys-get-source.c @@ -54,7 +54,7 @@ OTHERSRCS = conch-fetch.c conch-set.c dir-clear.c dir-init.c dir-renamed.c \ validate-mode.c validate-group.c validate-author.c validate-flags.c \ validate-rdev.c validate-owner.c priv.c get-source.c SRCS = $(OTHERSRCS) $(FSSRCS) $(IOSRCS) $(FSYSSRCS) $(IFSOCKSRCS) -installhdrs = diskfs.h diskfs-pager.h +installhdrs = diskfs.h diskfs-pager.h journal.h MIGSTUBS = fsServer.o ioServer.o fsysServer.o exec_startupServer.o \ fsys_replyUser.o fs_notifyUser.o ifsockServer.o \ diff --git a/libdiskfs/init-init.c b/libdiskfs/init-init.c index f9b12f6f..8bc5914a 100644 --- a/libdiskfs/init-init.c +++ b/libdiskfs/init-init.c @@ -24,6 +24,7 @@ #include <hurd/fsys.h> #include <stdio.h> #include <maptime.h> +#include <libdiskfs/journal.h> /* For safe inlining of diskfs_node_disknode and diskfs_disknode_node. */ @@ -98,6 +99,7 @@ diskfs_init_diskfs (void) _hurd_port_init (&_diskfs_exec_portcell, MACH_PORT_NULL); + journal_init(); return 0; } diff --git a/libdiskfs/journal.c b/libdiskfs/journal.c new file mode 100644 index 00000000..4c8681dc --- /dev/null +++ b/libdiskfs/journal.c @@ -0,0 +1,260 @@ +/* + * journal.c - Experimental journaling layer for Hurd's ext2fs/libdiskfs + * + * This is a work-in-progress implementation of a toy journaling layer + * intended for exploration and learning purposes. It logs basic metadata + * about file changes into a shared in-memory buffer, which is periodically + * flushed to a file (/tmp/journal.log). + * + * Features: + * - Logs inode metadata (mode, size, nlink, mtime, ctime, etc.) 
+ * - Each log entry is wrapped in a transaction with a unique ID and timestamp + * - Uses a fixed-size in-memory buffer with auto-flushing on overflow + * - Timestamp includes millisecond precision + * - Thread-safe using a mutex + * + * Missing / Not Implemented Yet: + * - Write barriers or guarantees of ordering with actual FS operations + * - Integration at a lower level to capture all metadata changes (not just sync hooks) + * - Actual recovery mechanisms or replays from the journal + * - Logging of inode or block bitmap changes + * - File name resolution (only available if passed manually) + * - UID/GID or finer-grained permission changes + * - Disk-backed circular journal buffer for continuous logging + * - Atomicity guarantees across flush boundaries (currently only soft protection) + * + * Warning: + * This code is experimental and not suitable for production. + * It is designed to support incremental development and learning. + * + * Author: Milos Nikic, 2025 + */ +#include <stdio.h> +#include <inttypes.h> +#include <time.h> +#include <string.h> +#include <errno.h> +#include <sys/stat.h> +#include <libdiskfs/journal.h> +#include <diskfs.h> +#include <sys/types.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/time.h> +#include <pthread.h> + +#define JOURNAL_DIR_PATH "/tmp" +#define JOURNAL_LOG_PATH JOURNAL_DIR_PATH "/journal.log" +#define JOURNAL_BUF_SIZE (64 * 1024) +#define MAX_REASONABLE_TIME 4102444800 /* Jan 1, 2100 */ +#define MIN_REASONABLE_TIME 946684800 /* Jan 1, 2000 */ + +static pthread_mutex_t journal_lock = PTHREAD_MUTEX_INITIALIZER; +static char journal_buf[JOURNAL_BUF_SIZE]; +static size_t journal_buf_used = 0; +static uint64_t journal_tx_id = 1; + +static void get_current_time_string(char *buf, size_t bufsize) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + + struct tm tm; + localtime_r(&tv.tv_sec, &tm); + + snprintf(buf, bufsize, "%04d-%02d-%02d %02d:%02d:%02d.%03ld", + tm.tm_year + 1900, + tm.tm_mon + 1, + tm.tm_mday, + 
tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)tv.tv_usec / 1000); // convert microseconds to milliseconds +} + +static inline bool try_add_to_buffer(const char *msg, size_t msg_len) +{ + size_t total_len = msg_len + 1; // +1 for newline + + if (journal_buf_used + total_len < JOURNAL_BUF_SIZE) { + memcpy(&journal_buf[journal_buf_used], msg, msg_len); + journal_buf_used += msg_len; + journal_buf[journal_buf_used++] = '\n'; + return true; + } + return false; +} + +static void journal_log_tx(const char *body) +{ + char time_str[128]; + get_current_time_string(time_str, sizeof(time_str)); + + // Prepare transaction header/footer strings here to know length before locking + char header[128]; + char footer[64]; + uint64_t tx_id; + + // We need body length for buffer size checks + size_t body_len = strlen(body); + size_t header_len, footer_len, total_len; + + // Lock scope to get tx_id and prepare header/footer lengths + pthread_mutex_lock(&journal_lock); + tx_id = journal_tx_id++; + + header_len = snprintf(header, sizeof(header), "=== BEGIN TX %" PRIu64 " === [%s]", tx_id, time_str); + footer_len = snprintf(footer, sizeof(footer), "=== END TX %" PRIu64 " ===", tx_id); + + total_len = header_len + 1 + body_len + 1 + footer_len + 1; // +1 for each newline + + // If transaction bigger than buffer, drop it + if (total_len >= JOURNAL_BUF_SIZE) { + pthread_mutex_unlock(&journal_lock); + fprintf(stderr, "Toy journaling: transaction too large, dropping (%zu bytes)\n", total_len); + return; + } + + // If not enough space, unlock, flush, then re-lock and re-check + if (journal_buf_used + total_len >= JOURNAL_BUF_SIZE) { + pthread_mutex_unlock(&journal_lock); + if (!flush_journal_to_file()) { + fprintf(stderr, "Toy journaling: flush failed, dropping transaction\n"); + return; + } + pthread_mutex_lock(&journal_lock); + // Re-check space after flush, someone else could have logged + if (journal_buf_used + total_len >= JOURNAL_BUF_SIZE) { + pthread_mutex_unlock(&journal_lock); + 
fprintf(stderr, "Toy journaling: still no space after flush, dropping transaction\n"); + return; + } + } + + try_add_to_buffer(header, header_len); + try_add_to_buffer(body, body_len); + try_add_to_buffer(footer, footer_len); + + pthread_mutex_unlock(&journal_lock); +} + +bool flush_journal_to_file(void) +{ + if (journal_buf_used == 0) { + fprintf(stderr, "Toy journaling: Nothing to flush. Skipping.\n"); + return false; + } + struct stat st; + if (stat(JOURNAL_DIR_PATH, &st) != 0 || !S_ISDIR(st.st_mode)) { + fprintf(stderr, "Toy journaling: %s not accessible or not a directory. Skipping flush.\n", JOURNAL_DIR_PATH); + return false; + } + FILE *f = fopen(JOURNAL_LOG_PATH, "a"); + if (f) { + fprintf(stderr, "Toy journaling: Writing to %zu chars to %s file.\n", journal_buf_used, JOURNAL_LOG_PATH); + size_t written = fwrite(journal_buf, 1, journal_buf_used, f); + bool success = written == journal_buf_used; + if (!success) { + fprintf(stderr, "Toy journaling: fwrite to %s failed: %s\n", JOURNAL_LOG_PATH, strerror(errno)); + } + if (fclose(f) != 0) { + fprintf(stderr, "Toy journaling: fclose failed: %s\n", strerror(errno)); + } + journal_buf_used = 0; + return success; + } else { + fprintf(stderr, "Toy journaling: Failed to open %s: %s. Skipping flush.\n", + JOURNAL_LOG_PATH, strerror(errno)); + return false; + } +} + +void journal_init(void) +{ + fprintf(stderr, "Toy journaling: journal_init() called\n"); +} + +void journal_shutdown(void) +{ + fprintf(stderr, "Toy journaling: journal_shutdown() called\n"); +} + +struct tx_buffer { + char buf[2048]; + size_t used; +}; + + +static void tx_printf(struct tx_buffer *tx, const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + + size_t available = sizeof(tx->buf) - tx->used; + int written = vsnprintf(tx->buf + tx->used, available, fmt, ap); + if (written > 0 && (size_t)written < available) { + tx->used += written; + } else { + fprintf(stderr, "Toy journaling: tx_printf truncated output (wanted %d bytes, had %zu)\n", + written, available); + tx->used = sizeof(tx->buf) - 1; + } + + va_end(ap); +} + +static void +tx_log_time_field(struct tx_buffer *tx, const char *label, time_t value) +{ + if (value > MIN_REASONABLE_TIME && value < MAX_REASONABLE_TIME) + tx_printf(tx, "%s: %ld\n", label, (long)value); + else + tx_printf(tx, "%s: [invalid or uninitialized: %ld]\n", label, (long)value); +} + +void +journal_log_metadata(void *node_ptr, const struct journal_entry_info *info) +{ + struct node *np = (struct node *) node_ptr; + struct tx_buffer tx = { .used = 0 }; + + if (!np) { + fprintf(stderr, "Toy journaling: Null node passed. Skipping.\n"); + return; + } + + const struct stat *st = &np->dn_stat; + + const char *action = info && info->action ? info->action : "unknown"; + const char *name = info && info->name ? info->name : "(unknown)"; + ino_t parent_ino = info ? 
info->parent_ino : 0; + + tx_printf(&tx, "action: %s\n", action); + tx_printf(&tx, "name: %s\n", name); + tx_printf(&tx, "parent inode: %" PRIuMAX "\n", (uintmax_t)parent_ino); + tx_printf(&tx, "inode: %" PRIuMAX "\n", (uintmax_t) st->st_ino); + + if (st->st_mode == 0) + tx_printf(&tx, "mode: (unset)\n"); + else + tx_printf(&tx, "mode: 0%o\n", st->st_mode); + + if ((ssize_t)st->st_size < 0) + tx_printf(&tx, "size: (invalid: negative)\n"); + else + tx_printf(&tx, "size: %" PRIdMAX " bytes\n", (intmax_t) st->st_size); + + if (st->st_nlink == 0) { + tx_printf(&tx, "nlink: 0 (file may have been unlinked, skipping rest)\n"); + journal_log_tx(tx.buf); + return; + } + + tx_printf(&tx, "nlink: %" PRIuMAX "\n", (uintmax_t) st->st_nlink); + tx_printf(&tx, "blocks: %" PRIuMAX "\n", (uintmax_t) st->st_blocks); + + tx_log_time_field(&tx, "mtime", st->st_mtime); + tx_log_time_field(&tx, "ctime", st->st_ctime); + + journal_log_tx(tx.buf); +} diff --git a/libdiskfs/journal.h b/libdiskfs/journal.h new file mode 100644 index 00000000..8ae0a575 --- /dev/null +++ b/libdiskfs/journal.h @@ -0,0 +1,19 @@ +#ifndef JOURNAL_H +#define JOURNAL_H + +#include <stdbool.h> +#include <sys/types.h> + +struct journal_entry_info { + const char *action; // "sync", "create", "unlink", etc. + const char *name; // filename if available + ino_t parent_ino; // parent inode if known + // Future: uid, gid, device, flags, etc. +}; + +void journal_init(void); +void journal_shutdown(void); +bool flush_journal_to_file(void); +void journal_log_metadata(void *node_ptr, const struct journal_entry_info *info); + +#endif /* JOURNAL_H */ -- 2.40.1