Module Name: othersrc
Committed By: dholland
Date: Sat May 25 19:37:32 UTC 2013
Added Files:
othersrc/external/bsd/bikeshed/dist/src: journal.c journal.h
Log Message:
Some code for a journal container/log manager.
This is untested draft stuff and will need more attention later.
To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 othersrc/external/bsd/bikeshed/dist/src/journal.c \
othersrc/external/bsd/bikeshed/dist/src/journal.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Added files:
Index: othersrc/external/bsd/bikeshed/dist/src/journal.c
diff -u /dev/null othersrc/external/bsd/bikeshed/dist/src/journal.c:1.1
--- /dev/null Sat May 25 19:37:32 2013
+++ othersrc/external/bsd/bikeshed/dist/src/journal.c Sat May 25 19:37:32 2013
@@ -0,0 +1,928 @@
+/*-
+ * Copyright (c) 2013 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by David A. Holland.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <errno.h>
+#include <err.h>
+
+#include "journal.h"
+
+/*
+ * Ways in which this code may be inadequate:
+ *
+ * - For some recovery schemes you need to be able to add records
+ * while running recovery. This requires a read/write mode, and in
+ * that case the read/write distinction this code has is probably
+ * useless.
+ *
+ * - In that case we'll also need a better scheme for discovering
+ * the next lsn to use when opening an existing journal file.
+ *
+ * - It might be more sensible to forget about using a count for
+ * the lsn and just use the file offset.
+ *
+ * - If recovery performance matters it may make sense to read in
+ * large buffered chunks rather than one record at a time.
+ *
+ * - Part of the point of having this code is that you can crash
+ * out arbitrarily without losing data. Probably we don't need to
+ * return errors but can just crash out from in here as well. This
+ * requires further thought about the overall error model.
+ *
+ * - There needs to be catastrophic recovery code for when the
+ * journal is corrupted.
+ */
+
+////////////////////////////////////////////////////////////
+// infrastructure
+
+/*
+ * XXX everything in this section should be expanded and moved out
+ * into support files.
+ *
+ * For now these are thin placeholders: the do* allocation macros map
+ * straight onto the C library and ignore their size arguments,
+ * getuuid() yields a fixed constant rather than a real unique id,
+ * whine is warnx, and the 64-bit byte-order conversions are spelled
+ * with htobe64/be64toh.
+ */
+
+#define CTASSERT __CTASSERT
+
+#define domalloc(sz) malloc(sz)
+#define dorealloc(op, osz, nsz) realloc(op, nsz)
+#define dofree(p, sz) free(p)
+#define dostrdup(s) strdup(s)
+#define dostrfree(s) free(s)
+#define getuuid() (0xdeaddeaddeaddeadULL)
+#define whine warnx
+#define htonll htobe64
+#define ntohll be64toh
+
+////////////////////////////////////////////////////////////
+// constants
+
+#define BIKESHED_JOURNAL_STRING "bikeshed journal v0" /* file magic; 19 chars + NUL fill jfh_magic[20] */
+#define BIKESHED_JOURNAL_VERSION 0 /* minor version */
+
+#define JOURNAL_RECORD_MAGIC 0xad074ef9 /* marks every record head and tail */
+
+#define JOURNAL_WRITE_BUFFER_SIZE 16384 /* bytes in the write-mode buffer */
+
+#define FIRST_LSN ((journal_lsn_t)1) /* lsn of the first record in a journal */
+
+#define RECORDLEN_MAX 65535 /* largest payload; the length field is 16 bits */
+#define RECORDTYPE_MAX 65535 /* largest type code; the type field is 16 bits */
+
+////////////////////////////////////////////////////////////
+// data types
+
+/*
+ * Content-independent header for a journal file.
+ * (This is an on-disk structure and should be handled accordingly.)
+ *
+ * As written by journal_writenewheader: jfh_magic holds
+ * BIKESHED_JOURNAL_STRING, jfh_clientname is a NUL-terminated string,
+ * the two version fields are stored with htonl(), jfh_fileuuid is
+ * stored without byte swapping, and the pad fields are zero.
+ */
+struct journal_fileheader {
+ char jfh_magic[20]; /* 0-19 */
+ uint32_t jfh_minorversion; /* 20-23 */
+ char jfh_clientname[20]; /* 24-43 */
+ uint32_t jfh_clientversion; /* 44-47 */
+ uint64_t jfh_fileuuid; /* 48-55 */
+ uint64_t jfh_pad1; /* 56-63 */
+ char jfh_pad2[64]; /* 64-127 */
+}; /* total size 128 */
+#define JOURNAL_FILEHEADER_SIZE 128
+
+/*
+ * Content-independent header for a journal entry.
+ * (This is an on-disk structure and should be handled accordingly.)
+ *
+ * On disk a record is this head, then jrh_length bytes of client
+ * data, then a struct journal_recordtail. The magic, type and length
+ * fields are stored with htonl()/htons(); jrh_fileuuid is a copy of
+ * the file header's uuid and is compared without byte swapping;
+ * jrh_lsn is decoded with ntohll() when read back.
+ */
+struct journal_recordhead {
+ uint32_t jrh_magic; /* 0-3 */
+ uint16_t jrh_type; /* 4-5 */
+ uint16_t jrh_length; /* 6-7 */
+ uint64_t jrh_fileuuid; /* 8-15 */
+ uint64_t jrh_lsn; /* 16-23 */
+};
+#define JOURNAL_RECORDHEAD_SIZE 24
+
+struct journal_recordtail { /* on-disk trailer written after each record's data */
+ uint32_t jrt_magic; /* 0-3: same value as jrh_magic */
+ uint16_t jrt_type; /* 4-5: copy of jrh_type */
+ uint16_t jrt_length; /* 6-7: copy of jrh_length; used to walk backwards */
+ uint64_t jrt_lsn; /* 8-15: copy of jrh_lsn */
+};
+#define JOURNAL_RECORDTAIL_SIZE 16
+
+/*
+ * In-memory state of a journal.
+ *
+ * Created detached by journal_create (no names, j_fd == -1, mode
+ * J_NON); one of the journal_attach_* functions fills in the names,
+ * opens the file and selects J_READ or J_WRITE.
+ */
+struct journal {
+ /* client's settings: name and version recorded in the file header */
+ char *j_clientname;
+ unsigned j_clientversion;
+
+ /* attached file, if any (j_fd is -1 when no file is open) */
+ char *j_filename;
+ uint64_t j_fileuuid;
+ int j_fd;
+
+ /* operating mode */
+ enum journal_modes { J_NON,J_READ, J_WRITE } j_mode;
+
+ /* reading: position, file size at attach, head of the last record read */
+ off_t j_filepos;
+ off_t j_endpos;
+ struct journal_recordhead j_jrh;
+
+ /* writing: lsn for the next record; highest lsn handed to write() */
+ journal_lsn_t j_nextlsn;
+ journal_lsn_t j_writtenlsn;
+
+ /*
+ * buffer: j_bufmax is the allocated size. When writing, j_bufpos
+ * is the number of bytes waiting to be flushed; when reading, it
+ * is the data length of the record held in j_buf.
+ */
+ void *j_buf;
+ size_t j_bufpos;
+ size_t j_bufmax;
+};
+
+////////////////////////////////////////////////////////////
+// Constructor and destructor
+
+/*
+ * Allocate a new journal object in the detached state: no names, no
+ * file (j_fd == -1), mode J_NON, no buffer. Use one of the
+ * journal_attach_* functions to attach a file to it.
+ *
+ * Returns the new object, or NULL if memory could not be allocated.
+ */
+struct journal *
+journal_create(void)
+{
+	struct journal *j;
+
+	/* the on-disk structures must have exactly their documented sizes */
+	CTASSERT(sizeof(struct journal_fileheader) == JOURNAL_FILEHEADER_SIZE);
+	CTASSERT(sizeof(struct journal_recordhead) == JOURNAL_RECORDHEAD_SIZE);
+	CTASSERT(sizeof(struct journal_recordtail) == JOURNAL_RECORDTAIL_SIZE);
+
+	j = domalloc(sizeof(*j));
+	if (j == NULL) {
+		/* don't write through a null pointer below */
+		return NULL;
+	}
+
+	j->j_clientname = NULL;
+	j->j_clientversion = 0;
+
+	j->j_filename = NULL;
+	j->j_fileuuid = 0;
+	j->j_fd = -1;
+
+	j->j_mode = J_NON;
+
+	j->j_filepos = -1;
+	j->j_endpos = -1;
+	memset(&j->j_jrh, 0, sizeof(j->j_jrh));
+
+	j->j_nextlsn = JOURNAL_LSN_INVALID;
+	j->j_writtenlsn = JOURNAL_LSN_INVALID;
+
+	j->j_buf = NULL;
+	j->j_bufpos = 0;
+	j->j_bufmax = 0;
+
+	return j;
+}
+
+/*
+ * Free a journal object and everything it owns. The file must
+ * already have been detached with journal_close().
+ */
+void
+journal_destroy(struct journal *j)
+{
+	/* a still-open descriptor means the caller skipped journal_close */
+	assert(j->j_fd == -1);
+
+	/* owned strings and buffer first, the object itself last */
+	dostrfree(j->j_clientname);
+	dostrfree(j->j_filename);
+	dofree(j->j_buf, j->j_bufmax);
+
+	dofree(j, sizeof(*j));
+}
+
+////////////////////////////////////////////////////////////
+// low-level I/O
+
+/*
+ * Read exactly len bytes from the journal file into data.
+ *
+ * Returns 0 on success. Returns -1 if read() fails (errno as set by
+ * read) or if fewer than len bytes were available (errno = EFTYPE).
+ */
+static int
+journal_read(struct journal *j, void *data, size_t len)
+{
+	ssize_t got;
+
+	got = read(j->j_fd, data, len);
+	if (got != -1 && (size_t)got == len) {
+		return 0;
+	}
+	if (got != -1) {
+		/* short read: unexpected EOF */
+		errno = EFTYPE;
+	}
+	return -1;
+}
+
+/*
+ * Write exactly len bytes from data to the journal file.
+ *
+ * Returns 0 on success. Returns -1 if write() fails (errno as set by
+ * write) or if it accepted fewer than len bytes (errno = EFBIG).
+ */
+static int
+journal_write(struct journal *j, const void *data, size_t len)
+{
+	ssize_t put;
+
+	put = write(j->j_fd, data, len);
+	if (put != -1 && (size_t)put == len) {
+		return 0;
+	}
+	if (put != -1) {
+		/* short write; not clear what the right error is */
+		errno = EFBIG;
+	}
+	return -1;
+}
+
+////////////////////////////////////////////////////////////
+// Attaching a journal file
+
+/*
+ * Record private copies of the client name and file name in j.
+ * Neither name may already be set.
+ *
+ * Returns 0 on success. Returns -1 with errno set if either copy
+ * cannot be allocated; in that case j is left with both names NULL.
+ */
+static int
+journal_setnames(struct journal *j,
+		 const char *clientname, const char *filename)
+{
+	char *fn, *cn;
+	int serrno;
+
+	assert(j->j_filename == NULL);
+	assert(j->j_clientname == NULL);
+
+	/*
+	 * XXX: when we get that infrastructure, we should probably
+	 * have the passed-in filename always be relative to the
+	 * ctldir, and use the ctldir module to get a full pathname.
+	 */
+	fn = dostrdup(filename);
+	if (fn == NULL) {
+		return -1;
+	}
+	cn = dostrdup(clientname);
+	if (cn == NULL) {
+		/* free() may disturb errno; report the allocation failure */
+		serrno = errno;
+		dostrfree(fn);
+		errno = serrno;
+		return -1;
+	}
+
+	/* commit only once both copies exist */
+	j->j_filename = fn;
+	j->j_clientname = cn;
+
+	return 0;
+}
+
+/*
+ * Open j->j_filename with the given open(2) flags (creation mode
+ * 0644) and store the descriptor in j->j_fd. No file may be open yet.
+ *
+ * Returns 0 on success, -1 (errno from open) on failure.
+ */
+static int
+journal_open(struct journal *j, int openflags)
+{
+	int fd;
+
+	assert(j->j_fd == -1);
+
+	fd = open(j->j_filename, openflags, 0644);
+	j->j_fd = fd;
+
+	return (fd < 0) ? -1 : 0;
+}
+
+/*
+ * Build the file header for a new journal, write it at the current
+ * file position and fsync it. On success the client version and the
+ * file's uuid are remembered in j, so that records added later carry
+ * the same uuid as the header.
+ *
+ * Returns 0 on success. Returns -1 with errno set on failure;
+ * errno is ENAMETOOLONG if the client name does not fit in the
+ * header's name field.
+ */
+static int
+journal_writenewheader(struct journal *j, unsigned clientversion)
+{
+	struct journal_fileheader jfh;
+
+	/* checked at run time: an assert would vanish under NDEBUG */
+	if (strlen(j->j_clientname) >= sizeof(jfh.jfh_clientname)) {
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	memset(&jfh, 0, sizeof(jfh));
+	strcpy(jfh.jfh_magic, BIKESHED_JOURNAL_STRING);
+	jfh.jfh_minorversion = htonl(BIKESHED_JOURNAL_VERSION);
+	strcpy(jfh.jfh_clientname, j->j_clientname);
+	jfh.jfh_clientversion = htonl(clientversion);
+	/* don't bother htonll()'ing this */
+	jfh.jfh_fileuuid = getuuid();
+
+	if (journal_write(j, &jfh, sizeof(jfh))) {
+		return -1;
+	}
+	if (fsync(j->j_fd) < 0) {
+		return -1;
+	}
+
+	j->j_clientversion = clientversion;
+	/* record heads are stamped from j_fileuuid; keep it in step */
+	j->j_fileuuid = jfh.jfh_fileuuid;
+	return 0;
+}
+
+/*
+ * Read the file header of an existing journal from the current file
+ * position and validate it: container magic string, container minor
+ * version, client name (must equal j->j_clientname) and client
+ * version (must lie in [clientversion_min, clientversion_max]).
+ *
+ * On success stores the file's client version and uuid in j and
+ * returns 0. On failure returns -1; validation failures are reported
+ * with whine() and set errno to EFTYPE.
+ */
+static int
+journal_readoldheader(struct journal *j,
+		      unsigned clientversion_min, unsigned clientversion_max)
+{
+	struct journal_fileheader jfh;
+	unsigned jversion, cversion;
+
+	if (journal_read(j, &jfh, sizeof(jfh))) {
+		return -1;
+	}
+	/* memcmp() returns nonzero when the magic does NOT match */
+	if (memcmp(jfh.jfh_magic, BIKESHED_JOURNAL_STRING,
+		   sizeof(jfh.jfh_magic)) != 0) {
+		whine("%s: wrong magic number / not a journal file",
+		      j->j_filename);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	jversion = ntohl(jfh.jfh_minorversion);
+	if (jversion != BIKESHED_JOURNAL_VERSION) {
+		whine("%s: wrong journal container version %u (expected %u)",
+		      j->j_filename, jversion, BIKESHED_JOURNAL_VERSION);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	/* the name must be terminated before it is used as a string */
+	if (jfh.jfh_clientname[sizeof(jfh.jfh_clientname) - 1] != '\0') {
+		whine("%s: garbled journal client name", j->j_filename);
+		errno = EFTYPE;
+		return -1;
+	}
+	if (strcmp(jfh.jfh_clientname, j->j_clientname) != 0) {
+		whine("%s: wrong journal client name %s (expected %s)",
+		      j->j_filename, jfh.jfh_clientname, j->j_clientname);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	cversion = ntohl(jfh.jfh_clientversion);
+	if (cversion < clientversion_min) {
+		whine("%s: journal client version %u too old "
+		      "(need at least %u)", j->j_filename, cversion,
+		      clientversion_min);
+		errno = EFTYPE;
+		return -1;
+	}
+	/* upper bound: compare against the maximum, not the minimum */
+	if (cversion > clientversion_max) {
+		whine("%s: journal client version %u too new "
+		      "(maximum accepted %u)", j->j_filename, cversion,
+		      clientversion_max);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	j->j_clientversion = cversion;
+	/* don't bother ntohll()'ing this */
+	j->j_fileuuid = jfh.jfh_fileuuid;
+	return 0;
+}
+
+/*
+ * Work out the lsn for the next record of an existing journal by
+ * looking at the tail of the last record in the file, and store it
+ * in j->j_nextlsn. A journal holding only the file header starts at
+ * FIRST_LSN.
+ *
+ * Returns 0 on success, -1 with errno set on failure (EFTYPE if the
+ * file is too short to end in a record or the last tail is bad).
+ */
+static
+int
+journal_findlsn(struct journal *j)
+{
+	struct journal_recordtail jrt;
+	off_t pos;
+
+	pos = lseek(j->j_fd, 0, SEEK_END);
+	if (pos < 0) {
+		/* ? */
+		return -1;
+	}
+	if (pos == (off_t)sizeof(struct journal_fileheader)) {
+		/* journal is empty */
+		j->j_nextlsn = FIRST_LSN;
+		return 0;
+	}
+	if (pos < (off_t)(sizeof(struct journal_fileheader) +
+			  sizeof(struct journal_recordhead) +
+			  sizeof(struct journal_recordtail))) {
+		/* not even room for one empty record after the header */
+		whine("%s: journal too short to contain a record",
+		      j->j_filename);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	/*
+	 * Back up over the last tail. lseek returns the new offset,
+	 * which is nonzero on success; only a negative result is an
+	 * error. Compute the offset in off_t so it really is negative
+	 * arithmetic rather than an unsigned wraparound.
+	 */
+	if (lseek(j->j_fd, pos - (off_t)sizeof(jrt), SEEK_SET) < 0) {
+		return -1;
+	}
+	if (journal_read(j, &jrt, sizeof(jrt))) {
+		return -1;
+	}
+	if (ntohl(jrt.jrt_magic) != JOURNAL_RECORD_MAGIC) {
+		/*
+		 * By the time we get here, this journal's supposed to
+		 * have been recovered, so we don't need to search for
+		 * a valid record. If this assumption changes, then
+		 * this code should probably be changed to scan the
+		 * whole journal forwards.
+		 */
+		whine("%s: last record has bad magic number in tail",
+		      j->j_filename);
+		errno = EFTYPE;
+		return -1;
+	}
+	j->j_nextlsn = ntohll(jrt.jrt_lsn) + 1;
+	return 0;
+}
+
+/*
+ * Give a journal that has no buffer yet a write buffer of
+ * JOURNAL_WRITE_BUFFER_SIZE bytes.
+ */
+static void
+journal_allocwritebuf(struct journal *j)
+{
+	/* only valid on a journal that has never had a buffer */
+	assert(j->j_buf == NULL && j->j_bufpos == 0 && j->j_bufmax == 0);
+
+	j->j_buf = domalloc(JOURNAL_WRITE_BUFFER_SIZE);
+	j->j_bufmax = JOURNAL_WRITE_BUFFER_SIZE;
+}
+
+/*
+ * Create a new journal file (it must not already exist), write its
+ * header, and attach it to j in write mode with the first record
+ * due to get FIRST_LSN.
+ *
+ * Returns 0 on success. On failure returns -1 with errno set and
+ * puts j back in the detached state (no descriptor, mode J_NON,
+ * names cleared) so it can be destroyed or attached again.
+ */
+int
+journal_attach_new(struct journal *j, const char *clientname,
+		   unsigned clientversion,
+		   const char *filename)
+{
+	int serrno;
+
+	if (journal_setnames(j, clientname, filename)) {
+		return -1;
+	}
+	if (journal_open(j, O_WRONLY|O_CREAT|O_EXCL)) {
+		goto fail;
+	}
+
+	j->j_mode = J_WRITE;
+
+	if (journal_writenewheader(j, clientversion)) {
+		goto fail;
+	}
+
+	j->j_nextlsn = FIRST_LSN;
+	journal_allocwritebuf(j);
+	return 0;
+
+ fail:
+	/* undo everything done so far, preserving the original errno */
+	serrno = errno;
+	if (j->j_fd != -1) {
+		close(j->j_fd);
+		j->j_fd = -1;
+	}
+	j->j_mode = J_NON;
+	dostrfree(j->j_filename);
+	j->j_filename = NULL;
+	dostrfree(j->j_clientname);
+	j->j_clientname = NULL;
+	errno = serrno;
+	return -1;
+}
+
+/*
+ * Attach an existing journal file to j in write (append) mode. The
+ * file header is validated against clientname and the accepted
+ * client version range, and the next lsn is taken from the last
+ * record in the file.
+ *
+ * Returns 0 on success. On failure returns -1 with errno set and
+ * puts j back in the detached state (no descriptor, mode J_NON,
+ * names cleared) so it can be destroyed or attached again.
+ */
+int
+journal_attach_old_write(struct journal *j, const char *clientname,
+			 unsigned clientversion_min,
+			 unsigned clientversion_max,
+			 const char *filename)
+{
+	int serrno;
+
+	if (journal_setnames(j, clientname, filename)) {
+		return -1;
+	}
+	if (journal_open(j, O_RDWR|O_APPEND)) {
+		goto fail;
+	}
+
+	j->j_mode = J_WRITE;
+
+	if (journal_readoldheader(j, clientversion_min, clientversion_max)) {
+		goto fail;
+	}
+
+	if (journal_findlsn(j)) {
+		goto fail;
+	}
+
+	/* every record already in the file has been written */
+	j->j_writtenlsn = j->j_nextlsn - 1;
+
+	journal_allocwritebuf(j);
+	return 0;
+
+ fail:
+	/* undo everything done so far, preserving the original errno */
+	serrno = errno;
+	if (j->j_fd != -1) {
+		close(j->j_fd);
+		j->j_fd = -1;
+	}
+	j->j_mode = J_NON;
+	dostrfree(j->j_filename);
+	j->j_filename = NULL;
+	dostrfree(j->j_clientname);
+	j->j_clientname = NULL;
+	errno = serrno;
+	return -1;
+}
+
+/*
+ * Attach an existing journal file to j in read mode. The file
+ * header is validated against clientname and the accepted client
+ * version range; the read position is left just after the header
+ * and the file size at this moment is remembered as the end
+ * position. A small initial record buffer is allocated.
+ *
+ * Returns 0 on success. On failure returns -1 with errno set and
+ * puts j back in the detached state (no descriptor, mode J_NON,
+ * names cleared) so it can be destroyed or attached again.
+ */
+int
+journal_attach_old_read(struct journal *j, const char *clientname,
+			unsigned clientversion_min, unsigned clientversion_max,
+			const char *filename)
+{
+	off_t start, end;
+	void *buf;
+	int serrno;
+
+	if (journal_setnames(j, clientname, filename)) {
+		return -1;
+	}
+	if (journal_open(j, O_RDONLY|O_APPEND)) {
+		goto fail;
+	}
+
+	j->j_mode = J_READ;
+
+	if (journal_readoldheader(j, clientversion_min, clientversion_max)) {
+		goto fail;
+	}
+
+	/* note where the records start and end, then go back to the start */
+	start = lseek(j->j_fd, 0, SEEK_CUR);
+	if (start < 0) {
+		goto fail;
+	}
+	end = lseek(j->j_fd, 0, SEEK_END);
+	if (end < 0) {
+		goto fail;
+	}
+	if (lseek(j->j_fd, start, SEEK_SET) < 0) {
+		goto fail;
+	}
+
+	/* starting size only; the buffer grows to fit larger records */
+	buf = domalloc(128);
+	if (buf == NULL) {
+		errno = ENOMEM;
+		goto fail;
+	}
+
+	j->j_filepos = start;
+	j->j_endpos = end;
+	j->j_buf = buf;
+	j->j_bufmax = 128;
+	return 0;
+
+ fail:
+	/* undo everything done so far, preserving the original errno */
+	serrno = errno;
+	if (j->j_fd != -1) {
+		close(j->j_fd);
+		j->j_fd = -1;
+	}
+	j->j_mode = J_NON;
+	dostrfree(j->j_filename);
+	j->j_filename = NULL;
+	dostrfree(j->j_clientname);
+	j->j_clientname = NULL;
+	errno = serrno;
+	return -1;
+}
+
+////////////////////////////////////////////////////////////
+// Basic inquiries
+
+/*
+ * Return the client name stored in j (NULL if none has been set).
+ * The string belongs to the journal; the caller must not free it.
+ */
+const char *
+journal_getclientname(struct journal *j)
+{
+	const char *name = j->j_clientname;
+
+	return name;
+}
+
+/*
+ * Return the client version stored in j: the one written to a new
+ * journal's header or read from an existing one (0 before that).
+ */
+unsigned
+journal_getclientversion(struct journal *j)
+{
+	unsigned version = j->j_clientversion;
+
+	return version;
+}
+
+////////////////////////////////////////////////////////////
+// Writing new records
+
+/*
+ * Hand the contents of the write buffer, if any, to write() and
+ * empty the buffer. Does not fsync.
+ *
+ * Returns 0 on success (including when there was nothing to write)
+ * and -1 with errno set if the write fails, in which case the
+ * buffer is left as it was.
+ */
+static int
+journal_flush(struct journal *j)
+{
+	assert(j->j_mode == J_WRITE);
+
+	if (j->j_bufpos > 0) {
+		if (journal_write(j, j->j_buf, j->j_bufpos) != 0) {
+			return -1;
+		}
+		j->j_bufpos = 0;
+
+		/*
+		 * Called from journal_writebuf_append, nextlsn is the
+		 * record currently being assembled, so everything up to
+		 * the record before it has now gone out.
+		 *
+		 * Called between records, nextlsn is the record that
+		 * has not been started yet, and nextlsn - 1 was wholly
+		 * inside the buffer just written.
+		 *
+		 * Either way the last complete lsn is nextlsn - 1.
+		 */
+		j->j_writtenlsn = j->j_nextlsn - 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Append len bytes from data to the write buffer, flushing the
+ * buffer to the file each time it fills up. The data may be larger
+ * than the buffer; it is copied in as many pieces as necessary.
+ *
+ * Returns 0 on success, -1 with errno set if a flush fails.
+ */
+static int
+journal_writebuf_append(struct journal *j, const void *data, size_t len)
+{
+	const char *src = data;
+	size_t remaining, amount;
+
+	assert(j->j_mode == J_WRITE);
+
+	remaining = len;
+	while (remaining > 0) {
+		/* take as much as fits in the space left in the buffer */
+		amount = j->j_bufmax - j->j_bufpos;
+		if (amount > remaining) {
+			amount = remaining;
+		}
+		memcpy((char *)j->j_buf + j->j_bufpos, src, amount);
+		/* move on in the source, or the next piece repeats this one */
+		src += amount;
+		j->j_bufpos += amount;
+		remaining -= amount;
+		assert(j->j_bufpos <= j->j_bufmax);
+		if (j->j_bufpos == j->j_bufmax) {
+			if (journal_flush(j)) {
+				return -1;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Append a record of the given type with len bytes of data to the
+ * journal. The record goes into the write buffer as head, data,
+ * tail; it is not necessarily on disk when this returns (use
+ * journal_sync or journal_syncto for that).
+ *
+ * type must not exceed RECORDTYPE_MAX and len must not exceed
+ * RECORDLEN_MAX. If lsn_ret is not NULL, the lsn assigned to the
+ * record is stored through it.
+ *
+ * Returns 0 on success, -1 with errno set if writing fails.
+ */
+int
+journal_addrecord(struct journal *j, unsigned type,
+		  const void *data, size_t len, journal_lsn_t *lsn_ret)
+{
+	struct journal_recordhead jrh;
+	struct journal_recordtail jrt;
+	journal_lsn_t lsn;
+
+	assert(j->j_mode == J_WRITE);
+
+	assert(type <= RECORDTYPE_MAX);
+	assert(len <= RECORDLEN_MAX);
+
+	/* paranoia */
+	memset(&jrh, 0, sizeof(jrh));
+	memset(&jrt, 0, sizeof(jrt));
+
+	jrh.jrh_magic = htonl(JOURNAL_RECORD_MAGIC);
+	jrh.jrh_type = htons(type);
+	jrh.jrh_length = htons(len);
+	jrh.jrh_fileuuid = j->j_fileuuid; /* this doesn't get htonll'd */
+	/* the readers decode the lsn with ntohll(), so encode it here */
+	jrh.jrh_lsn = htonll(j->j_nextlsn);
+
+	/* the tail repeats the head's (already encoded) fields */
+	jrt.jrt_magic = jrh.jrh_magic;
+	jrt.jrt_type = jrh.jrh_type;
+	jrt.jrt_length = jrh.jrh_length;
+	jrt.jrt_lsn = jrh.jrh_lsn;
+
+	if (journal_writebuf_append(j, &jrh, sizeof(jrh))) {
+		return -1;
+	}
+	if (journal_writebuf_append(j, data, len)) {
+		return -1;
+	}
+	if (journal_writebuf_append(j, &jrt, sizeof(jrt))) {
+		return -1;
+	}
+
+	/* must not change nextlsn until the writes are done */
+	lsn = j->j_nextlsn++;
+	if (lsn_ret != NULL) {
+		*lsn_ret = lsn;
+	}
+
+	/* if the record perfectly fit into the buffer, note that it's done */
+	if (j->j_bufpos == 0) {
+		j->j_writtenlsn++;
+		assert(j->j_writtenlsn == lsn);
+	}
+
+	return 0;
+}
+
+/*
+ * Flush the write buffer and fsync the journal file, so that every
+ * record added so far is on disk.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int
+journal_sync(struct journal *j)
+{
+	assert(j->j_mode == J_WRITE);
+
+	/* flush first; fsync is only attempted if the flush worked */
+	if (journal_flush(j) || fsync(j->j_fd)) {
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Make sure the record with the given lsn is on disk. The buffer is
+ * flushed only if that record has not been handed to the kernel
+ * yet; the file is then fsync'd.
+ *
+ * lsn must be the lsn of a record that has already been added.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int
+journal_syncto(struct journal *j, journal_lsn_t lsn)
+{
+	assert(j->j_mode == J_WRITE);
+
+	/* must not request an lsn that doesn't exist */
+	assert(lsn < j->j_nextlsn);
+
+	if (lsn > j->j_writtenlsn) {
+		if (journal_flush(j)) {
+			return -1;
+		}
+	}
+	/*
+	 * The requested lsn has now been sent to the kernel. Note <=:
+	 * writtenlsn is itself a written record, and after a flush it
+	 * equals nextlsn - 1, which is the largest lsn a caller may ask
+	 * for.
+	 */
+	assert(lsn <= j->j_writtenlsn);
+
+	/* FUTURE: could fdatasync, or fsync_range only the range we need */
+	if (fsync(j->j_fd)) {
+		return -1;
+	}
+	return 0;
+}
+
+////////////////////////////////////////////////////////////
+// Reading records
+
+/*
+ * Report the journal's current read position, suitable for passing
+ * to journal_seek later.
+ */
+off_t
+journal_tell(struct journal *j)
+{
+	off_t here;
+
+	assert(j->j_mode == J_READ);
+
+	here = j->j_filepos;
+	return here;
+}
+
+/*
+ * Seek to a specified position in the journal.
+ * If the position is not a position previously returned by
+ * journal_tell, the behavior is undefined.
+ *
+ * Returns 0 on success. Returns -1 with errno set if the underlying
+ * lseek fails, in which case the journal's position is unchanged.
+ */
+int
+journal_seek(struct journal *j, off_t pos)
+{
+	assert(j->j_mode == J_READ);
+
+	/*
+	 * lseek returns the new offset, not 0; don't pass that (cut
+	 * down to an int) back to the caller as if it were a status.
+	 */
+	if (lseek(j->j_fd, pos, SEEK_SET) < 0) {
+		return -1;
+	}
+	j->j_filepos = pos;
+	return 0;
+}
+
+/*
+ * Go back to the start of the journal's records, that is, to the
+ * position immediately after the file header. Returns whatever
+ * journal_seek returns.
+ */
+int
+journal_rewind(struct journal *j)
+{
+	off_t start;
+
+	assert(j->j_mode == J_READ);
+
+	start = sizeof(struct journal_fileheader);
+	return journal_seek(j, start);
+}
+
+/*
+ * Return nonzero if the current position is the end of the journal
+ * (the file size noted when the journal was attached), zero if not.
+ */
+int
+journal_atend(struct journal *j)
+{
+	assert(j->j_mode == J_READ);
+
+	if (j->j_filepos == j->j_endpos) {
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Read in the record that starts at file offset pos and make it the
+ * current record: its head goes into j->j_jrh, its data into
+ * j->j_buf (grown if necessary) and its data length into
+ * j->j_bufpos. j->j_filepos is set to pos.
+ *
+ * If jrt is NULL the record's tail is read from the file. If jrt
+ * is not NULL it must point to the tail the caller has already
+ * read for this record, and that copy is checked instead.
+ *
+ * The head magic, file uuid, tail magic and head/tail agreement are
+ * all verified.
+ *
+ * Returns 0 on success. Returns -1 with errno set on failure:
+ * EFTYPE for a malformed or truncated record (also reported with
+ * whine()), ENOMEM if the data buffer cannot be grown.
+ */
+static int
+journal_readrecord(struct journal *j, off_t pos,
+		   struct journal_recordtail *jrt)
+{
+	struct journal_recordtail jrt_storage;
+	void *newbuf;
+
+	assert(j->j_mode == J_READ);
+
+	if (lseek(j->j_fd, pos, SEEK_SET) < 0) {
+		return -1;
+	}
+	j->j_filepos = pos;
+
+	if (journal_read(j, &j->j_jrh, sizeof(j->j_jrh))) {
+		goto eof;
+	}
+	if (ntohl(j->j_jrh.jrh_magic) != JOURNAL_RECORD_MAGIC) {
+		whine("%s: Wrong magic number for record at %jd",
+		      j->j_filename, (intmax_t) pos);
+		errno = EFTYPE;
+		return -1;
+	}
+	/* this is not ntohll'd */
+	if (j->j_jrh.jrh_fileuuid != j->j_fileuuid) {
+		whine("%s: Wrong file uuid for record at %jd",
+		      j->j_filename, (intmax_t) pos);
+		errno = EFTYPE;
+		return -1;
+	}
+	/* XXX should probably check the lsn */
+
+	j->j_bufpos = ntohs(j->j_jrh.jrh_length);
+	if (j->j_bufpos > RECORDLEN_MAX) {
+		whine("%s: Record too long at %jd",
+		      j->j_filename, (intmax_t) pos);
+		errno = EFTYPE;
+		return -1;
+	}
+	if (j->j_bufpos > j->j_bufmax) {
+		/*
+		 * Grow via a temporary: on failure the old buffer is
+		 * still valid and still owned by j.
+		 */
+		newbuf = dorealloc(j->j_buf, j->j_bufmax, j->j_bufpos);
+		if (newbuf == NULL) {
+			/* the old buffer can't hold this record's data */
+			j->j_bufpos = 0;
+			errno = ENOMEM;
+			return -1;
+		}
+		j->j_buf = newbuf;
+		j->j_bufmax = j->j_bufpos;
+	}
+	if (journal_read(j, j->j_buf, j->j_bufpos)) {
+		goto eof;
+	}
+	if (jrt == NULL) {
+		jrt = &jrt_storage;
+		if (journal_read(j, jrt, sizeof(*jrt))) {
+			goto eof;
+		}
+	}
+	if (ntohl(jrt->jrt_magic) != JOURNAL_RECORD_MAGIC) {
+		whine("%s: Wrong magic number in tail of record at %jd",
+		      j->j_filename, (intmax_t) pos);
+		errno = EFTYPE;
+		return -1;
+	}
+	/* both sides are still in on-disk byte order, so compare directly */
+	if (jrt->jrt_type != j->j_jrh.jrh_type ||
+	    jrt->jrt_length != j->j_jrh.jrh_length ||
+	    jrt->jrt_lsn != j->j_jrh.jrh_lsn) {
+		whine("%s: Tail of record at %jd does not match head",
+		      j->j_filename, (intmax_t) pos);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	return 0;
+
+ eof:
+	/*
+	 * XXX: If we hit EOF in here, we've got a partial record and
+	 * we should just truncate and ignore it rather than failing.
+	 */
+	whine("%s: Partial record/unexpected EOF at %jd",
+	      j->j_filename, (intmax_t) pos);
+	errno = EFTYPE;
+	return -1;
+}
+
+/*
+ * Move to the next record: read the record that starts at the
+ * current position, make it the current record, and advance the
+ * position to just past it.
+ *
+ * Right after attaching or rewinding, the current position is the
+ * start of the first record, so the first call yields the first
+ * record; after the last record has been read the position equals
+ * the end of the journal and journal_atend reports true.
+ *
+ * Returns 0 on success. On failure returns -1 with errno set and
+ * leaves the position where it was.
+ */
+int
+journal_nextrecord(struct journal *j)
+{
+	off_t pos;
+
+	assert(j->j_mode == J_READ);
+
+	pos = j->j_filepos;
+	if (journal_readrecord(j, pos, NULL)) {
+		return -1;
+	}
+
+	/* step over the head, the data (j_bufpos bytes) and the tail */
+	pos += (off_t)sizeof(struct journal_recordhead);
+	pos += (off_t)j->j_bufpos;
+	pos += (off_t)sizeof(struct journal_recordtail);
+	j->j_filepos = pos;
+
+	return 0;
+}
+
+/*
+ * Move to the previous record: use the record tail that ends at the
+ * current position to find where that record starts, read the
+ * record, and leave the position at its start.
+ *
+ * Returns 0 on success. Returns -1 with errno set on failure:
+ * EINVAL if the current position is too close to the start of the
+ * file for any record to precede it, EFTYPE if the tail describes a
+ * record that would begin inside the file header.
+ */
+int
+journal_prevrecord(struct journal *j)
+{
+	struct journal_recordtail jrt;
+	const off_t first = (off_t)sizeof(struct journal_fileheader);
+	off_t pos;
+
+	assert(j->j_mode == J_READ);
+
+	pos = j->j_filepos;
+	if (pos < first + (off_t)(sizeof(struct journal_recordhead) +
+				  sizeof(struct journal_recordtail))) {
+		/* only the file header lies before us; it is not a record */
+		errno = EINVAL;
+		return -1;
+	}
+
+	pos -= (off_t)sizeof(struct journal_recordtail);
+	if (lseek(j->j_fd, pos, SEEK_SET) == -1) {
+		return -1;
+	}
+
+	if (journal_read(j, &jrt, sizeof(jrt))) {
+		return -1;
+	}
+
+	/* the tail's length field says how far back the head is */
+	pos -= (off_t)ntohs(jrt.jrt_length);
+	pos -= (off_t)sizeof(struct journal_recordhead);
+	if (pos < first) {
+		whine("%s: Record tail before %jd has impossible length",
+		      j->j_filename, (intmax_t) j->j_filepos);
+		errno = EFTYPE;
+		return -1;
+	}
+
+	return journal_readrecord(j, pos, &jrt);
+}
+
+/*
+ * Return the lsn from the head of the current record, converted
+ * from its on-disk form with ntohll().
+ */
+journal_lsn_t
+journal_getrecordlsn(struct journal *j)
+{
+	uint64_t raw;
+
+	assert(j->j_mode == J_READ);
+
+	raw = j->j_jrh.jrh_lsn;
+	return ntohll(raw);
+}
+
+/*
+ * Return the type code from the head of the current record,
+ * converted from its on-disk form with ntohs().
+ */
+unsigned
+journal_getrecordtype(struct journal *j)
+{
+	uint16_t raw;
+
+	assert(j->j_mode == J_READ);
+
+	raw = j->j_jrh.jrh_type;
+	return ntohs(raw);
+}
+
+/*
+ * Return the number of data bytes in the current record.
+ */
+size_t
+journal_getrecordsize(struct journal *j)
+{
+	size_t nbytes;
+
+	assert(j->j_mode == J_READ);
+
+	nbytes = j->j_bufpos;
+	return nbytes;
+}
+
+/*
+ * Copy the current record's data into buf, which has room for
+ * bufsize bytes. At most bufsize bytes are copied; a longer record
+ * is silently cut short.
+ *
+ * Returns the number of bytes copied.
+ */
+size_t
+journal_getrecorddata(struct journal *j, void *buf, size_t bufsize)
+{
+	size_t ncopy;
+
+	assert(j->j_mode == J_READ);
+
+	/* the smaller of the caller's space and the record's length */
+	ncopy = (bufsize > j->j_bufpos) ? j->j_bufpos : bufsize;
+	memcpy(buf, j->j_buf, ncopy);
+
+	return ncopy;
+}
+
+////////////////////////////////////////////////////////////
+// close
+
+/*
+ * Detach the file from the journal. In write mode any buffered
+ * data is flushed and the file is fsync'd first. On success the
+ * descriptor is closed and the journal returns to mode J_NON.
+ *
+ * Returns 0 on success. Returns -1 with errno set if flushing,
+ * syncing or closing fails; the journal is then left attached.
+ */
+int
+journal_close(struct journal *j)
+{
+	assert(j->j_mode != J_NON);
+
+	if (j->j_mode == J_WRITE) {
+		assert(j->j_fd != -1);
+		/* push out whatever is still sitting in the buffer */
+		if (j->j_bufpos > 0 && journal_flush(j)) {
+			return -1;
+		}
+		if (fsync(j->j_fd)) {
+			return -1;
+		}
+	}
+
+	if (close(j->j_fd) != 0) {
+		return -1;
+	}
+
+	j->j_mode = J_NON;
+	j->j_fd = -1;
+	return 0;
+}
Index: othersrc/external/bsd/bikeshed/dist/src/journal.h
diff -u /dev/null othersrc/external/bsd/bikeshed/dist/src/journal.h:1.1
--- /dev/null Sat May 25 19:37:32 2013
+++ othersrc/external/bsd/bikeshed/dist/src/journal.h Sat May 25 19:37:32 2013
@@ -0,0 +1,98 @@
+/*-
+ * Copyright (c) 2013 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by David A. Holland.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef JOURNAL_H
+#define JOURNAL_H
+
+/*
+ * Generic journal/log manager.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h> /* for off_t */
+
+struct journal; /* Opaque. */
+typedef uint64_t journal_lsn_t;
+
+#define JOURNAL_LSN_INVALID ((journal_lsn_t)0)
+
+/*
+ * Constructor and destructor
+ */
+struct journal *journal_create(void);
+void journal_destroy(struct journal *);
+
+/*
+ * Attaching a journal file
+ */
+int journal_attach_new(struct journal *, const char *clientname,
+ unsigned clientversion,
+ const char *filename);
+int journal_attach_old_write(struct journal *, const char *clientname,
+ unsigned clientversion_min, unsigned clientversion_max,
+ const char *filename);
+int journal_attach_old_read(struct journal *, const char *clientname,
+ unsigned clientversion_min, unsigned clientversion_max,
+ const char *filename);
+
+/*
+ * Basic inquiries
+ */
+const char *journal_getclientname(struct journal *);
+unsigned journal_getclientversion(struct journal *);
+
+/*
+ * Writing new records
+ */
+int journal_addrecord(struct journal *, unsigned type,
+ const void *data, size_t len, journal_lsn_t *lsn_ret);
+int journal_sync(struct journal *);
+int journal_syncto(struct journal *, journal_lsn_t lsn);
+
+/*
+ * Reading records
+ */
+int journal_rewind(struct journal *);
+int journal_atend(struct journal *);
+off_t journal_tell(struct journal *);
+int journal_seek(struct journal *, off_t pos);
+int journal_nextrecord(struct journal *);
+int journal_prevrecord(struct journal *j);
+journal_lsn_t journal_getrecordlsn(struct journal *);
+unsigned journal_getrecordtype(struct journal *);
+size_t journal_getrecordsize(struct journal *);
+size_t journal_getrecorddata(struct journal *, void *buf, size_t bufsize);
+
+/*
+ * Close the journal (before destroying the in-memory structures)
+ */
+int journal_close(struct journal *j);
+
+#endif /* JOURNAL_H */