> It would be interesting to isolate the IO effects from the thread
> switching to see where it all goes. I recall a lot of it goes to
> non-IO overhead. A really-synchronous of bs_rdwr follows. It may
> not build against the current tree; it's old.
Here's a patch that works with the current version of stgt.
commit 421d1ed5ede16dfd45d86921fcdaaac5db436e23
Author: Erez Zilber <[EMAIL PROTECTED]>
Date: Thu Mar 6 10:46:33 2008 +0200
bs-rdwr-sync
New file bs_rdwr_sync.c is similar in spirit to bs_rdwr.c but without all
the threading. For simpler debugging of core iscsi.
diff --git a/usr/Makefile b/usr/Makefile
index 13c77d2..bc489b5 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -11,7 +11,7 @@ CFLAGS += -DISCSI
TGTD_OBJS += $(addprefix iscsi/, conn.o param.o session.o \
iscsid.o target.o chap.o transport.o iscsi_tcp.o \
isns.o libcrc32c.o)
-TGTD_OBJS += bs_rdwr.o bs_aio.o
+TGTD_OBJS += bs_rdwr.o bs_rdwr_sync.o bs_aio.o
LIBS += -lcrypto
ifneq ($(ISCSI_RDMA),)
diff --git a/usr/bs_rdwr_sync.c b/usr/bs_rdwr_sync.c
new file mode 100644
index 0000000..daa2398
--- /dev/null
+++ b/usr/bs_rdwr_sync.c
@@ -0,0 +1,204 @@
+/*
+ * Synchronous I/O file backing store routine, based on an old bs_rdwr.
+ *
+ * Copyright (C) 2006-2007 FUJITA Tomonori <[EMAIL PROTECTED]>
+ * Copyright (C) 2006-2007 Mike Christie <[EMAIL PROTECTED]>
+ * Copyright (C) 2006-2007 Pete Wyckoff <[EMAIL PROTECTED]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#define _XOPEN_SOURCE 500
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <linux/fs.h>
+#include <sys/epoll.h>
+
+#include "list.h"
+#include "util.h"
+#include "tgtd.h"
+#include "scsi.h"
+
+static int bs_rdwr_sync_open(struct scsi_lu *lu, char *path, int *fd,
+ uint64_t *size)
+{
+ *fd = backed_file_open(path, O_RDWR | O_LARGEFILE, size);
+ if (*fd < 0)
+ return *fd;
+ return 0;
+}
+
+static void bs_rdwr_sync_close(struct scsi_lu *lu)
+{
+ close(lu->fd);
+}
+
+static void set_medium_error(int *result, uint8_t *key, uint16_t *asc)
+{
+ *result = SAM_STAT_CHECK_CONDITION;
+ *key = MEDIUM_ERROR;
+ *asc = ASC_READ_ERROR;
+}
+
+static void bs_sync_sync_range(struct scsi_cmd *cmd, uint32_t length,
+ int *result, uint8_t *key, uint16_t *asc)
+{
+ int ret;
+ unsigned int flags = SYNC_FILE_RANGE_WAIT_BEFORE| SYNC_FILE_RANGE_WRITE;
+
+ ret = __sync_file_range(cmd->dev->fd, cmd->offset, length, flags);
+ if (ret)
+ set_medium_error(result, key, asc);
+}
+
+static int bs_rdwr_sync_cmd_submit(struct scsi_cmd *cmd)
+{
+ int ret, fd = cmd->dev->fd;
+ uint32_t length;
+ int result = SAM_STAT_GOOD;
+ uint8_t key;
+ uint16_t asc;
+
+ ret = length = 0;
+ key = asc = 0;
+
+ switch (cmd->scb[0])
+ {
+ case SYNCHRONIZE_CACHE:
+ case SYNCHRONIZE_CACHE_16:
+ /* TODO */
+ length = (cmd->scb[0] == SYNCHRONIZE_CACHE) ? 0 : 0;
+
+ if (cmd->scb[1] & 0x2) {
+ result = SAM_STAT_CHECK_CONDITION;
+ key = ILLEGAL_REQUEST;
+ asc = ASC_INVALID_FIELD_IN_CDB;
+ } else
+ bs_sync_sync_range(cmd, length, &result, &key, &asc);
+ break;
+ case WRITE_6:
+ case WRITE_10:
+ case WRITE_12:
+ case WRITE_16:
+ length = scsi_get_out_length(cmd);
+ ret = pwrite64(fd, scsi_get_out_buffer(cmd), length,
+ cmd->offset);
+ if (ret == length) {
+ if ((cmd->scb[0] != WRITE_6) && (cmd->scb[1] & 0x8))
+ bs_sync_sync_range(cmd, length, &result, &key,
+ &asc);
+ } else
+ set_medium_error(&result, &key, &asc);
+
+ break;
+ case READ_6:
+ case READ_10:
+ case READ_12:
+ case READ_16:
+ length = scsi_get_in_length(cmd);
+ ret = pread64(fd, scsi_get_in_buffer(cmd), length,
+ cmd->offset);
+
+ if (ret != length)
+ set_medium_error(&result, &key, &asc);
+ break;
+ default:
+ break;
+ }
+
+ dprintf("io done %p %x %d %u\n", cmd, cmd->scb[0], ret, length);
+
+ scsi_set_result(cmd, result);
+
+ if (result != SAM_STAT_GOOD) {
+ eprintf("io error %p %x %d %d %" PRIu64 ", %m\n",
+ cmd, cmd->scb[0], ret, length, cmd->offset);
+ sense_data_build(cmd, key, asc);
+ }
+
+ return 0;
+}
+
+/*static int bs_rdwr_sync_cmd_submit(struct scsi_cmd *cmd)
+{
+ struct scsi_lu *lu = cmd->dev;
+ int ret = 0, fd = lu->fd;
+ uint32_t length = 0;
+
+ dprintf("rw %d len %u off %llu cdb %02x\n", cmd->rw, cmd->len,
+ (unsigned long long) cmd->offset, cmd->scb[0]);
+
+ switch (cmd->scb[0]) {
+ case SYNCHRONIZE_CACHE:
+ case SYNCHRONIZE_CACHE_16:
+ ret = fsync(fd);
+ break;
+ case WRITE_6:
+ case WRITE_10:
+ case WRITE_12:
+ case WRITE_16:
+ length = scsi_get_out_length(cmd);
+ ret = pwrite64(fd, scsi_get_out_buffer(cmd), length,
+ cmd->offset);
+ break;
+ case READ_6:
+ case READ_10:
+ case READ_12:
+ case READ_16:
+ length = scsi_get_in_length(cmd);
+ ret = pread64(fd, scsi_get_in_buffer(cmd), length, cmd->offset);
+ break;
+ default:
+ break;
+ }
+
+ if (ret == length) {
+ scsi_set_result(cmd, SAM_STAT_GOOD);
+ } else {
+ eprintf("io error %p %x %d %d %" PRIu64 ", %m\n",
+ cmd, cmd->scb[0], ret, length, cmd->offset);
+ scsi_set_result(cmd, SAM_STAT_CHECK_CONDITION);
+ sense_data_build(cmd, MEDIUM_ERROR, ASC_READ_ERROR);
+ }
+
+ return 0;
+}*/
+
+static int bs_rdwr_sync_cmd_done(struct scsi_cmd *cmd)
+{
+ return 0;
+}
+
+static struct backingstore_template rdwr_sync_bst = {
+ .bs_name = "rdwr_sync",
+ .bs_datasize = 0,
+ .bs_open = bs_rdwr_sync_open,
+ .bs_close = bs_rdwr_sync_close,
+ .bs_cmd_submit = bs_rdwr_sync_cmd_submit,
+ .bs_cmd_done = bs_rdwr_sync_cmd_done,
+};
+
+__attribute__((constructor)) static void bs_rdwr_sync_constructor(void)
+{
+ register_backingstore_template(&rdwr_sync_bst);
+}
diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c
index 490a743..7ab4c8f 100644
--- a/usr/iscsi/iscsid.c
+++ b/usr/iscsi/iscsid.c
@@ -2153,7 +2153,7 @@ static struct tgt_driver iscsi = {
.show = iscsi_target_show,
.cmd_end_notify = iscsi_scsi_cmd_done,
.mgmt_end_notify = iscsi_tm_done,
- .default_bst = "rdwr",
+ .default_bst = "rdwr_sync",
};
__attribute__((constructor)) static void iscsi_driver_constructor(void)
Now, the performance is even lower (~460 MB/sec with rdwr_sync compared to ~670
MB/sec with rdwr). I've noticed that it takes a lot of time between
target_cmd_queue (time = 663673) & iscsi_task_tx_start (669209).
I don't understand something in the behavior of iscsi_task_tx_start (this may
be related to the long time mentioned above): when it is called, it handles
only the 1st task in conn->tx_clist. Why doesn't it try to handle all tasks on
the list? What happens is that after bs completes is work, it takes a lot of
time until iscsi_task_tx_start is called for that task. iscsi_task_tx_start
*is* called immediately, but it handles the 1st task only (so the current task
has to wait for this thread to wake up multiple times until it will be
handled). Can anyone explain this design?
Erez
_______________________________________________
Stgt-devel mailing list
[email protected]
https://lists.berlios.de/mailman/listinfo/stgt-devel