Add support for attaching a multipath disk. A gendisk is still allocated per path, as this is required for per-path submission; however, those per-path gendisks are marked as hidden. They are named sdX:Y, where X is the multipath disk index and Y is the per-path index.
A global list of sd_mpath_disks is kept for matching scsi_devices. The multipath gendisk has its name and disk->major/minor set to mimic those of a scsi_disk. The following is an example of the relevant scsi_disk and block sysfs directories: $ ls -l /sys/block/ | grep sdc lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc -> ../devices/virtual/scsi_mpath_disk/0/sdc lrwxrwxrwx 1 root root 0 Feb 24 16:01 sdc:0 -> ../devices/platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0 lrwxrwxrwx 1 root root 0 Feb 24 16:02 sdc:1 -> ../devices/platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1 $ ls -l /sys/class/scsi_mpath_disk/scsi_mpath_disk0/ total 0 drwxr-xr-x 2 root root 0 Feb 24 16:03 power drwxr-xr-x 11 root root 0 Feb 24 16:01 sdc lrwxrwxrwx 1 root root 0 Feb 24 16:01 subsystem -> ../../../../class/scsi_mpath_disk -rw-r--r-- 1 root root 4096 Feb 24 16:01 uevent $ ls -l /sys/class/scsi_mpath_disk/scsi_mpath_disk0/sdc/multipath/ total 0 lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:0 -> ../../../../../platform/host8/session1/target8:0:0/8:0:0:0/block/sdc:0 lrwxrwxrwx 1 root root 0 Feb 24 16:20 sdc:1 -> ../../../../../platform/host9/session2/target9:0:0/9:0:0:0/block/sdc:1 $ ls -l /dev/sdc* brw-rw---- 1 root disk 8, 32 Feb 24 16:01 /dev/sdc brw-rw---- 1 root disk 8, 33 Feb 24 16:01 /dev/sdc1 brw-rw---- 1 root disk 8, 34 Feb 24 16:01 /dev/sdc2 $ lsblk /dev/sdc NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS sdc 8:32 0 600M 0 disk |-sdc1 8:33 0 9M 0 part `-sdc2 8:34 0 568M 0 part Signed-off-by: John Garry <[email protected]> --- drivers/scsi/sd.c | 396 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 378 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c74f336f8cba9..ca20f9430b4ac 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -112,12 +112,30 @@ static DEFINE_IDA(sd_index_ida); static mempool_t *sd_page_pool; static struct lock_class_key sd_bio_compl_lkclass; #ifdef CONFIG_SCSI_MULTIPATH +static LIST_HEAD(sd_mpath_disks_list); +static 
DEFINE_MUTEX(sd_mpath_disks_lock); + struct sd_mpath_disk { + struct device dev; + int disk_index; + int disk_count; + struct list_head entry; struct scsi_mpath_head *scsi_mpath_head; }; static void sd_mpath_disk_release(struct device *dev) { + struct sd_mpath_disk *sd_mpath_disk = + container_of(dev, struct sd_mpath_disk, dev); + struct scsi_mpath_head *scsi_mpath_head = + sd_mpath_disk->scsi_mpath_head; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + + mpath_put_disk(mpath_head); + ida_free(&sd_index_ida, sd_mpath_disk->disk_index); + scsi_mpath_put_head(scsi_mpath_head); + + kfree(sd_mpath_disk); } static const struct class sd_mpath_disk_class = { @@ -787,7 +805,8 @@ static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); - ida_free(&sd_index_ida, sdkp->index); + if (sdkp->index >= 0) + ida_free(&sd_index_ida, sdkp->index); put_device(&sdkp->device->sdev_gendev); free_opal_dev(sdkp->opal_dev); @@ -3964,6 +3983,322 @@ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) return 0; } +#ifdef CONFIG_SCSI_MULTIPATH +static int sd_mpath_revalidate_head(struct scsi_disk *sdkp) +{ + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + struct gendisk *disk = mpath_head->disk; + struct queue_limits *sdkp_lim = &sdkp->disk->queue->limits; + struct queue_limits lim; + unsigned int memflags; + int ret; + + lim = queue_limits_start_update(disk->queue); + memflags = blk_mq_freeze_queue(disk->queue); + + lim.logical_block_size = sdkp_lim->logical_block_size; + lim.physical_block_size = sdkp_lim->physical_block_size; + lim.io_min = sdkp_lim->io_min; + lim.io_opt = sdkp_lim->io_opt; + + queue_limits_stack_bdev(&lim, sdkp->disk->part0, 0, + disk->disk_name); + + /* TODO: setup integrity limits */ + lim.max_write_streams = sdkp_lim->max_write_streams; + 
lim.write_stream_granularity = sdkp_lim->write_stream_granularity; + ret = queue_limits_commit_update(disk->queue, &lim); + + set_capacity_and_notify(disk, get_capacity(sdkp->disk)); + + blk_mq_unfreeze_queue(disk->queue, memflags); + + return ret; +} +static int sd_mpath_get_disk(struct sd_mpath_disk *sd_mpath_disk) +{ + if (!get_device(&sd_mpath_disk->dev)) + return -ENXIO; + return 0; +} + +static void sd_mpath_put_disk(struct sd_mpath_disk *sd_mpath_disk) +{ + put_device(&sd_mpath_disk->dev); +} + +static struct sd_mpath_disk *sd_mpath_find_disk( + struct scsi_mpath_head *scsi_mpath_head) +{ + struct sd_mpath_disk *sd_mpath_disk; + int ret; + + list_for_each_entry(sd_mpath_disk, &sd_mpath_disks_list, entry) { + ret = sd_mpath_get_disk(sd_mpath_disk); + if (ret) + continue; + + if (sd_mpath_disk->scsi_mpath_head == scsi_mpath_head) + return sd_mpath_disk; + + sd_mpath_put_disk(sd_mpath_disk); + } + + return NULL; +} + +static void sd_mpath_add_disk(struct scsi_disk *sdkp) +{ + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device; + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + + mpath_device->disk = sdkp->disk; + mpath_device->numa_node = dev_to_node(sdp->host->dma_dev); + mpath_add_device(mpath_head, mpath_device); + mpath_device_set_live(mpath_device); +} + +static int sd_mpath_probe(struct scsi_disk *sdkp) +{ + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct device *dma_dev = sdp->host->dma_dev; + struct scsi_mpath_head *scsi_mpath_head = + scsi_mpath_dev->scsi_mpath_head; + struct sd_mpath_disk *sd_mpath_disk; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + char disk_name[DISK_NAME_LEN - 2]; + struct 
queue_limits lim; + struct gendisk *disk; + int error; + + /* + * sd_mpath_disks_list is kept locked if no disk found. + * Otherwise an extra reference is taken. + */ + mutex_lock(&sd_mpath_disks_lock); + sd_mpath_disk = sd_mpath_find_disk(scsi_mpath_head); + if (sd_mpath_disk) { + error = sized_strscpy(disk_name, mpath_head->disk->disk_name, + sizeof(disk_name)); + if (error < 0) { + /* + * Should not happen as would fail for the same when + * allocating the sd_mpath_disk + */ + sd_mpath_put_disk(sd_mpath_disk); + mutex_unlock(&sd_mpath_disks_lock); + return error; + } + sd_mpath_disk->disk_count++; + mutex_unlock(&sd_mpath_disks_lock); + + goto found; + } + + sd_mpath_disk = kzalloc(sizeof(*sd_mpath_disk), GFP_KERNEL); + if (!sd_mpath_disk) { + error = -ENOMEM; + goto out_unlock; + } + + sd_mpath_disk->scsi_mpath_head = scsi_mpath_head; + device_initialize(&sd_mpath_disk->dev); + sd_mpath_disk->dev.class = &sd_mpath_disk_class; + + blk_set_stacking_limits(&lim); + lim.dma_alignment = 3; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; + + error = mpath_alloc_head_disk(mpath_head, &lim, + dev_to_node(dma_dev)); + if (error) + goto out_free_disk; + disk = mpath_head->disk; + + mpath_head->parent = &sd_mpath_disk->dev; + + error = ida_alloc(&sd_index_ida, GFP_KERNEL); + if (error < 0) { + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); + goto out_put_disk; + } + sd_mpath_disk->disk_index = error; + error = sd_format_disk_name("sd", sd_mpath_disk->disk_index, + disk->disk_name, DISK_NAME_LEN); + if (error) + goto out_free_index; + + error = sized_strscpy(disk_name, mpath_head->disk->disk_name, + sizeof(disk_name)); + if (error < 0) + goto out_free_index; + + error = dev_set_name(&sd_mpath_disk->dev, "scsi_mpath_disk%d", + scsi_mpath_head->index); + if (error) + goto out_free_index; + + /* undone in sd_mpath_disk_release() */ + scsi_mpath_get_head(scsi_mpath_head); + 
scsi_mpath_head->mpath_head->drv_module = THIS_MODULE; + + error = device_add(&sd_mpath_disk->dev); + if (error) { + put_device(&sd_mpath_disk->dev); + goto out_unlock; + } + + list_add_tail(&sd_mpath_disk->entry, &sd_mpath_disks_list); + disk->major = sd_major((sd_mpath_disk->disk_index & 0xf0) >> 4); + disk->first_minor = ((sd_mpath_disk->disk_index & 0xf) << 4) | + (sd_mpath_disk->disk_index & 0xfff00); + disk->minors = SD_MINORS; + + sd_mpath_disk->disk_count = 1; + mutex_unlock(&sd_mpath_disks_lock); +found: + sdkp->sd_mpath_disk = sd_mpath_disk; + sdkp->disk->flags |= GENHD_FL_HIDDEN; + snprintf(sdkp->disk->disk_name, DISK_NAME_LEN, "%s:%d", + disk_name, scsi_mpath_dev->index); + + sdkp->index = -1; + return 0; + +out_free_index: + ida_free(&sd_index_ida, sd_mpath_disk->disk_index); +out_put_disk: + mpath_put_disk(mpath_head); +out_free_disk: + kfree(sd_mpath_disk); +out_unlock: + mutex_unlock(&sd_mpath_disks_lock); + return error; +} + +static void sd_mpath_remove(struct scsi_disk *sdkp) +{ + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_device *scsi_mpath_dev = sdp->scsi_mpath_dev; + struct mpath_device *mpath_device = &scsi_mpath_dev->mpath_device; + struct scsi_mpath_head *scsi_mpath_head = sd_mpath_disk->scsi_mpath_head; + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + bool remove = false; + + mpath_synchronize(mpath_head); + + if (mpath_clear_current_path(mpath_device)) + mpath_synchronize(mpath_head); + + mpath_delete_device(mpath_device); + + mutex_lock(&sd_mpath_disks_lock); + sd_mpath_disk->disk_count--; + if (!sd_mpath_disk->disk_count && mpath_can_remove_head(mpath_head)) { + list_del_init(&sd_mpath_disk->entry); + remove = true; + } + mutex_unlock(&sd_mpath_disks_lock); + mpath_remove_sysfs_link(mpath_device); + mpath_device->disk = NULL; + + if (remove) { + device_del(&sd_mpath_disk->dev); + mpath_remove_disk(mpath_head); + } + 
sd_mpath_put_disk(sd_mpath_disk); +} + +static void sd_mpath_remove_head(struct scsi_mpath_head *scsi_mpath_head) +{ + struct mpath_head *mpath_head = scsi_mpath_head->mpath_head; + struct sd_mpath_disk *sd_mpath_disk; + struct device *dev = &scsi_mpath_head->dev; + + mutex_lock(&sd_mpath_disks_lock); + sd_mpath_disk = sd_mpath_find_disk(scsi_mpath_head); + if (!sd_mpath_disk) { + dev_warn(dev, "could not find mpath disk\n"); + mutex_unlock(&sd_mpath_disks_lock); + return; + } + + list_del_init(&sd_mpath_disk->entry); + mutex_unlock(&sd_mpath_disks_lock); + + device_del(&sd_mpath_disk->dev); + mpath_remove_disk(mpath_head); + sd_mpath_put_disk(sd_mpath_disk); +} + +/* + * Always calls for a failed probe, so we need to handle that some structures + * have not been setup. + */ +static void sd_mpath_fail_probe(struct scsi_disk *sdkp) +{ + struct sd_mpath_disk *sd_mpath_disk = sdkp->sd_mpath_disk; + struct scsi_mpath_device *scsi_mpath_dev; + struct mpath_device *mpath_device; + struct scsi_device *sdp = sdkp->device; + struct scsi_mpath_head *scsi_mpath_head; + struct mpath_head *mpath_head; + bool remove = false; + + if (!sd_mpath_disk) + return; + + scsi_mpath_dev = sdp->scsi_mpath_dev; + mpath_device = &scsi_mpath_dev->mpath_device; + scsi_mpath_head = sd_mpath_disk->scsi_mpath_head; + mpath_head = scsi_mpath_head->mpath_head; + + mutex_lock(&sd_mpath_disks_lock); + sd_mpath_disk->disk_count--; + if (!sd_mpath_disk->disk_count) { + list_del_init(&sd_mpath_disk->entry); + remove = true; + } + mutex_unlock(&sd_mpath_disks_lock); + mpath_device->disk = NULL; + + if (remove) { + device_del(&sd_mpath_disk->dev); + mpath_remove_disk(mpath_head); + } + sd_mpath_put_disk(sd_mpath_disk); +} + +#else /* CONFIG_SCSI_MULTIPATH */ +static int sd_mpath_probe(struct scsi_disk *sdkp) +{ + return 0; +} +static void sd_mpath_remove(struct scsi_disk *sdkp) +{ + return; +} +static void sd_mpath_fail_probe(struct scsi_disk *sdkp) +{ + +} +static int sd_mpath_revalidate_head(struct 
scsi_disk *sdkp) +{ + return 0; +} +static void sd_mpath_add_disk(struct scsi_disk *sdkp) +{ +} +#endif /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -4016,22 +4351,33 @@ static int sd_probe(struct scsi_device *sdp) &sd_bio_compl_lkclass); if (!gd) goto out_free; + sdkp->disk = gd; + sdkp->device = sdp; - index = ida_alloc(&sd_index_ida, GFP_KERNEL); - if (index < 0) { - sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); - goto out_put; - } + if (sdp->scsi_mpath_dev) { + error = sd_mpath_probe(sdkp); + if (error) + goto out_put; + } else { + index = ida_alloc(&sd_index_ida, GFP_KERNEL); + if (index < 0) { + sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); + goto out_put; + } - error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); - if (error) { - sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); - goto out_free_index; + error = sd_format_disk_name("sd", index, gd->disk_name, + DISK_NAME_LEN); + if (error) { + sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); + goto out_free_index; + } + sdkp->index = index; + + gd->major = sd_major((index & 0xf0) >> 4); + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); + gd->minors = SD_MINORS; } - sdkp->device = sdp; - sdkp->disk = gd; - sdkp->index = index; sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); @@ -4051,16 +4397,13 @@ static int sd_probe(struct scsi_device *sdp) error = device_add(&sdkp->disk_dev); if (error) { + sd_mpath_fail_probe(sdkp); put_device(&sdkp->disk_dev); goto out; } dev_set_drvdata(dev, sdkp); - gd->major = sd_major((index & 0xf0) >> 4); - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = SD_MINORS; - gd->fops = &sd_fops; gd->private_data = sdkp; @@ -4078,6 +4421,12 @@ static int sd_probe(struct scsi_device *sdp) 
sd_revalidate_disk(gd); + if (sdp->scsi_mpath_dev) { + error = sd_mpath_revalidate_head(sdkp); + if (error) + sdev_printk(KERN_WARNING, sdp, "could not revalidate multipath limits\n"); + } + if (sdp->removable) { gd->flags |= GENHD_FL_REMOVABLE; gd->events |= DISK_EVENT_MEDIA_CHANGE; @@ -4092,11 +4441,15 @@ static int sd_probe(struct scsi_device *sdp) error = device_add_disk(dev, gd, NULL); if (error) { + sd_mpath_fail_probe(sdkp); device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } + if (sdp->scsi_mpath_dev) + sd_mpath_add_disk(sdkp); + if (sdkp->security) { sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit); if (sdkp->opal_dev) @@ -4110,7 +4463,8 @@ static int sd_probe(struct scsi_device *sdp) return 0; out_free_index: - ida_free(&sd_index_ida, index); + if (index >= 0) + ida_free(&sd_index_ida, index); out_put: put_disk(gd); out_free: @@ -4238,6 +4592,9 @@ static void sd_remove(struct scsi_device *sdp) struct device *dev = &sdp->sdev_gendev; struct scsi_disk *sdkp = dev_get_drvdata(dev); + if (sdp->scsi_mpath_dev) + sd_mpath_remove(sdkp); + scsi_autopm_get_device(sdkp->device); device_del(&sdkp->disk_dev); @@ -4403,6 +4760,9 @@ static struct scsi_driver sd_template = { .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, + #ifdef CONFIG_SCSI_MULTIPATH + .mpath_remove_head = sd_mpath_remove_head, + #endif .done = sd_done, .eh_action = sd_eh_action, .eh_reset = sd_eh_reset, -- 2.43.5

