Re: [PATCH 19/32] target: Convert to rbtree for se_dev_entry in se_node_acl
On Fri, 2013-12-13 at 15:59 -0800, Andy Grover wrote: Instead of an array, use a rbtree. Less memory use on average, and can allow 255 entries. We go from O(1) to O(log n) on lookups. If this shows up on profiling (it won't) then transition to other kernel lookup methods is straightforward from here. Ugh. There is no reason to be using rbtrees in a performance critical path here. The number of pointer lookups is what ends up hurting the most vs. a flat array, so given that 256 LUNs per endpoint is currently not an issue, and there is no hard limit on the number of endpoints with virtual addressing, I don't see the benefit of this patch. NAK. --nab Change core_disable_device_list_for_node to be called with device_list_lock held, and tweaked params a little. TRANSPORT_LUNFLAGS_INITIATOR_ACCESS no longer needed, presence in the rbtree is equivalent to this being set. Signed-off-by: Andy Grover agro...@redhat.com --- drivers/target/target_core_device.c | 213 ++--- drivers/target/target_core_fabric_configfs.c | 24 ++-- drivers/target/target_core_internal.h| 15 +- drivers/target/target_core_pr.c | 25 +++- drivers/target/target_core_spc.c | 11 +- drivers/target/target_core_stat.c| 72 +- drivers/target/target_core_tpg.c | 53 +-- drivers/target/target_core_ua.c | 18 ++- include/target/target_core_base.h|8 +- 9 files changed, 222 insertions(+), 217 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index af12456..a18724b 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -54,6 +54,51 @@ static struct se_hba *lun0_hba; /* not static, needed by tpg.c */ struct se_device *g_lun0_dev; +bool transport_insert_deve(struct se_node_acl *nacl, struct se_dev_entry *deve) +{ + struct rb_root *root = nacl-rb_device_list; + struct rb_node **new = (root-rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct se_dev_entry *this = rb_entry(*new, struct 
se_dev_entry, rb_node); + + parent = *new; + if (deve-mapped_lun this-mapped_lun) + new = ((*new)-rb_left); + else if (deve-mapped_lun this-mapped_lun) + new = ((*new)-rb_right); + else + return false; + } + + /* Add new node and rebalance tree. */ + rb_link_node(deve-rb_node, parent, new); + rb_insert_color(deve-rb_node, root); + + return true; +} + + +struct se_dev_entry *transport_search_deve(struct se_node_acl *nacl, u32 mapped_lun) +{ + struct rb_root *root = nacl-rb_device_list; + struct rb_node *node = root-rb_node; + + while (node) { + struct se_dev_entry *deve = rb_entry(node, struct se_dev_entry, rb_node); + + if (mapped_lun deve-mapped_lun) + node = node-rb_left; + else if (mapped_lun deve-mapped_lun) + node = node-rb_right; + else + return deve; + } + return NULL; +} + + sense_reason_t transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) { @@ -66,8 +111,8 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) return TCM_NON_EXISTENT_LUN; spin_lock_irqsave(se_sess-se_node_acl-device_list_lock, flags); - se_cmd-se_deve = se_sess-se_node_acl-device_list[unpacked_lun]; - if (se_cmd-se_deve-lun_flags TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { + se_cmd-se_deve = transport_search_deve(se_sess-se_node_acl, unpacked_lun); + if (se_cmd-se_deve) { struct se_dev_entry *deve = se_cmd-se_deve; deve-total_cmds++; @@ -153,10 +198,10 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) return -ENODEV; spin_lock_irqsave(se_sess-se_node_acl-device_list_lock, flags); - se_cmd-se_deve = se_sess-se_node_acl-device_list[unpacked_lun]; - deve = se_cmd-se_deve; + se_cmd-se_deve = transport_search_deve(se_sess-se_node_acl, unpacked_lun); + if (se_cmd-se_deve) { + deve = se_cmd-se_deve; - if (deve-lun_flags TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { se_tmr-tmr_lun = deve-se_lun; se_cmd-se_lun = deve-se_lun; se_lun = deve-se_lun; @@ -187,25 +232,22 @@ EXPORT_SYMBOL(transport_lookup_tmr_lun); /* * This function is called from 
core_scsi3_emulate_pro_register_and_move() - * and core_scsi3_decode_spec_i_port(), and will increment deve->pr_ref_count + * and core_scsi3_decode_spec_i_port(), and will increment deve->refcount * when a matching rtpi is found. */ struct se_dev_entry
Re: [PATCH 19/32] target: Convert to rbtree for se_dev_entry in se_node_acl
On 12/16/2013 01:40 PM, Nicholas A. Bellinger wrote: On Fri, 2013-12-13 at 15:59 -0800, Andy Grover wrote: Instead of an array, use a rbtree. Less memory use on average, and can allow 255 entries. We go from O(1) to O(log n) on lookups. If this shows up on profiling (it won't) then transition to other kernel lookup methods is straightforward from here. Ugh. There is no reason to be using rbtrees in a performance critical path here. The number of pointer lookups is what ends up hurting the most vs. a flat array, so given that 256 LUNs per endpoint is currently not an issue, and there is no hard limit on the number of endpoints with virtual addressing, I don't see the benefit of this patch. NAK. What does virtual addressing mean in this context? I'm not familiar -- so instead of reporting 257+ luns per tpg, it would get split up somehow? -- Andy -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 19/32] target: Convert to rbtree for se_dev_entry in se_node_acl
On Mon, 2013-12-16 at 17:00 -0800, Andy Grover wrote: On 12/16/2013 01:40 PM, Nicholas A. Bellinger wrote: On Fri, 2013-12-13 at 15:59 -0800, Andy Grover wrote: Instead of an array, use a rbtree. Less memory use on average, and can allow 255 entries. We go from O(1) to O(log n) on lookups. If this shows up on profiling (it won't) then transition to other kernel lookup methods is straightforward from here. Ugh. There is no reason to be using rbtrees in a performance critical path here. The number of pointer lookups is what ends up hurting the most vs. a flat array, so given that 256 LUNs per endpoint is currently not an issue, and there is no hard limit on the number of endpoints with virtual addressing, I don't see the benefit of this patch. NAK. What does virtual addressing mean in this context? I'm not familiar -- so instead of reporting 257+ luns per tpg, it would get split up somehow? Virtual addressing meaning /sys/kernel/config/target/$FABRIC/$WWPN/ endpoints that are not tied to a specific hardware mapping, so any arbitrary number of $WWPNs can be created. Right now the only use-case where people are using more than a few dozen LUNs per endpoint is with non NPIV qla2xxx mode, where a single physical Fibre Channel WWPN is exporting 256-512 LUNs, with different per NodeACL LUN mappings for different initiators. With NPIV mode for qla2xxx (currently in development), the WWPN becomes virtualized like iSCSI, and a unsigned short about of virtual port addresses can be configured per physical FC port. --nab -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 19/32] target: Convert to rbtree for se_dev_entry in se_node_acl
Instead of an array, use a rbtree. Less memory use on average, and can allow 255 entries. We go from O(1) to O(log n) on lookups. If this shows up on profiling (it won't) then transition to other kernel lookup methods is straightforward from here. Change core_disable_device_list_for_node to be called with device_list_lock held, and tweaked params a little. TRANSPORT_LUNFLAGS_INITIATOR_ACCESS no longer needed, presence in the rbtree is equivalent to this being set. Signed-off-by: Andy Grover agro...@redhat.com --- drivers/target/target_core_device.c | 213 ++--- drivers/target/target_core_fabric_configfs.c | 24 ++-- drivers/target/target_core_internal.h| 15 +- drivers/target/target_core_pr.c | 25 +++- drivers/target/target_core_spc.c | 11 +- drivers/target/target_core_stat.c| 72 +- drivers/target/target_core_tpg.c | 53 +-- drivers/target/target_core_ua.c | 18 ++- include/target/target_core_base.h|8 +- 9 files changed, 222 insertions(+), 217 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index af12456..a18724b 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -54,6 +54,51 @@ static struct se_hba *lun0_hba; /* not static, needed by tpg.c */ struct se_device *g_lun0_dev; +bool transport_insert_deve(struct se_node_acl *nacl, struct se_dev_entry *deve) +{ + struct rb_root *root = nacl-rb_device_list; + struct rb_node **new = (root-rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct se_dev_entry *this = rb_entry(*new, struct se_dev_entry, rb_node); + + parent = *new; + if (deve-mapped_lun this-mapped_lun) + new = ((*new)-rb_left); + else if (deve-mapped_lun this-mapped_lun) + new = ((*new)-rb_right); + else + return false; + } + + /* Add new node and rebalance tree. 
*/ + rb_link_node(deve-rb_node, parent, new); + rb_insert_color(deve-rb_node, root); + + return true; +} + + +struct se_dev_entry *transport_search_deve(struct se_node_acl *nacl, u32 mapped_lun) +{ + struct rb_root *root = nacl-rb_device_list; + struct rb_node *node = root-rb_node; + + while (node) { + struct se_dev_entry *deve = rb_entry(node, struct se_dev_entry, rb_node); + + if (mapped_lun deve-mapped_lun) + node = node-rb_left; + else if (mapped_lun deve-mapped_lun) + node = node-rb_right; + else + return deve; + } + return NULL; +} + + sense_reason_t transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) { @@ -66,8 +111,8 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u32 unpacked_lun) return TCM_NON_EXISTENT_LUN; spin_lock_irqsave(se_sess-se_node_acl-device_list_lock, flags); - se_cmd-se_deve = se_sess-se_node_acl-device_list[unpacked_lun]; - if (se_cmd-se_deve-lun_flags TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { + se_cmd-se_deve = transport_search_deve(se_sess-se_node_acl, unpacked_lun); + if (se_cmd-se_deve) { struct se_dev_entry *deve = se_cmd-se_deve; deve-total_cmds++; @@ -153,10 +198,10 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u32 unpacked_lun) return -ENODEV; spin_lock_irqsave(se_sess-se_node_acl-device_list_lock, flags); - se_cmd-se_deve = se_sess-se_node_acl-device_list[unpacked_lun]; - deve = se_cmd-se_deve; + se_cmd-se_deve = transport_search_deve(se_sess-se_node_acl, unpacked_lun); + if (se_cmd-se_deve) { + deve = se_cmd-se_deve; - if (deve-lun_flags TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) { se_tmr-tmr_lun = deve-se_lun; se_cmd-se_lun = deve-se_lun; se_lun = deve-se_lun; @@ -187,25 +232,22 @@ EXPORT_SYMBOL(transport_lookup_tmr_lun); /* * This function is called from core_scsi3_emulate_pro_register_and_move() - * and core_scsi3_decode_spec_i_port(), and will increment deve-pr_ref_count + * and core_scsi3_decode_spec_i_port(), and will increment deve-refcount * when a matching rtpi is found. 
*/ struct se_dev_entry *core_get_se_deve_from_rtpi( struct se_node_acl *nacl, u16 rtpi) { - struct se_dev_entry *deve; struct se_lun *lun; struct se_port *port; struct se_portal_group *tpg = nacl->se_tpg; - u32 i; + struct rb_node *node; spin_lock_irq(&nacl->device_list_lock); - for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { - deve = nacl->device_list[i]; - - if