from:"Andrei Vagin"

[Devel] [PATCH rh7] target: use to_stat_tgt_port() to handle files in scsi_tgt_port/

2018-08-09 Thread Andrei Vagin

Currently we use to_stat_port(), but it has to be used only for files in
scsi_port/.

https://pmc.acronis.com/browse/VSTOR-13021
Signed-off-by: Andrei Vagin 
---
 drivers/target/target_core_stat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/target/target_core_stat.c 
b/drivers/target/target_core_stat.c
index 08e8e6a..6d39664 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -547,7 +547,7 @@ static ssize_t target_stat_tgt_port_##_name##_show( 
\
 {  \
ssize_t size = -ENODEV; \
struct se_device *dev;  \
-   struct se_lun *lun = to_stat_port(item);\
+   struct se_lun *lun = to_stat_tgt_port(item);\
\
rcu_read_lock();\
dev = rcu_dereference(lun->lun_se_dev); \
@@ -564,7 +564,7 @@ static ssize_t target_stat_tgt_port_##_name##_store(
\
struct config_item *item, const char *page, size_t size)\
 {  \
struct se_device *dev;  \
-   struct se_lun *lun = to_stat_port(item);\
+   struct se_lun *lun = to_stat_tgt_port(item);\
struct scsi_port_stats_hist *old, *new; \
ssize_t ret;\
\
@@ -897,7 +897,7 @@ static ssize_t target_stat_tgt_port_hs_in_cmds_show(struct 
config_item *item,
 static ssize_t  target_stat_tgt_port_##_name##_show(   \
struct config_item *item, char *page)   \
 {  \
-   struct se_lun *lun = to_stat_port(item);\
+   struct se_lun *lun = to_stat_tgt_port(item);\
struct se_device *dev;  \
ssize_t ret = -ENODEV;  \
\
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target: rename user_helper back to alua_user_helper

2018-07-16 Thread Andrei Vagin

After the last rebase, alua_user_helper was renamed to user_helper by
mistake.

https://pmc.acronis.com/browse/VSTOR-12238

Signed-off-by: Andrei Vagin 
---
 drivers/target/target_core_configfs.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_configfs.c 
b/drivers/target/target_core_configfs.c
index a5a66ce..101f668 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -1946,14 +1946,14 @@ static ssize_t target_dev_enable_store(struct 
config_item *item,
return count;
 }
 
-static ssize_t target_dev_user_helper_show(struct config_item *item, char 
*page)
+static ssize_t target_dev_alua_user_helper_show(struct config_item *item, char 
*page)
 {
struct se_device *dev = to_device(item);
 
return core_alua_show_user_helper(dev, page);
 }
 
-static ssize_t target_dev_user_helper_store(struct config_item *item,
+static ssize_t target_dev_alua_user_helper_store(struct config_item *item,
const char *page, size_t count)
 {
struct se_device *dev = to_device(item);
@@ -2232,7 +2232,7 @@ CONFIGFS_ATTR(target_dev_, udev_path);
 CONFIGFS_ATTR(target_dev_, enable);
 CONFIGFS_ATTR(target_dev_, alua_lu_gp);
 CONFIGFS_ATTR(target_dev_, lba_map);
-CONFIGFS_ATTR(target_dev_, user_helper);
+CONFIGFS_ATTR(target_dev_, alua_user_helper);
 
 static struct configfs_attribute *target_core_dev_attrs[] = {
_dev_attr_info,
@@ -2242,7 +2242,7 @@ static struct configfs_attribute *target_core_dev_attrs[] 
= {
_dev_attr_enable,
_dev_attr_alua_lu_gp,
_dev_attr_lba_map,
-   _dev_attr_user_helper,
+   _dev_attr_alua_user_helper,
NULL,
 };
 
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target: fix a locking scheme of persistent reservations

2018-06-14 Thread Andrei Vagin

In this code, the following pattern is used:

spin_lock(&pr_tmpl->registration_lock);
list_for_each_entry_safe(pr_reg, pr_reg_tmp,
&pr_tmpl->registration_list, pr_reg_list) {
...
atomic_inc_mb(&pr_reg->pr_res_holders);
spin_unlock(&pr_tmpl->registration_lock);
...
spin_lock(&pr_tmpl->registration_lock);
atomic_dec_mb(&pr_reg->pr_res_holders);
...
}
spin_unlock(&pr_tmpl->registration_lock);

This is wrong, because we take a reference only to pr_reg and not to
pr_reg_tmp, so pr_reg_tmp can be removed from the list while the lock is
dropped. This unlock/lock in the middle of a loop is a very strange
optimization, because there are no heavy operations inside it. This patch
removes this temporary release of the spin lock.

https://pmc.acronis.com/browse/VSTOR-10675

Signed-off-by: Andrei Vagin 
---
 drivers/target/target_core_pr.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index f993cca..dc64c2b 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -687,7 +687,6 @@ static struct t10_pr_registration 
*__core_scsi3_alloc_registration(
spin_lock(>se_port_lock);
list_for_each_entry_safe(port, port_tmp, >dev_sep_list, sep_list) {
atomic_inc_mb(>sep_tg_pt_ref_cnt);
-   spin_unlock(>se_port_lock);
 
spin_lock_bh(>sep_alua_lock);
list_for_each_entry(deve_tmp, >sep_alua_list,
@@ -759,7 +758,6 @@ static struct t10_pr_registration 
*__core_scsi3_alloc_registration(
}
spin_unlock_bh(>sep_alua_lock);
 
-   spin_lock(>se_port_lock);
atomic_dec_mb(>sep_tg_pt_ref_cnt);
}
spin_unlock(>se_port_lock);
@@ -841,7 +839,9 @@ int core_scsi3_alloc_aptpl_registration(
 */
pr_reg->pr_res_holder = res_holder;
 
+   spin_lock(_tmpl->aptpl_reg_lock);
list_add_tail(_reg->pr_reg_aptpl_list, _tmpl->aptpl_reg_list);
+   spin_unlock(_tmpl->aptpl_reg_lock);
pr_debug("SPC-3 PR APTPL Successfully added registration%s from"
" metadata\n", (res_holder) ? "+reservation" : "");
return 0;
@@ -919,7 +919,6 @@ static int __core_scsi3_check_aptpl_registration(
pr_reg->pr_reg_tg_pt_lun = lun;
 
list_del(_reg->pr_reg_aptpl_list);
-   spin_unlock(_tmpl->aptpl_reg_lock);
/*
 * At this point all of the pointers in *pr_reg will
 * be setup, so go ahead and add the registration.
@@ -937,7 +936,6 @@ static int __core_scsi3_check_aptpl_registration(
 * Reenable pr_aptpl_active to accept new metadata
 * updates once the SCSI device is active again..
 */
-   spin_lock(_tmpl->aptpl_reg_lock);
pr_tmpl->pr_aptpl_active = 1;
}
}
@@ -1223,7 +1221,6 @@ static void __core_scsi3_free_registration(
 {
struct target_core_fabric_ops *tfo =
pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo;
-   struct t10_reservation *pr_tmpl = >t10_pr;
char i_buf[PR_REG_ISID_ID_LEN];
 
memset(i_buf, 0, PR_REG_ISID_ID_LEN);
@@ -1246,11 +1243,9 @@ static void __core_scsi3_free_registration(
 * count back to zero, and we release *pr_reg.
 */
while (atomic_read(_reg->pr_res_holders) != 0) {
-   spin_unlock(_tmpl->registration_lock);
pr_debug("SPC-3 PR [%s] waiting for pr_res_holders\n",
tfo->get_fabric_name());
cpu_relax();
-   spin_lock(_tmpl->registration_lock);
}
 
pr_debug("SPC-3 PR [%s] Service Action: UNREGISTER Initiator"
@@ -3933,7 +3928,6 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
add_desc_len = 0;
 
atomic_inc_mb(_reg->pr_res_holders);
-   spin_unlock(_tmpl->registration_lock);
/*
 * Determine expected length of $FABRIC_MOD specific
 * TransportID full status descriptor..
@@ -3944,7 +3938,6 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
if ((exp_desc_len + add_len) > cmd->data_length) {
pr_warn("SPC-3 PRIN READ_FULL_STATUS ran"
" out of buffer: %d\n", cmd->data_length);
-   spin_lock(_tmpl->registration_lock);
atomic_dec_mb(_reg->pr_res_holders);
break;
}
@@ -4011,7 +4004,6 @@ core_scsi3_pri_read_full_status(struct se_c

[Devel] [PATCH] target: move blkio_cgroup from attrib/ to param/

2018-05-22 Thread Andrei Vagin

targetcli expects only numeric values in attrib/*, so all
non-numeric attributes should be in param/.

For example:
/sys/kernel/config/target/core/iblock_0/test/param/blkio_cgroup

https://pmc.acronis.com/browse/VSTOR-10194

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_configfs.c | 22 --
 drivers/target/target_core_device.c   | 27 ++-
 drivers/target/target_core_iblock.c   | 17 +++--
 include/target/target_core_backend.h  |  1 +
 include/target/target_core_backend_configfs.h |  1 +
 include/target/target_core_base.h |  8 +++-
 6 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/drivers/target/target_core_configfs.c 
b/drivers/target/target_core_configfs.c
index 7e7de7c..a9a19a4 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -598,6 +598,20 @@ TB_CIT_SETUP(dev_attrib, _core_dev_attrib_ops, 
NULL, NULL);
 
 /* End functions for struct config_item_type tb_dev_attrib_cit */
 
+/* Start functions for struct config_item_type tb_dev_param_cit */
+
+CONFIGFS_EATTR_STRUCT(target_core_dev_param, se_dev_param);
+CONFIGFS_EATTR_OPS(target_core_dev_param, se_dev_param, da_group);
+
+static struct configfs_item_operations target_core_dev_param_ops = {
+   .show_attribute = target_core_dev_param_attr_show,
+   .store_attribute= target_core_dev_param_attr_store,
+};
+
+TB_CIT_SETUP(dev_param, _core_dev_param_ops, NULL, NULL);
+
+/* End functions for struct config_item_type tb_dev_param_cit */
+
 /*  Start functions for struct config_item_type tb_dev_wwn_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_wwn, t10_wwn);
@@ -2655,7 +2669,7 @@ static struct config_group *target_core_make_subdev(
 
dev_cg = >dev_group;
 
-   dev_cg->default_groups = kmalloc(sizeof(struct config_group *) * 6,
+   dev_cg->default_groups = kmalloc(sizeof(struct config_group *) * 7,
GFP_KERNEL);
if (!dev_cg->default_groups)
goto out_free_device;
@@ -2663,6 +2677,8 @@ static struct config_group *target_core_make_subdev(
config_group_init_type_name(dev_cg, name, >tb_cits.tb_dev_cit);
config_group_init_type_name(>dev_attrib.da_group, "attrib",
>tb_cits.tb_dev_attrib_cit);
+   config_group_init_type_name(>dev_param.da_group, "param",
+   >tb_cits.tb_dev_param_cit);
config_group_init_type_name(>dev_pr_group, "pr",
>tb_cits.tb_dev_pr_cit);
config_group_init_type_name(>t10_wwn.t10_wwn_group, "wwn",
@@ -2677,7 +2693,8 @@ static struct config_group *target_core_make_subdev(
dev_cg->default_groups[2] = >t10_wwn.t10_wwn_group;
dev_cg->default_groups[3] = >t10_alua.alua_tg_pt_gps_group;
dev_cg->default_groups[4] = >dev_stat_grps.stat_group;
-   dev_cg->default_groups[5] = NULL;
+   dev_cg->default_groups[5] = >dev_param.da_group;
+   dev_cg->default_groups[6] = NULL;
/*
 * Add core/$HBA/$DEV/alua/default_tg_pt_gp
 */
@@ -2968,6 +2985,7 @@ void target_core_setup_sub_cits(struct se_subsystem_api 
*sa)
 {
target_core_setup_dev_cit(sa);
target_core_setup_dev_attrib_cit(sa);
+   target_core_setup_dev_param_cit(sa);
target_core_setup_dev_pr_cit(sa);
target_core_setup_dev_wwn_cit(sa);
target_core_setup_dev_alua_tg_pt_gps_cit(sa);
diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index 8f47549..c06cda3 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -835,9 +835,9 @@ ssize_t se_dev_blkio_cgroup_show(struct se_device *dev, 
char *page)
 {
int rb;
 
-   read_lock(>dev_attrib_lock);
-   if (dev->dev_attrib.blk_css) {
-   rb = cgroup_path(dev->dev_attrib.blk_css->cgroup,
+   read_lock(>dev_param_lock);
+   if (dev->dev_param.blk_css) {
+   rb = cgroup_path(dev->dev_param.blk_css->cgroup,
page, PAGE_SIZE - 1);
if (rb < 0)
goto out;
@@ -849,7 +849,7 @@ ssize_t se_dev_blkio_cgroup_show(struct se_device *dev, 
char *page)
} else
rb = 0;
 out:
-   read_unlock(>dev_attrib_lock);
+   read_unlock(>dev_param_lock);
 
return rb;
 }
@@ -873,10 +873,10 @@ ssize_t se_dev_blkio_cgroup_store(struct se_device *dev,
else
css = NULL;
 
-   write_lock(>dev_attrib_lock);
-   pcss = dev->dev_attrib.blk_css;
-   dev->dev_attrib.blk_css = css;
-   write_unlock(>dev_attrib_lock);
+   write_lock(>dev_param_lock);
+   pcss = dev->dev_param.bl

[Devel] [PATCH 2/3] [v2] target: allow to set a blkio cgroup for a backstore

2018-05-08 Thread Andrei Vagin

The Block I/O (blkio) subsystem controls and monitors access to I/O on
block devices by tasks in cgroups. With the introduced changes, a
backstore will be like a task in a specified group.

One interesting feature is the ability to set limits on the number of
I/O operations and bytes per second.

A new attribute is added for backstores, it is called blkio_cgroup.

If we write 1 to the attribute file, a blkio cgroup from the current
process is attached to the backstore.

If we write 0 to the attribute file, the current group will be detached
from the backstore.

Once we know the blkio cgroup, the only thing we need to do to make it
work is to set this group for bios.

How to use:
 # Create a test backstore
$ targetcli
targetcli shell version 2.1.fb46
Copyright 2011-2013 by Datera, Inc and others.
/backstores/block> create dev=/dev/loop0 loop0
Created block storage object loop0 using /dev/loop0.
/backstores/block> cd /loopback
/loopback> create
Created target naa.50014056fd3f341c.
/loopback> cd naa.50014056fd3f341c/luns
/loopback/naa...fd3f341c/luns> create /backstores/block/loop0
Created LUN 0.
/loopback/naa...fd3f341c/luns> exit

 # Create a test cgroup and set it to a test backstore
$ CG_PATH=/sys/fs/cgroup/blkio/test
$ BS_PATH=/sys/kernel/config/target/core/iblock_0/loop0/attrib/blkio_cgroup
$ mkdir -p $CG_PATH
$ bash -c "echo 0 > $CG_PATH/tasks && echo 1 > $BS_PATH"
$ cat $BS_PATH
/test

 # Set 6 MB/sec for the backstore
$ echo "7:0 6291456" > $CG_PATH/blkio.throttle.read_bps_device

 # Check that everything works as expected
$ dd if=/dev/sda of=/dev/null iflag=direct bs=1M count=100
100+0 records in
100+0 records out
104857600 bytes (105 MB, 100 MiB) copied, 16.6958 s, 6.3 MB/s

v2: handle errors of cgroup_path()

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_device.c  | 61 
 drivers/target/target_core_iblock.c  | 38 ++
 include/target/target_core_backend.h |  4 +++
 include/target/target_core_base.h|  2 ++
 4 files changed, 105 insertions(+)

diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index cb17aeb..8f47549 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -831,6 +831,61 @@ int se_dev_set_emulate_fua_read(struct se_device *dev, int 
flag)
 }
 EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
 
+ssize_t se_dev_blkio_cgroup_show(struct se_device *dev, char *page)
+{
+   int rb;
+
+   read_lock(>dev_attrib_lock);
+   if (dev->dev_attrib.blk_css) {
+   rb = cgroup_path(dev->dev_attrib.blk_css->cgroup,
+   page, PAGE_SIZE - 1);
+   if (rb < 0)
+   goto out;
+   if (rb == 0)
+   rb = strlen(page);
+   page[rb] = '\n';
+   page[rb + 1] = 0;
+   rb++;
+   } else
+   rb = 0;
+out:
+   read_unlock(>dev_attrib_lock);
+
+   return rb;
+}
+EXPORT_SYMBOL(se_dev_blkio_cgroup_show);
+
+ssize_t se_dev_blkio_cgroup_store(struct se_device *dev,
+   const char *page, size_t count)
+{
+   struct cgroup_subsys_state *css, *pcss;
+   int ret;
+   u32 val;
+
+   ret = kstrtou32(page, 0, );
+   if (ret < 0)
+   return ret;
+
+   if (val > 1)
+   return -EINVAL;
+   if (val == 1)
+   css = task_get_css(current, blkio_subsys_id);
+   else
+   css = NULL;
+
+   write_lock(>dev_attrib_lock);
+   pcss = dev->dev_attrib.blk_css;
+   dev->dev_attrib.blk_css = css;
+   write_unlock(>dev_attrib_lock);
+
+   if (pcss)
+   css_put(pcss);
+
+   return count;
+}
+EXPORT_SYMBOL(se_dev_blkio_cgroup_store);
+
+
 int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 {
if (flag != 0 && flag != 1) {
@@ -1507,6 +1562,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, 
const char *name)
INIT_LIST_HEAD(>state_list);
INIT_LIST_HEAD(>qf_cmd_list);
INIT_LIST_HEAD(>g_dev_node);
+   rwlock_init(>dev_attrib_lock);
spin_lock_init(>execute_task_lock);
spin_lock_init(>delayed_cmd_lock);
spin_lock_init(>dev_reservation_lock);
@@ -1555,6 +1611,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, 
const char *name)
dev->dev_attrib.unmap_zeroes_data =
DA_UNMAP_ZEROES_DATA_DEFAULT;
dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN;
+   dev->dev_attrib.blk_css = NULL;
 
xcopy_lun = >xcopy_lun;
xcopy_lun->lun_se_dev = dev;
@@ -1728,6 +1785,10 @@ void target_free_device(struct se_device *dev)
if (dev->transport->free_prot)
dev-&

Re: [Devel] [PATCH 2/3] target: allow to set a blkio cgroup for a backstore

2018-05-07 Thread Andrei Vagin

On Sat, May 05, 2018 at 09:12:02AM +0300, Cyrill Gorcunov wrote:
> On Sat, May 05, 2018 at 01:42:36AM +0300, Andrei Vagin wrote:
> > The Block I/O (blkio) subsystem controls and monitors access to I/O on
> > block devices by tasks in cgroups. With the introduced changes, a
> > backstore will be like a task in a specified group.
> > 
> > One of interesting feature is an ability to set limits on a number of
> > I/O operations and bytes per seconds.
> > 
> > A new attribute is added for backstores, it is called blkio_cgroup.
> > 
> > If we write 1 to the attribute file, a blkio cgroup from the current
> > process is attached to the backstore.
> > 
> > If we write 0 to the attribute file, a current group will be detached
> > from the backstore.
> > 
> > When we know a blkio cgroup the only thing, what we need to do to make it
> > work, is to set this group for bio-s.
> > 
> > How to use:
> >  # Create a test backstore
> > $ targetcli
> > targetcli shell version 2.1.fb46
> > Copyright 2011-2013 by Datera, Inc and others.
> > /backstores/block> create dev=/dev/loop0 loop0
> > Created block storage object loop0 using /dev/loop0.
> > /backstores/block> cd /loopback
> > /loopback> create
> > Created target naa.50014056fd3f341c.
> > /loopback> cd naa.50014056fd3f341c/luns
> > /loopback/naa...fd3f341c/luns> create /backstores/block/loop0
> > Created LUN 0.
> > /loopback/naa...fd3f341c/luns> exit
> > 
> >  # Create a test cgroup and set it to a test backstore
> > $ CG_PATH=/sys/fs/cgroup/blkio/test
> > $ BS_PATH=/sys/kernel/config/target/core/iblock_0/loop0/attrib/blkio_cgroup
> > $ mkdir -p $CG_PATH
> > $ bash -c "echo 0 > $CG_PATH/tasks && echo 1 > $BS_PATH"
> > $ cat $BS_PATH
> > /test
> > 
> >  # Set 6 MB/sec for the backstore
> > $ echo "7:0 6291456" > $CG_PATH/blkio.throttle.read_bps_device
> > 
> >  # Check that everything work as expected
> > $ dd if=/dev/sda of=/dev/null iflag=direct bs=1M count=100
> > 100+0 records in
> > 100+0 records out
> > 104857600 bytes (105 MB, 100 MiB) copied, 16.6958 s, 6.3 MB/s
> > 
> > Signed-off-by: Andrei Vagin <ava...@openvz.org>
> > ---
> >  drivers/target/target_core_device.c  | 58 
> > 
> >  drivers/target/target_core_iblock.c  | 38 +++
> >  include/target/target_core_backend.h |  4 +++
> >  include/target/target_core_base.h|  2 ++
> >  4 files changed, 102 insertions(+)
> > 
> > diff --git a/drivers/target/target_core_device.c 
> > b/drivers/target/target_core_device.c
> > index cb17aeb..fb1e940 100644
> > --- a/drivers/target/target_core_device.c
> > +++ b/drivers/target/target_core_device.c
> > @@ -831,6 +831,58 @@ int se_dev_set_emulate_fua_read(struct se_device *dev, 
> > int flag)
> >  }
> >  EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
> >  
> > +ssize_t se_dev_blkio_cgroup_show(struct se_device *dev, char *page)
> > +{
> > +   int rb;
> > +
> > +   read_lock(>dev_attrib_lock);
> > +   if (dev->dev_attrib.blk_css) {
> > +   rb = cgroup_path(dev->dev_attrib.blk_css->cgroup,
> > +   page, PAGE_SIZE - 1);
> > +   if (rb == 0)
> > +   rb = strlen(page);
> 
> Maybe it would worth to use strnlen(page, PAGE_SIZE - 2); here?

cgroup_path() returns a null-terminated string, so I don't understand the
reason for this change.

> 
> > +   page[rb] = '\n';
> > +   page[rb + 1] = 0;
> > +   rb++;
> > +   } else
> > +   rb = 0;
> > +   read_unlock(>dev_attrib_lock);
> > +
> > +   return rb;
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 3/3] Revert "target/iscsi: add an ability to set io limits for iscsi targets"

2018-05-04 Thread Andrei Vagin

This reverts commit 8edf0797df883502bdd885090bfbd34920866d62.
---
 drivers/target/iscsi/iscsi_target_configfs.c | 46 
 drivers/target/iscsi/iscsi_target_login.c| 34 +---
 drivers/target/iscsi/iscsi_target_tpg.c  | 36 --
 drivers/target/iscsi/iscsi_target_tpg.h  |  2 --
 include/target/iscsi/iscsi_target_core.h |  2 --
 kernel/cgroup.c  |  1 -
 6 files changed, 1 insertion(+), 120 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c 
b/drivers/target/iscsi/iscsi_target_configfs.c
index f43e975..cf68964 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1412,51 +1412,6 @@ TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR);
 DEF_TPG_PARAM(OFMarkInt);
 TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR);
 
-static ssize_t iscsi_tpg_param_show_BlkioCgroup(
-   struct se_portal_group *se_tpg,
-   char *page)
-{
-   struct iscsi_portal_group *tpg = container_of(se_tpg,
-   struct iscsi_portal_group, tpg_se_tpg);
-   ssize_t rb;
-
-   if (iscsit_get_tpg(tpg) < 0)
-   return -EINVAL;
-
-   rb = iscsit_ta_tpg_show_blkcg(tpg, page);
-   iscsit_put_tpg(tpg);
-   return rb;
-}
-
-static ssize_t iscsi_tpg_param_store_BlkioCgroup(
-   struct se_portal_group *se_tpg,
-   const char *page,
-   size_t count)
-{
-   struct iscsi_portal_group *tpg = container_of(se_tpg,
-   struct iscsi_portal_group, tpg_se_tpg);
-   u32 val;
-   int ret;
-
-   if (iscsit_get_tpg(tpg) < 0)
-   return -EINVAL;
-
-   ret = kstrtou32(page, 0, );
-   if (ret)
-   goto out;
-   ret = iscsit_ta_tpg_set_blkcg(tpg, val);
-   if (ret < 0)
-   goto out;
-
-   iscsit_put_tpg(tpg);
-   return count;
-out:
-   iscsit_put_tpg(tpg);
-   return ret;
-}
-
-TPG_PARAM_ATTR(BlkioCgroup, S_IRUGO | S_IWUSR);
-
 static struct configfs_attribute *lio_target_tpg_param_attrs[] = {
_tpg_param_AuthMethod.attr,
_tpg_param_HeaderDigest.attr,
@@ -1479,7 +1434,6 @@ static struct configfs_attribute 
*lio_target_tpg_param_attrs[] = {
_tpg_param_OFMarker.attr,
_tpg_param_IFMarkInt.attr,
_tpg_param_OFMarkInt.attr,
-   _tpg_param_BlkioCgroup.attr,
NULL,
 };
 
diff --git a/drivers/target/iscsi/iscsi_target_login.c 
b/drivers/target/iscsi/iscsi_target_login.c
index 4f59416..c20b561 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -20,7 +20,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -713,18 +712,8 @@ static void iscsi_post_login_start_timers(struct 
iscsi_conn *conn)
 
 int iscsit_start_kthreads(struct iscsi_conn *conn)
 {
-   struct iscsi_portal_group *tpg = conn->tpg;
-   struct cgroup_subsys_state *blk_css = NULL;
int ret = 0;
 
-   if (iscsit_get_tpg(tpg) < 0)
-   return -EINVAL;
-   if (tpg->blk_css) {
-   blk_css = tpg->blk_css;
-   css_get(blk_css);
-   }
-   iscsit_put_tpg(tpg);
-
spin_lock(_global->ts_bitmap_lock);
conn->bitmap_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
ISCSIT_BITMAP_BITS, get_order(1));
@@ -733,8 +722,7 @@ int iscsit_start_kthreads(struct iscsi_conn *conn)
if (conn->bitmap_id < 0) {
pr_err("bitmap_find_free_region() failed for"
   " iscsit_start_kthreads()\n");
-   ret = -ENOMEM;
-   goto put_blk_css;
+   return -ENOMEM;
}
 
conn->tx_thread = kthread_run(iscsi_target_tx_thread, conn,
@@ -744,11 +732,6 @@ int iscsit_start_kthreads(struct iscsi_conn *conn)
ret = PTR_ERR(conn->tx_thread);
goto out_bitmap;
}
-   if (blk_css) {
-   ret = cgroup_kernel_attach(blk_css->cgroup, conn->tx_thread);
-   if (ret < 0)
-   goto out_tx;
-   }
conn->tx_thread_active = true;
 
conn->rx_thread = kthread_run(iscsi_target_rx_thread, conn,
@@ -758,21 +741,9 @@ int iscsit_start_kthreads(struct iscsi_conn *conn)
ret = PTR_ERR(conn->rx_thread);
goto out_tx;
}
-   if (blk_css) {
-   ret = cgroup_kernel_attach(blk_css->cgroup, conn->rx_thread);
-   if (ret < 0)
-   goto out_rx;
-   }
conn->rx_thread_active = true;
 
-   if (blk_css)
-   css_put(blk_css);
-
return 0;
-out_rx:
-   send_sig(SIGINT, conn->rx_thread, 1);
-   kthread_stop(conn->rx_thread);
-   conn->rx_thread_active = false;
 out_tx:
send_sig(SIGINT, conn->tx_thread, 1);
kthread_stop(conn->tx_thread);
@@ -782,9 +753,6 @@ out_bitmap:

[Devel] [PATCH 1/3] blkcg: implement bio_associate_blkcg()

2018-05-04 Thread Andrei Vagin

From: Tejun Heo <t...@kernel.org>

ML: 1d933cf096e3aea15f1aec8297657b7a846fab63

Currently, a bio can only be associated with the io_context and
blkcg of %current using bio_associate_current().  This is too restrictive
for cgroup writeback support.  Implement bio_associate_blkcg() which
associates a bio with the specified blkcg.

bio_associate_blkcg() leaves the io_context unassociated.
bio_associate_current() is updated so that it considers a bio as
already associated if it has a blkcg_css, instead of an io_context,
associated with it.

Signed-off-by: Tejun Heo <t...@kernel.org>
Cc: Jens Axboe <ax...@kernel.dk>
Cc: Vivek Goyal <vgo...@redhat.com>
Signed-off-by: Jens Axboe <ax...@fb.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 fs/bio.c| 25 -
 include/linux/bio.h |  3 +++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/fs/bio.c b/fs/bio.c
index bfa6cdc..b9b6a2c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -2126,6 +2126,29 @@ bad:
 EXPORT_SYMBOL(bioset_create);
 
 #ifdef CONFIG_BLK_CGROUP
+
+/**
+ * bio_associate_blkcg - associate a bio with the specified blkcg
+ * @bio: target bio
+ * @blkcg_css: css of the blkcg to associate
+ *
+ * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
+ * treat @bio as if it were issued by a task which belongs to the blkcg.
+ *
+ * This function takes an extra reference of @blkcg_css which will be put
+ * when @bio is released.  The caller must own @bio and is responsible for
+ * synchronizing calls to this function.
+ */
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
+{
+   if (unlikely(bio->bi_css))
+   return -EBUSY;
+   css_get(blkcg_css);
+   bio->bi_css = blkcg_css;
+   return 0;
+}
+EXPORT_SYMBOL(bio_associate_blkcg);
+
 /**
  * bio_associate_current - associate a bio with %current
  * @bio: target bio
@@ -2144,7 +2167,7 @@ int bio_associate_current(struct bio *bio)
struct io_context *ioc;
struct cgroup_subsys_state *css;
 
-   if (bio->bi_ioc)
+   if (bio->bi_css)
return -EBUSY;
 
ioc = current->io_context;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 34be583..877f8de 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -319,9 +319,12 @@ extern void bvec_free(mempool_t *, struct bio_vec *, 
unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state 
*blkcg_css);
 int bio_associate_current(struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
 #else  /* CONFIG_BLK_CGROUP */
+static inline int bio_associate_blkcg(struct bio *bio,
+   struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
 static inline void bio_disassociate_task(struct bio *bio) { }
 #endif /* CONFIG_BLK_CGROUP */
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 2/3] target: allow to set a blkio cgroup for a backstore

2018-05-04 Thread Andrei Vagin

The Block I/O (blkio) subsystem controls and monitors access to I/O on
block devices by tasks in cgroups. With the introduced changes, a
backstore will be like a task in a specified group.

One interesting feature is the ability to set limits on the number of
I/O operations and bytes per second.

A new attribute is added for backstores, it is called blkio_cgroup.

If we write 1 to the attribute file, a blkio cgroup from the current
process is attached to the backstore.

If we write 0 to the attribute file, the current group will be detached
from the backstore.

Once we know the blkio cgroup, the only thing we need to do to make it
work is to set this group for bios.

How to use:
 # Create a test backstore
$ targetcli
targetcli shell version 2.1.fb46
Copyright 2011-2013 by Datera, Inc and others.
/backstores/block> create dev=/dev/loop0 loop0
Created block storage object loop0 using /dev/loop0.
/backstores/block> cd /loopback
/loopback> create
Created target naa.50014056fd3f341c.
/loopback> cd naa.50014056fd3f341c/luns
/loopback/naa...fd3f341c/luns> create /backstores/block/loop0
Created LUN 0.
/loopback/naa...fd3f341c/luns> exit

 # Create a test cgroup and set it to a test backstore
$ CG_PATH=/sys/fs/cgroup/blkio/test
$ BS_PATH=/sys/kernel/config/target/core/iblock_0/loop0/attrib/blkio_cgroup
$ mkdir -p $CG_PATH
$ bash -c "echo 0 > $CG_PATH/tasks && echo 1 > $BS_PATH"
$ cat $BS_PATH
/test

 # Set 6 MB/sec for the backstore
$ echo "7:0 6291456" > $CG_PATH/blkio.throttle.read_bps_device

 # Check that everything works as expected
$ dd if=/dev/sda of=/dev/null iflag=direct bs=1M count=100
100+0 records in
100+0 records out
104857600 bytes (105 MB, 100 MiB) copied, 16.6958 s, 6.3 MB/s

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_device.c  | 58 
 drivers/target/target_core_iblock.c  | 38 +++
 include/target/target_core_backend.h |  4 +++
 include/target/target_core_base.h|  2 ++
 4 files changed, 102 insertions(+)

diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index cb17aeb..fb1e940 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -831,6 +831,58 @@ int se_dev_set_emulate_fua_read(struct se_device *dev, int 
flag)
 }
 EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
 
+ssize_t se_dev_blkio_cgroup_show(struct se_device *dev, char *page)
+{
+   int rb;
+
+   read_lock(>dev_attrib_lock);
+   if (dev->dev_attrib.blk_css) {
+   rb = cgroup_path(dev->dev_attrib.blk_css->cgroup,
+   page, PAGE_SIZE - 1);
+   if (rb == 0)
+   rb = strlen(page);
+   page[rb] = '\n';
+   page[rb + 1] = 0;
+   rb++;
+   } else
+   rb = 0;
+   read_unlock(>dev_attrib_lock);
+
+   return rb;
+}
+EXPORT_SYMBOL(se_dev_blkio_cgroup_show);
+
+ssize_t se_dev_blkio_cgroup_store(struct se_device *dev,
+   const char *page, size_t count)
+{
+   struct cgroup_subsys_state *css, *pcss;
+   int ret;
+   u32 val;
+
+   ret = kstrtou32(page, 0, );
+   if (ret < 0)
+   return ret;
+
+   if (val > 1)
+   return -EINVAL;
+   if (val == 1)
+   css = task_get_css(current, blkio_subsys_id);
+   else
+   css = NULL;
+
+   write_lock(>dev_attrib_lock);
+   pcss = dev->dev_attrib.blk_css;
+   dev->dev_attrib.blk_css = css;
+   write_unlock(>dev_attrib_lock);
+
+   if (pcss)
+   css_put(pcss);
+
+   return count;
+}
+EXPORT_SYMBOL(se_dev_blkio_cgroup_store);
+
+
 int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 {
if (flag != 0 && flag != 1) {
@@ -1507,6 +1559,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, 
const char *name)
INIT_LIST_HEAD(>state_list);
INIT_LIST_HEAD(>qf_cmd_list);
INIT_LIST_HEAD(>g_dev_node);
+   rwlock_init(>dev_attrib_lock);
spin_lock_init(>execute_task_lock);
spin_lock_init(>delayed_cmd_lock);
spin_lock_init(>dev_reservation_lock);
@@ -1555,6 +1608,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, 
const char *name)
dev->dev_attrib.unmap_zeroes_data =
DA_UNMAP_ZEROES_DATA_DEFAULT;
dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN;
+   dev->dev_attrib.blk_css = NULL;
 
xcopy_lun = >xcopy_lun;
xcopy_lun->lun_se_dev = dev;
@@ -1728,6 +1782,10 @@ void target_free_device(struct se_device *dev)
if (dev->transport->free_prot)
dev->transport->free_prot(dev);
 
+   if (dev->dev_attrib.blk_css)
+   css_put(dev->dev_a

[Devel] [PATCH 0/3] drivers/target: move setting of a blkio cgroup to the backstore level

2018-05-04 Thread Andrei Vagin

Currently we can set a blkio cgroup for iscsi targets only.
With this series, it will work for all targets.

Andrei Vagin (3):
  blkcg: implement bio_associate_blkcg()
  target: allow to set a blkio cgroup for a backstore
  Revert "target/iscsi: add an ability to set io limits for iscsi
targets"

 drivers/target/iscsi/iscsi_target_configfs.c | 46 --
 drivers/target/iscsi/iscsi_target_login.c| 34 +---
 drivers/target/iscsi/iscsi_target_tpg.c  | 36 -
 drivers/target/iscsi/iscsi_target_tpg.h  |  2 -
 drivers/target/target_core_device.c  | 58 
 drivers/target/target_core_iblock.c  | 38 ++
 fs/bio.c | 25 +++-
 include/linux/bio.h  |  3 ++
 include/target/iscsi/iscsi_target_core.h |  2 -
 include/target/target_core_backend.h |  4 ++
 include/target/target_core_base.h|  2 +
 kernel/cgroup.c  |  1 -
 12 files changed, 130 insertions(+), 121 deletions(-)

-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target: Drop signal_pending checks after interruptible lock acquire

2018-04-20 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: ee7619f2eb21304dcc846b8dc8f8c3d6cbe11792

Once upon a time, iscsit_get_tpg() was using an un-interruptible
lock.  The signal_pending() usage was a check to allow userspace
to break out of the operation with SIGINT.

AFAICT, there's no reason why this is necessary anymore, and as
reported by Alexey can be potentially dangerous.  Also, go ahead
and drop the other two problematic cases within iscsit_access_np()
and sbc_compare_and_write() as well.

Found by Linux Driver Verification project (linuxtesting.org).

https://pmc.acronis.com/browse/VSTOR-9577

Reported-by: Alexey Khoroshilov <khoroshi...@ispras.ru>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/iscsi/iscsi_target.c | 2 +-
 drivers/target/iscsi/iscsi_target_tpg.c | 5 +
 drivers/target/target_core_sbc.c| 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c 
b/drivers/target/iscsi/iscsi_target.c
index 2ec68f2..45fda61 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -232,7 +232,7 @@ int iscsit_access_np(struct iscsi_np *np, struct 
iscsi_portal_group *tpg)
 * Here we serialize access across the TIQN+TPG Tuple.
 */
ret = down_interruptible(&tpg->np_login_sem);
-   if ((ret != 0) || signal_pending(current))
+   if (ret != 0)
return -1;
 
spin_lock_bh(>tpg_state_lock);
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c 
b/drivers/target/iscsi/iscsi_target_tpg.c
index 792d3ee..8966816 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -164,10 +164,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np(
 int iscsit_get_tpg(
struct iscsi_portal_group *tpg)
 {
-   int ret;
-
-   ret = mutex_lock_interruptible(&tpg->tpg_access_lock);
-   return ((ret != 0) || signal_pending(current)) ? -1 : 0;
+   return mutex_lock_interruptible(&tpg->tpg_access_lock);
 }
 
 void iscsit_put_tpg(struct iscsi_portal_group *tpg)
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 5578e4d..f7f5585 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -567,7 +567,7 @@ sbc_compare_and_write(struct se_cmd *cmd)
 * comparision using SGLs at cmd->t_bidi_data_sg..
 */
rc = down_interruptible(&dev->caw_sem);
-   if ((rc != 0) || signal_pending(current)) {
+   if (rc != 0) {
cmd->transport_complete_callback = NULL;
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 10/10] target: check XCOPY segment descriptor CSCD IDs

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: f184210bca6c9d0091ff5e5629dea4cbb8a17c0f

Ensure that the segment descriptor CSCD descriptor ID values correspond
to CSCD descriptor entries located in the XCOPY command parameter list.
SPC4r37 6.4.6.1 Table 150 specifies this range as 0000h to 07FFh, where
the CSCD descriptor location in the parameter list can be located via:
16 + (id * 32)

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
[ bvanassche: inserted "; " in the format string of an error message
  and also moved a "||" operator from the start of a line to the end
  of the previous line ]
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 8 
 drivers/target/target_core_xcopy.h | 6 ++
 2 files changed, 14 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index a688b75..afced9c 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -305,6 +305,14 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd 
*se_cmd, struct xcopy_op
 
xop->stdi = get_unaligned_be16(&desc[4]);
xop->dtdi = get_unaligned_be16(&desc[6]);
+
+   if (xop->stdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX ||
+   xop->dtdi > XCOPY_CSCD_DESC_ID_LIST_OFF_MAX) {
+   pr_err("XCOPY segment desc 0x02: unsupported CSCD ID > 0x%x; 
stdi: %hu dtdi: %hu\n",
+   XCOPY_CSCD_DESC_ID_LIST_OFF_MAX, xop->stdi, xop->dtdi);
+   return -EINVAL;
+   }
+
pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: 
%d\n",
desc_len, xop->stdi, xop->dtdi, dc);
 
diff --git a/drivers/target/target_core_xcopy.h 
b/drivers/target/target_core_xcopy.h
index b5baecc..654b76a 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -4,6 +4,12 @@
 #define XCOPY_NAA_IEEE_REGEX_LEN   16
 #define XCOPY_MAX_SECTORS  1024
 
+/*
+ * SPC4r37 6.4.6.1
+ * Table 150 — CSCD descriptor ID values
+ */
+#define XCOPY_CSCD_DESC_ID_LIST_OFF_MAX0x07FF
+
 enum xcopy_origin_list {
XCOL_SOURCE_RECV_OP = 0x01,
XCOL_DEST_RECV_OP = 0x02,
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 07/10] target: Fix a deadlock between the XCOPY code and iSCSI session shutdown

2018-04-02 Thread Andrei Vagin

From: Bart Van Assche <bart.vanass...@sandisk.com>

ML: d877d7275be34ad70ce92bcbb4bb36cec77ed004

Move the code for parsing an XCOPY command from the context of
the iSCSI receiver thread to the context of the XCOPY workqueue.
Keep the simple XCOPY checks in the context of the iSCSI receiver
thread. Move the code for allocating and freeing struct xcopy_op
from the code that parses an XCOPY command to its caller.

This patch fixes the following deadlock:

==
[ INFO: possible circular locking dependency detected ]
4.10.0-rc7-dbg+ #1 Not tainted
---
rmdir/13321 is trying to acquire lock:
 (>cmdsn_mutex){+.+.+.}, at: [] 
iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]

but task is already holding lock:
 (>s_type->i_mutex_key#14){++}, at: [] 
vfs_rmdir+0x50/0x140

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:
-> #1 (>s_type->i_mutex_key#14){++}:
 lock_acquire+0x71/0x90
 down_write+0x3f/0x70
 configfs_depend_item+0x3a/0xb0 [configfs]
 target_depend_item+0x13/0x20 [target_core_mod]
 target_xcopy_locate_se_dev_e4+0xdd/0x1a0 [target_core_mod]
 target_do_xcopy+0x34b/0x970 [target_core_mod]
 __target_execute_cmd+0x22/0xa0 [target_core_mod]
 target_execute_cmd+0x233/0x2c0 [target_core_mod]
 iscsit_execute_cmd+0x208/0x270 [iscsi_target_mod]
 iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
 iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
 iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
 kthread+0x102/0x140
 ret_from_fork+0x31/0x40

-> #0 (>cmdsn_mutex){+.+.+.}:
 __lock_acquire+0x10e6/0x1260
 lock_acquire+0x71/0x90
 mutex_lock_nested+0x5f/0x670
 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
 iscsit_close_session+0xac/0x200 [iscsi_target_mod]
 lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
 target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
 core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
 target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
 config_item_release+0x5a/0xc0 [configfs]
 config_item_put+0x1d/0x1f [configfs]
 configfs_rmdir+0x1a6/0x300 [configfs]
 vfs_rmdir+0xb7/0x140
 do_rmdir+0x1f4/0x200
 SyS_rmdir+0x11/0x20
 entry_SYSCALL_64_fastpath+0x23/0xc6

other info that might help us debug this:

 Possible unsafe locking scenario:
   CPU0CPU1
   
  lock(>s_type->i_mutex_key#14);
   lock(>cmdsn_mutex);
   lock(>s_type->i_mutex_key#14);
  lock(>cmdsn_mutex);

 *** DEADLOCK ***

3 locks held by rmdir/13321:
 #0:  (sb_writers#10){.+.+.+}, at: [] mnt_want_write+0x1f/0x50
 #1:  (_group_class[depth - 1]#2/1){+.+.+.}, at: [] 
do_rmdir+0x15e/0x200
 #2:  (>s_type->i_mutex_key#14){++}, at: [] 
vfs_rmdir+0x50/0x140

stack backtrace:
CPU: 2 PID: 13321 Comm: rmdir Not tainted 4.10.0-rc7-dbg+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0x86/0xc3
 print_circular_bug+0x1c7/0x220
 __lock_acquire+0x10e6/0x1260
 lock_acquire+0x71/0x90
 mutex_lock_nested+0x5f/0x670
 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
 iscsit_close_session+0xac/0x200 [iscsi_target_mod]
 lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
 target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
 core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
 target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
 config_item_release+0x5a/0xc0 [configfs]
 config_item_put+0x1d/0x1f [configfs]
 configfs_rmdir+0x1a6/0x300 [configfs]
 vfs_rmdir+0xb7/0x140
 do_rmdir+0x1f4/0x200
 SyS_rmdir+0x11/0x20
 entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Cc: Hannes Reinecke <h...@suse.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: David Disseldorp <dd...@suse.de>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>

Conflicts:
drivers/target/target_core_xcopy.c

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 106 -
 1 file changed, 69 insertions(+), 37 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 3ac8c45..495c043 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -49,6 +49,8 @@ extern struct list_head g_device_list;
  */
 extern struct configfs_subsystem *target_core_subsystem[];
 
+static sense_reason_t target_parse_xcopy_cmd(struct xcopy_op *xop);
+
 static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 {
int off = 0;
@@ -811,13 +813,24 @@ static int target_xcopy_write_destination(
 static void targ

[Devel] [PATCH 08/10] target: return UNSUPPORTED TARGET/SEGMENT DESC TYPE CODE sense

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: c243849720ac237e9e7191fe57f619bb3a871d4c

Use UNSUPPORTED TARGET DESCRIPTOR TYPE CODE and UNSUPPORTED SEGMENT
DESCRIPTOR TYPE CODE additional sense codes if a descriptor type in an
XCOPY request is not supported, as specified in spc4r37 6.4.5 and 6.4.6.

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 495c043..9a6e7d8 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -211,6 +211,7 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
if (offset != 0) {
pr_err("XCOPY target descriptor list length is not"
" multiple of %d\n", XCOPY_TARGET_DESC_LEN);
+   *sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE;
return -EINVAL;
}
if (tdll > 64) {
@@ -244,6 +245,7 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
default:
pr_err("XCOPY unsupported descriptor type code:"
" 0x%02x\n", desc[0]);
+   *sense_ret = TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE;
goto out;
}
}
@@ -334,6 +336,7 @@ static int target_xcopy_parse_segment_descriptors(struct 
se_cmd *se_cmd,
if (offset != 0) {
pr_err("XCOPY segment descriptor list length is not"
" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
+   *sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE;
return -EINVAL;
}
if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) {
@@ -361,6 +364,7 @@ static int target_xcopy_parse_segment_descriptors(struct 
se_cmd *se_cmd,
default:
pr_err("XCOPY unsupported segment descriptor"
"type: 0x%02x\n", desc[0]);
+   *sense_ret = TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE;
goto out;
}
}
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 06/10] target: check for XCOPY parameter truncation

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: f94fd098f674b78c29f482da1999d8de0c93c74e

Check for XCOPY header, CSCD descriptor and segment descriptor list
truncation, and respond accordingly.

SPC4r37 6.4.1 EXTENDED COPY(LID4) states (also applying to LID1 reqs):
  If the parameter list length causes truncation of the parameter list,
  then the copy manager shall transfer no data and shall terminate the
  EXTENDED COPY command with CHECK CONDITION status, with the sense key
  set to ILLEGAL REQUEST, and the additional sense code set to PARAMETER
  LIST LENGTH ERROR.

This behaviour can be tested using the libiscsi ExtendedCopy.ParamHdr
test.

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 14 ++
 drivers/target/target_core_xcopy.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index f2ea6cc..3ac8c45 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -918,6 +918,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
return TCM_UNSUPPORTED_SCSI_OPCODE;
}
 
+   if (se_cmd->data_length < XCOPY_HDR_LEN) {
+   pr_err("XCOPY parameter truncation: length %u < hdr_len %u\n",
+   se_cmd->data_length, XCOPY_HDR_LEN);
+   return TCM_PARAMETER_LIST_LENGTH_ERROR;
+   }
+
xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
if (!xop) {
pr_err("Unable to allocate xcopy_op\n");
@@ -953,6 +959,14 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
goto out;
}
 
+   if (se_cmd->data_length < (XCOPY_HDR_LEN + tdll + sdll + inline_dl)) {
+   pr_err("XCOPY parameter truncation: data length %u too small "
+   "for tdll: %hu sdll: %u inline_dl: %u\n",
+   se_cmd->data_length, tdll, sdll, inline_dl);
+   ret = TCM_PARAMETER_LIST_LENGTH_ERROR;
+   goto out;
+   }
+
pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x"
" tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
tdll, sdll, inline_dl);
diff --git a/drivers/target/target_core_xcopy.h 
b/drivers/target/target_core_xcopy.h
index 700a981..b5baecc 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -1,3 +1,4 @@
+#define XCOPY_HDR_LEN  16
 #define XCOPY_TARGET_DESC_LEN  32
 #define XCOPY_SEGMENT_DESC_LEN 28
 #define XCOPY_NAA_IEEE_REGEX_LEN   16
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 04/10] target: use XCOPY segment descriptor CSCD IDs

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: 66640d35c1e4ef3c96ba5edb3c5e2ff8ab812e7a

The XCOPY specification in SPC4r37 states that the XCOPY source and
destination device(s) should be derived from the copy source and copy
destination (CSCD) descriptor IDs in the XCOPY segment descriptor.

The CSCD IDs are generally (for block -> block copies), indexes into
the corresponding CSCD descriptor list, e.g.
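=================================
EXTENDED COPY Header
=================================
CSCD Descriptor List
- entry 0
  + LU ID <-------------<--------------\
- entry 1                              |
  + LU ID <___________<____________    |
=================================  |   |
Segment Descriptor List            |   |
- segment 0                        |   |
  + src CSCD ID = 0 ------->-------+---/
  + dest CSCD ID = 1 ______>_______|
  + len
  + src lba
  + dest lba
=================================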

Currently LIO completely ignores the src and dest CSCD IDs in the
Segment Descriptor List, and instead assumes that the first entry in the
CSCD list corresponds to the source, and the second to the destination.

This commit removes this assumption, by ensuring that the Segment
Descriptor List is parsed prior to processing the CSCD Descriptor List.
CSCD Descriptor List processing is modified to compare the current list
index with the previously obtained src and dest CSCD IDs.

Additionally, XCOPY requests where the src and dest CSCD IDs refer to
the same CSCD Descriptor List entry can now be successfully processed.

Fixes: cbf031f ("target: Add support for EXTENDED_COPY copy offload")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=191381
Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 79 +++---
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 5f97a9a..d8d53c4 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -109,7 +109,7 @@ static int target_xcopy_locate_se_dev_e4(const unsigned 
char *dev_wwn,
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct 
xcopy_op *xop,
-   unsigned char *p, bool src)
+   unsigned char *p, unsigned short cscd_index)
 {
unsigned char *desc = p;
unsigned short ript;
@@ -154,7 +154,13 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd 
*se_cmd, struct xcopy_op
return -EINVAL;
}
 
-   if (src) {
+   if (cscd_index != xop->stdi && cscd_index != xop->dtdi) {
+   pr_debug("XCOPY 0xe4: ignoring CSCD entry %d - neither src nor "
+"dest\n", cscd_index);
+   return 0;
+   }
+
+   if (cscd_index == xop->stdi) {
memcpy(&xop->src_tid_wwn[0], &desc[8], 
XCOPY_NAA_IEEE_REGEX_LEN);
/*
 * Determine if the source designator matches the local device
@@ -166,10 +172,15 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd 
*se_cmd, struct xcopy_op
pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source"
" received xop\n", xop->src_dev);
}
-   } else {
+   }
+
+   if (cscd_index == xop->dtdi) {
memcpy(&xop->dst_tid_wwn[0], &desc[8], 
XCOPY_NAA_IEEE_REGEX_LEN);
/*
-* Determine if the destination designator matches the local 
device
+* Determine if the destination designator matches the local
+* device. If @cscd_index corresponds to both source (stdi) and
+* destination (dtdi), or dtdi comes after stdi, then
+* XCOL_DEST_RECV_OP wins.
 */
if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0],
XCOPY_NAA_IEEE_REGEX_LEN)) {
@@ -189,9 +200,9 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
 {
struct se_device *local_dev = se_cmd->se_dev;
unsigned char *desc = p;
-   int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0;
+   int offset = tdll % XCOPY_TARGET_DESC_LEN, rc;
+   unsigned short cscd_index = 0;
unsigned short start = 0;
-   bool src = true;
 
*sense_ret = TCM_INVALID_PARAMETER_LIST;
 
@@ -214,25 +225,19 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
 
while (start < tdll) {
/*
-* Check target descriptor identification with 0xE4 type with
-

[Devel] [PATCH 03/10] target: bounds check XCOPY segment descriptor list

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: af9f62c1686268c0517b289274d38f3a03bebd2a

Check the length of the XCOPY request segment descriptor list against
the value advertised via the MAXIMUM SEGMENT DESCRIPTOR COUNT field in
the RECEIVE COPY OPERATING PARAMETERS response.

spc4r37 6.4.3.5 states:
  If the number of segment descriptors exceeds the allowed number, the
  copy manager shall terminate the command with CHECK CONDITION status,
  with the sense key set to ILLEGAL REQUEST, and the additional sense
  code set to TOO MANY SEGMENT DESCRIPTORS.

This functionality is testable using the libiscsi
ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 5a68f2a..5f97a9a 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -307,17 +307,26 @@ static int target_xcopy_parse_segdesc_02(struct se_cmd 
*se_cmd, struct xcopy_op
 
 static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
struct xcopy_op *xop, unsigned char *p,
-   unsigned int sdll)
+   unsigned int sdll, sense_reason_t *sense_ret)
 {
unsigned char *desc = p;
unsigned int start = 0;
int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0;
 
+   *sense_ret = TCM_INVALID_PARAMETER_LIST;
+
if (offset != 0) {
pr_err("XCOPY segment descriptor list length is not"
" multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
return -EINVAL;
}
+   if (sdll > RCR_OP_MAX_SG_DESC_COUNT * XCOPY_SEGMENT_DESC_LEN) {
+   pr_err("XCOPY supports %u segment descriptor(s), sdll: %u too"
+   " large..\n", RCR_OP_MAX_SG_DESC_COUNT, sdll);
+   /* spc4r37 6.4.3.5 SEGMENT DESCRIPTOR LIST LENGTH field */
+   *sense_ret = TCM_TOO_MANY_SEGMENT_DESCS;
+   return -EINVAL;
+   }
 
while (start < sdll) {
/*
@@ -948,7 +957,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
seg_desc = &p[16];
seg_desc += (rc * XCOPY_TARGET_DESC_LEN);
 
-   rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, 
sdll);
+   rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc,
+   sdll, &ret);
if (rc <= 0) {
xcopy_pt_undepend_remotedev(xop);
goto out;
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 02/10] target: add XCOPY target/segment desc sense codes

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: e864212078ded276bdb272b2e0ee6a979357ca8a

As defined in http://www.t10.org/lists/asc-num.htm. To be used during
validation of XCOPY target and segment descriptor lists.

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_transport.c | 24 
 include/target/target_core_base.h  |  4 
 2 files changed, 28 insertions(+)

diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index d7e0fe1..b35317c 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1657,6 +1657,10 @@ void transport_generic_request_failure(struct se_cmd 
*cmd,
case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE:
+   case TCM_TOO_MANY_TARGET_DESCS:
+   case TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE:
+   case TCM_TOO_MANY_SEGMENT_DESCS:
+   case TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE:
break;
case TCM_OUT_OF_RESOURCES:
sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -2804,6 +2808,26 @@ static const struct sense_info sense_info_table[] = {
.key = ILLEGAL_REQUEST,
.asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */
},
+   [TCM_TOO_MANY_TARGET_DESCS] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x26,
+   .ascq = 0x06, /* TOO MANY TARGET DESCRIPTORS */
+   },
+   [TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x26,
+   .ascq = 0x07, /* UNSUPPORTED TARGET DESCRIPTOR TYPE CODE */
+   },
+   [TCM_TOO_MANY_SEGMENT_DESCS] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x26,
+   .ascq = 0x08, /* TOO MANY SEGMENT DESCRIPTORS */
+   },
+   [TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x26,
+   .ascq = 0x09, /* UNSUPPORTED SEGMENT DESCRIPTOR TYPE CODE */
+   },
[TCM_PARAMETER_LIST_LENGTH_ERROR] = {
.key = ILLEGAL_REQUEST,
.asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */
diff --git a/include/target/target_core_base.h 
b/include/target/target_core_base.h
index 00ec289..a72b5a5 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -209,6 +209,10 @@ enum tcm_sense_reason_table {
TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED  = R(0x16),
TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED  = R(0x17),
TCM_COPY_TARGET_DEVICE_NOT_REACHABLE= R(0x18),
+   TCM_TOO_MANY_TARGET_DESCS   = R(0x19),
+   TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE   = R(0x1a),
+   TCM_TOO_MANY_SEGMENT_DESCS  = R(0x1b),
+   TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE  = R(0x1c),
 #undef R
 };
 
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 01/10] target: simplify XCOPY wwn->se_dev lookup helper

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: 94aae4caacda89a1bdb7198b260f4ca3595b7ed7

target_xcopy_locate_se_dev_e4() is used to locate an se_dev, based on
the WWN provided with the XCOPY request. Remove a couple of unneeded
arguments, and rely on the caller for the src/dst test.

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 28 +---
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 94e9574..5a68f2a 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -62,19 +62,14 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, 
unsigned char *buf)
return 0;
 }
 
-static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct 
xcopy_op *xop,
-   bool src)
+static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
+   struct se_device **found_dev)
 {
struct se_device *se_dev;
struct configfs_subsystem *subsys = target_core_subsystem[0];
-   unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
+   unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
int rc;
 
-   if (src)
-   dev_wwn = &xop->dst_tid_wwn[0];
-   else
-   dev_wwn = &xop->src_tid_wwn[0];
-
mutex_lock(&g_device_mutex);
list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
 
@@ -88,15 +83,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd 
*se_cmd, struct xcopy_op
if (rc != 0)
continue;
 
-   if (src) {
-   xop->dst_dev = se_dev;
-   pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from 
located"
-   " se_dev\n", xop->dst_dev);
-   } else {
-   xop->src_dev = se_dev;
-   pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from 
located"
-   " se_dev\n", xop->src_dev);
-   }
+   *found_dev = se_dev;
+   pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
 
rc = configfs_depend_item(subsys,
&se_dev->dev_group.cg_item);
@@ -254,9 +242,11 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
}
 
if (xop->op_origin == XCOL_SOURCE_RECV_OP)
-   rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
+   rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn,
+   &xop->dst_dev);
else
-   rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
+   rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn,
+   &xop->src_dev);
/*
 * If a matching IEEE NAA 0x83 descriptor for the requested device
 * is not located on this node, return COPY_ABORTED with ASQ/ASQC
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 00/10] target: backport bug fixes for XCOPY

2018-04-02 Thread Andrei Vagin


Bart Van Assche (1):
  target: Fix a deadlock between the XCOPY code and iSCSI session
shutdown

David Disseldorp (9):
  target: simplify XCOPY wwn->se_dev lookup helper
  target: add XCOPY target/segment desc sense codes
  target: bounds check XCOPY segment descriptor list
  target: use XCOPY segment descriptor CSCD IDs
  target: bounds check XCOPY total descriptor list length
  target: check for XCOPY parameter truncation
  target: return UNSUPPORTED TARGET/SEGMENT DESC TYPE CODE sense
  target: use XCOPY TOO MANY TARGET DESCRIPTORS sense
  target: check XCOPY segment descriptor CSCD IDs

 drivers/target/target_core_transport.c |  24 
 drivers/target/target_core_xcopy.c | 247 ++---
 drivers/target/target_core_xcopy.h |   7 +
 include/target/target_core_base.h  |   4 +
 4 files changed, 200 insertions(+), 82 deletions(-)

-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 05/10] target: bounds check XCOPY total descriptor list length

2018-04-02 Thread Andrei Vagin

From: David Disseldorp <dd...@suse.de>

ML: 7d38706669ce00603b187f667a4eb67c94eac098

spc4r37 6.4.3.5 states:
  If the combined length of the CSCD descriptors and segment descriptors
  exceeds the allowed value, then the copy manager shall terminate the
  command with CHECK CONDITION status, with the sense key set to ILLEGAL
  REQUEST, and the additional sense code set to PARAMETER LIST LENGTH
  ERROR.

This functionality can be tested using the libiscsi
ExtendedCopy.DescrLimits test.

Signed-off-by: David Disseldorp <dd...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index d8d53c4..f2ea6cc 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -940,6 +940,12 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
 */
tdll = get_unaligned_be16(&p[2]);
sdll = get_unaligned_be32(&p[8]);
+   if (tdll + sdll > RCR_OP_MAX_DESC_LIST_LEN) {
+   pr_err("XCOPY descriptor list length %u exceeds maximum %u\n",
+  tdll + sdll, RCR_OP_MAX_DESC_LIST_LEN);
+   ret = TCM_PARAMETER_LIST_LENGTH_ERROR;
+   goto out;
+   }
 
inline_dl = get_unaligned_be32(&p[12]);
if (inline_dl != 0) {
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 01/23] target: Fix VERIFY_16 handling in sbc_parse_cdb

2018-03-27 Thread Andrei Vagin

From: Max Lohrmann <p...@wickenrode.com>

ML: 13603685c1f12c67a7a2427f00b63f39a2b6f7c9

As reported by Max, the Windows 2008 R2 chkdsk utility expects
VERIFY_16 to be supported, and does not handle the returned
CHECK_CONDITION properly, resulting in an infinite loop.

The kernel will log huge amounts of this error:

kernel: TARGET_CORE[iSCSI]: Unsupported SCSI Opcode 0x8f, sending
CHECK_CONDITION.

Signed-off-by: Max Lohrmann <p...@wickenrode.com>
Cc: <sta...@vger.kernel.org>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_sbc.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 4be6c69..8a799a8 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1018,9 +1018,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
return ret;
break;
case VERIFY:
+   case VERIFY_16:
size = 0;
-   sectors = transport_get_sectors_10(cdb);
-   cmd->t_task_lba = transport_lba_32(cdb);
+   if (cdb[0] == VERIFY) {
+   sectors = transport_get_sectors_10(cdb);
+   cmd->t_task_lba = transport_lba_32(cdb);
+   } else {
+   sectors = transport_get_sectors_16(cdb);
+   cmd->t_task_lba = transport_lba_64(cdb);
+   }
cmd->execute_cmd = sbc_emulate_noop;
goto check_lba;
case REZERO_UNIT:
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 18/23] target: Fix remote-port TMR ABORT + se_cmd fabric stop

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: 0f4a943168f31d29a1701908931acaba518b131a

To address the bug where fabric driver level shutdown
of se_cmd occurs at the same time when TMR CMD_T_ABORTED
is happening resulting in a -1 ->cmd_kref, this patch
adds a CMD_T_FABRIC_STOP bit that is used to determine
when TMR + driver I_T nexus shutdown is happening
concurrently.

It changes target_sess_cmd_list_set_waiting() to obtain
se_cmd->cmd_kref + set CMD_T_FABRIC_STOP, and drop local
reference in target_wait_for_sess_cmds() and invoke extra
target_put_sess_cmd() during Task Aborted Status (TAS)
when necessary.

Also, it adds a new target_wait_free_cmd() wrapper around
transport_wait_for_tasks() for the special case within
transport_generic_free_cmd() to set CMD_T_FABRIC_STOP,
and is now aware of CMD_T_ABORTED + CMD_T_TAS status
bits to know when an extra transport_put_cmd() during
TAS is required.

Note transport_generic_free_cmd() is expected to block on
cmd->cmd_wait_comp in order to follow what iscsi-target
expects during iscsi_conn context se_cmd shutdown.

Cc: Quinn Tran <quinn.t...@qlogic.com>
Cc: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: Sagi Grimberg <sa...@mellanox.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@daterainc.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_tmr.c   |  57 +
 drivers/target/target_core_transport.c | 150 +
 include/target/target_core_base.h  |   2 +
 3 files changed, 154 insertions(+), 55 deletions(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 8f935fb..956de70 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -78,16 +78,18 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
kfree(tmr);
 }
 
-static void core_tmr_handle_tas_abort(
-   struct se_session *tmr_sess,
-   struct se_cmd *cmd,
-   int tas)
+static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
 {
-   bool remove = true;
+   unsigned long flags;
+   bool remove = true, send_tas;
/*
 * TASK ABORTED status (TAS) bit support
 */
-   if (tmr_sess && tmr_sess != cmd->se_sess && tas) {
+   spin_lock_irqsave(>t_state_lock, flags);
+   send_tas = (cmd->transport_state & CMD_T_TAS);
+   spin_unlock_irqrestore(>t_state_lock, flags);
+
+   if (send_tas) {
remove = false;
transport_send_task_abort(cmd);
}
@@ -110,31 +112,42 @@ static int target_check_cdb_and_preempt(struct list_head 
*list,
return 1;
 }
 
-static bool __target_check_io_state(struct se_cmd *se_cmd)
+static bool __target_check_io_state(struct se_cmd *se_cmd,
+   struct se_session *tmr_sess, int tas)
 {
struct se_session *sess = se_cmd->se_sess;
+   int ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd);
 
assert_spin_locked(>sess_cmd_lock);
WARN_ON_ONCE(!irqs_disabled());
/*
 * If command already reached CMD_T_COMPLETE state within
-* target_complete_cmd(), this se_cmd has been passed to
-* fabric driver and will not be aborted.
+* target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown,
+* this se_cmd has been passed to fabric driver and will
+* not be aborted.
 *
 * Otherwise, obtain a local se_cmd->cmd_kref now for TMR
 * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as
 * long as se_cmd->cmd_kref is still active unless zero.
 */
spin_lock(_cmd->t_state_lock);
-   if (se_cmd->transport_state & CMD_T_COMPLETE) {
-   int ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd);
-
-   pr_debug("Attempted to abort io tag: %u already complete,"
+   if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) {
+   pr_debug("Attempted to abort io tag: %u already complete or"
+   " fabric stop, skipping\n", ref_tag);
+   spin_unlock(_cmd->t_state_lock);
+   return false;
+   }
+   if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
+   pr_debug("Attempted to abort io tag: %u already shutdown,"
" skipping\n", ref_tag);
spin_unlock(_cmd->t_state_lock);
return false;
}
se_cmd->transport_state |= CMD_T_ABORTED;
+
+   if ((tmr_sess != se_cmd->se_sess) && tas)
+   se_cmd->transport_state |= CMD_

[Devel] [PATCH 12/23] target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors

2018-03-27 Thread Andrei Vagin

From: Mike Christie <mchri...@redhat.com>

ML: 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3

In a couple of places we are not converting to/from the Linux
block layer 512 byte sectors.

1.

The request queue values and what we do are a mismatch of
things:

max_discard_sectors - This is in linux block layer 512 byte
sectors. We are just copying this to max_unmap_lba_count.

discard_granularity - This is in bytes. We are converting it
to Linux block layer 512 byte sectors.

discard_alignment - This is in bytes. We are just copying
this over.

The problem is that the core LIO code exports these values in
spc_emulate_evpd_b0 and we use them to test request arguments
in sbc_execute_unmap, but we never convert to the block size
we export to the initiator. If we are not using 512 byte sectors
then we are exporting the wrong values or our checks are off.
And, for the discard_alignment/bytes case we are just plain messed
up.

2.

blkdev_issue_discard's start and number of sector arguments
are supposed to be in linux block layer 512 byte sectors. We are
currently passing in the values we get from the initiator which
might be based on some other sector size.

There is a similar problem in iblock_execute_write_same where
the bio functions want values in 512 byte sectors but we are
passing in what we got from the initiator.

Signed-off-by: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_device.c  | 44 +++
 drivers/target/target_core_file.c| 29 ++
 drivers/target/target_core_iblock.c  | 58 +---
 include/target/target_core_backend.h |  4 +++
 4 files changed, 71 insertions(+), 64 deletions(-)

diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index 3621e64..81156de 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1583,6 +1583,50 @@ struct se_device *target_alloc_device(struct se_hba 
*hba, const char *name)
return dev;
 }
 
+/*
+ * Check if the underlying struct block_device request_queue supports
+ * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM
+ * in ATA and we need to set TPE=1
+ */
+bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
+  struct request_queue *q, int block_size)
+{
+   if (!blk_queue_discard(q))
+   return false;
+
+   attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
+   block_size;
+   /*
+* Currently hardcoded to 1 in Linux/SCSI code..
+*/
+   attrib->max_unmap_block_desc_count = 1;
+   attrib->unmap_granularity = q->limits.discard_granularity / block_size;
+   attrib->unmap_granularity_alignment = q->limits.discard_alignment /
+   block_size;
+   attrib->unmap_zeroes_data = q->limits.discard_zeroes_data;
+   return true;
+}
+EXPORT_SYMBOL(target_configure_unmap_from_queue);
+
+/*
+ * Convert from blocksize advertised to the initiator to the 512 byte
+ * units unconditionally used by the Linux block layer.
+ */
+sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
+{
+   switch (dev->dev_attrib.block_size) {
+   case 4096:
+   return lb << 3;
+   case 2048:
+   return lb << 2;
+   case 1024:
+   return lb << 1;
+   default:
+   return lb;
+   }
+}
+EXPORT_SYMBOL(target_to_linux_sector);
+
 int target_configure_device(struct se_device *dev)
 {
struct se_hba *hba = dev->se_hba;
diff --git a/drivers/target/target_core_file.c 
b/drivers/target/target_core_file.c
index de165f5..5c92a08 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -165,25 +165,11 @@ static int fd_configure_device(struct se_device *dev)
" block_device blocks: %llu logical_block_size: %d\n",
dev_size, div_u64(dev_size, fd_dev->fd_block_size),
fd_dev->fd_block_size);
-   /*
-* Check if the underlying struct block_device request_queue 
supports
-* the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + 
TRIM
-* in ATA and we need to set TPE=1
-*/
-   if (blk_queue_discard(q)) {
-   dev->dev_attrib.max_unmap_lba_count =
-   q->limits.max_discard_sectors;
-   /*
-* Currently hardcoded to 1 in Linux/SCSI code..
-*/
-   de

[Devel] [PATCH 08/23] target/iscsi: Fix double free of a TUR followed by a solicited NOPOUT

2018-03-27 Thread Andrei Vagin

From: Alexei Potashnik <ale...@purestorage.com>

ML: 9547308bda296b6f69876c840a0291fcfbeddbb8

Make sure all non-READ SCSI commands get targ_xfer_tag initialized
to 0xFFFFFFFF, not just WRITEs.

Double-free of a TUR cmd object occurs under the following scenario:

1. TUR received (targ_xfer_tag is uninitialized and left at 0)
2. TUR status sent
3. First unsolicited NOPIN is sent to initiator (gets targ_xfer_tag of 0)
4. NOPOUT for NOPIN (with TTT=0) arrives
 - its ExpStatSN acks TUR status, TUR is queued for removal
 - LIO tries to find NOPIN with TTT=0, but finds the same TUR instead,
   TUR is queued for removal for the 2nd time

(Drop unbalanced conditional bracket usage - nab)

Signed-off-by: Alexei Potashnik <ale...@purestorage.com>
Signed-off-by: Spencer Baugh <sba...@catern.com>
Cc: <sta...@vger.kernel.org> # v3.1+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/iscsi/iscsi_target.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c 
b/drivers/target/iscsi/iscsi_target.c
index 35209f0..ea8573e 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1117,9 +1117,9 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct 
iscsi_cmd *cmd,
cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA;
 
conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt;
-   if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+   if (hdr->flags & ISCSI_FLAG_CMD_READ)
cmd->targ_xfer_tag = session_get_next_ttt(conn->sess);
-   } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE)
+   else
cmd->targ_xfer_tag = 0x;
cmd->cmd_sn = be32_to_cpu(hdr->cmdsn);
cmd->exp_stat_sn= be32_to_cpu(hdr->exp_statsn);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 20/23] target: Fix target_release_cmd_kref shutdown comp leak

2018-03-27 Thread Andrei Vagin

From: Himanshu Madhani <himanshu.madh...@qlogic.com>

ML: 5e47f1985d7107331c3f64fb3ec83d66fd73577e

This patch fixes an active I/O shutdown bug for fabric
drivers using target_wait_for_sess_cmds(), where se_cmd
descriptor shutdown would result in hung tasks waiting
indefinitely for se_cmd->cmd_wait_comp to complete().

To address this bug, drop the incorrect list_del_init()
usage in target_wait_for_sess_cmds() and always complete()
during se_cmd target_release_cmd_kref() put, in order to
let caller invoke the final fabric release callback
into se_cmd->se_tfo->release_cmd() code.

Reported-by: Himanshu Madhani <himanshu.madh...@qlogic.com>
Tested-by: Himanshu Madhani <himanshu.madh...@qlogic.com>
Signed-off-by: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: sta...@vger.kernel.org
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_transport.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 96900a8..d7e0fe1 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2617,8 +2617,6 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
 
list_for_each_entry_safe(se_cmd, tmp_cmd,
_sess->sess_wait_list, se_cmd_list) {
-   list_del_init(_cmd->se_cmd_list);
-
pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
" %d\n", se_cmd, se_cmd->t_state,
se_cmd->se_tfo->get_cmd_state(se_cmd));
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 19/23] target: Drop incorrect ABORT_TASK put for completed commands

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: 7f54ab5ff52fb0b91569bc69c4a6bc5cac1b768d

This patch fixes a recent ABORT_TASK regression associated
with commit febe562c, where a left-over target_put_sess_cmd()
would still be called when __target_check_io_state() detected
a command has already been completed, and explicit ABORT must
be avoided.

Note commit febe562c dropped the local kref_get_unless_zero()
check in core_tmr_abort_task(), but did not drop this extra
corresponding target_put_sess_cmd() in the failure path.

So go ahead and drop this now bogus target_put_sess_cmd(),
and avoid this potential use-after-free.

Reported-by: Dan Lane <draco...@gmail.com>
Cc: Quinn Tran <quinn.t...@qlogic.com>
Cc: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: Sagi Grimberg <sa...@mellanox.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.14+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_tmr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 956de70..78562d9 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -181,7 +181,6 @@ void core_tmr_abort_task(
 
if (!__target_check_io_state(se_cmd, se_sess, 0)) {
spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);
-   target_put_sess_cmd(se_cmd);
goto out;
}
list_del_init(_cmd->se_cmd_list);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 16/23] target: Fix LUN_RESET active TMR descriptor handling

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: a6d9bb1c9605cd4f44e2d8290dc4d0e88f20292d

This patch fixes a NULL pointer se_cmd->cmd_kref < 0
refcount bug during TMR LUN_RESET with active TMRs,
triggered during se_cmd + se_tmr_req descriptor
shutdown + release via core_tmr_drain_tmr_list().

To address this bug, go ahead and obtain a local
kref_get_unless_zero(&se_cmd->cmd_kref) for active I/O
to set CMD_T_ABORTED, and transport_wait_for_tasks()
followed by the final target_put_sess_cmd() to drop
the local ->cmd_kref.

Also add two new checks within target_tmr_work() to
avoid CMD_T_ABORTED -> TFO->queue_tm_rsp() callbacks
ahead of invoking the backend -> fabric put in
transport_cmd_check_stop_to_fabric().

For good measure, also change core_tmr_release_req()
to use list_del_init() ahead of se_tmr_req memory
free.

Reviewed-by: Quinn Tran <quinn.t...@qlogic.com>
Cc: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: Sagi Grimberg <sa...@mellanox.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_tmr.c   | 22 +-
 drivers/target/target_core_transport.c | 17 +
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index e2c9672..99bc3d4 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -71,7 +71,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
 
if (dev) {
spin_lock_irqsave(>se_tmr_lock, flags);
-   list_del(>tmr_list);
+   list_del_init(>tmr_list);
spin_unlock_irqrestore(>se_tmr_lock, flags);
}
 
@@ -202,9 +202,11 @@ static void core_tmr_drain_tmr_list(
struct list_head *preempt_and_abort_list)
 {
LIST_HEAD(drain_tmr_list);
+   struct se_session *sess;
struct se_tmr_req *tmr_p, *tmr_pp;
struct se_cmd *cmd;
unsigned long flags;
+   bool rc;
/*
 * Release all pending and outgoing TMRs aside from the received
 * LUN_RESET tmr..
@@ -230,17 +232,31 @@ static void core_tmr_drain_tmr_list(
if (target_check_cdb_and_preempt(preempt_and_abort_list, cmd))
continue;
 
+   sess = cmd->se_sess;
+   if (WARN_ON_ONCE(!sess))
+   continue;
+
+   spin_lock(>sess_cmd_lock);
spin_lock(>t_state_lock);
if (!(cmd->transport_state & CMD_T_ACTIVE)) {
spin_unlock(>t_state_lock);
+   spin_unlock(>sess_cmd_lock);
continue;
}
if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) {
spin_unlock(>t_state_lock);
+   spin_unlock(>sess_cmd_lock);
continue;
}
+   cmd->transport_state |= CMD_T_ABORTED;
spin_unlock(>t_state_lock);
 
+   rc = kref_get_unless_zero(>cmd_kref);
+   spin_unlock(>sess_cmd_lock);
+   if (!rc) {
+   printk("LUN_RESET TMR: non-zero 
kref_get_unless_zero\n");
+   continue;
+   }
list_move_tail(_p->tmr_list, _tmr_list);
}
spin_unlock_irqrestore(>se_tmr_lock, flags);
@@ -254,7 +270,11 @@ static void core_tmr_drain_tmr_list(
(preempt_and_abort_list) ? "Preempt" : "", tmr_p,
tmr_p->function, tmr_p->response, cmd->t_state);
 
+   cancel_work_sync(>work);
+   transport_wait_for_tasks(cmd);
+
transport_cmd_finish_abort(cmd, 1);
+   target_put_sess_cmd(cmd);
}
 }
 
diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 460d96b..daf23ae 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2959,8 +2959,17 @@ static void target_tmr_work(struct work_struct *work)
struct se_cmd *cmd = container_of(work, struct se_cmd, work);
struct se_device *dev = cmd->se_dev;
struct se_tmr_req *tmr = cmd->se_tmr_req;
+   unsigned long flags;
int ret;
 
+   spin_lock_irqsave(>t_state_lock, flags);
+   if (cmd->transport_state & CMD_T_ABORTED) {
+   tmr->response = TMR_FUNCTION_REJECTED;
+   spin_unlock_irqrestore(>t_state_lock, flags);
+   goto check

[Devel] [PATCH 13/23] target: Fix race with SCF_SEND_DELAYED_TAS handling

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: 310d3d314be7f0a84011ebdc4bdccbcae9755a87

This patch fixes a race between setting of SCF_SEND_DELAYED_TAS
in transport_send_task_abort(), and check of the same bit in
transport_check_aborted_status().

It adds a __transport_check_aborted_status() version that is
used by target_execute_cmd() when se_cmd->t_state_lock is
held, and a transport_check_aborted_status() wrapper for
all other existing callers.

Also, it handles the case where the check happens before
transport_send_task_abort() gets called.  For this, go
ahead and set SCF_SEND_DELAYED_TAS early when necessary,
and have transport_send_task_abort() send the abort.

Cc: Quinn Tran <quinn.t...@qlogic.com>
Cc: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: Sagi Grimberg <sa...@mellanox.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_transport.c | 49 +++---
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index a4bf418..0b20071dd 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1779,19 +1779,21 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
return true;
 }
 
+static int __transport_check_aborted_status(struct se_cmd *, int);
+
 void target_execute_cmd(struct se_cmd *cmd)
 {
/*
-* If the received CDB has aleady been aborted stop processing it here.
-*/
-   if (transport_check_aborted_status(cmd, 1))
-   return;
-
-   /*
 * Determine if frontend context caller is requesting the stopping of
 * this command for frontend exceptions.
+*
+* If the received CDB has aleady been aborted stop processing it here.
 */
spin_lock_irq(>t_state_lock);
+   if (__transport_check_aborted_status(cmd, 1)) {
+   spin_unlock_irq(>t_state_lock);
+   return;
+   }
if (cmd->transport_state & CMD_T_STOP) {
pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n",
__func__, __LINE__,
@@ -2862,17 +2864,24 @@ transport_send_check_condition_and_sense(struct se_cmd 
*cmd,
 }
 EXPORT_SYMBOL(transport_send_check_condition_and_sense);
 
-int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+static int __transport_check_aborted_status(struct se_cmd *cmd, int 
send_status)
+   __releases(>t_state_lock)
+   __acquires(>t_state_lock)
 {
+   assert_spin_locked(>t_state_lock);
+   WARN_ON_ONCE(!irqs_disabled());
+
if (!(cmd->transport_state & CMD_T_ABORTED))
return 0;
-
/*
 * If cmd has been aborted but either no status is to be sent or it has
 * already been sent, just return
 */
-   if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS))
+   if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) {
+   if (send_status)
+   cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
return 1;
+   }
 
pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x 
ITT: 0x%08x\n",
 cmd->t_task_cdb[0], cmd->se_tfo->get_task_tag(cmd));
@@ -2880,10 +2889,24 @@ int transport_check_aborted_status(struct se_cmd *cmd, 
int send_status)
cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS;
cmd->scsi_status = SAM_STAT_TASK_ABORTED;
trace_target_cmd_complete(cmd);
+
+   spin_unlock_irq(>t_state_lock);
cmd->se_tfo->queue_status(cmd);
+   spin_lock_irq(>t_state_lock);
 
return 1;
 }
+
+int transport_check_aborted_status(struct se_cmd *cmd, int send_status)
+{
+   int ret;
+
+   spin_lock_irq(>t_state_lock);
+   ret = __transport_check_aborted_status(cmd, send_status);
+   spin_unlock_irq(>t_state_lock);
+
+   return ret;
+}
 EXPORT_SYMBOL(transport_check_aborted_status);
 
 void transport_send_task_abort(struct se_cmd *cmd)
@@ -2906,11 +2929,17 @@ void transport_send_task_abort(struct se_cmd *cmd)
 */
if (cmd->data_direction == DMA_TO_DEVICE) {
if (cmd->se_tfo->write_pending_status(cmd) != 0) {
-   cmd->transport_state |= CMD_T_ABORTED;
+   spin_lock_irqsave(>t_state_lock, flags);
+   if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) {
+   spin_unlock_irqrestore(>t_state_lock, 
flags

[Devel] [PATCH 02/23] target: reject COMPARE_AND_WRITE if emulate_caw is not set

2018-03-27 Thread Andrei Vagin

From: Jiang Yi <jiangyil...@gmail.com>

ML: 12f66e4a0f7b5624901ba4301210e026c9ddf78d

In struct se_dev_attrib, there is a field emulate_caw exposed as an
attribute under /sys/kernel/config/target/core/$HBA/$DEV/attrib/.

If this field is set to zero, it means the corresponding struct se_device
does not support the SCSI command COMPARE_AND_WRITE.

In function sbc_parse_cdb(), go ahead and reject scsi COMPARE_AND_WRITE
if emulate_caw is not set, because it has been explicitly disabled
from user-space.

(Make pr_err ratelimited - nab)

Signed-off-by: Jiang Yi <jiangyil...@gmail.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_sbc.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 8a799a8..17889bd 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -913,6 +913,12 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
break;
}
case COMPARE_AND_WRITE:
+   if (!dev->dev_attrib.emulate_caw) {
+   pr_err_ratelimited("se_device %s/%s (vpd_unit_serial 
%s) reject"
+   " COMPARE_AND_WRITE\n", dev->transport->name,
+   dev->dev_group.cg_item.ci_name, 
dev->t10_wwn.unit_serial);
+   return TCM_UNSUPPORTED_SCSI_OPCODE;
+   }
sectors = cdb[13];
/*
 * Currently enforce COMPARE_AND_WRITE for a single sector
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 07/23] target: Use correct SCSI status during EXTENDED_COPY exception

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: 0583c261e6325f392c1f7a1b9112e31298e1a4bd

This patch adds the missing target_complete_cmd() SCSI status
parameter change in target_xcopy_do_work(), that was originally
missing in commit 926317de33.

It correctly propagates up the correct SCSI status during
EXTENDED_COPY exception cases, instead of always using the
hardcoded SAM_STAT_CHECK_CONDITION from original code.

This is required for ESX host environments that expect to
hit SAM_STAT_RESERVATION_CONFLICT for certain scenarios,
and SAM_STAT_CHECK_CONDITION results in non-retriable
status for these cases.

Reported-by: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Tested-by: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Cc: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Cc: sta...@vger.kernel.org # 3.14+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 9f5a002..94e9574 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -881,7 +881,7 @@ out:
" CHECK_CONDITION -> sending response\n", rc);
ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
}
-   target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION);
+   target_complete_cmd(ec_cmd, ec_cmd->scsi_status);
 }
 
 sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 09/23] target: fix COMPARE_AND_WRITE non zero SGL offset data corruption

2018-03-27 Thread Andrei Vagin

From: Jan Engelhardt <jeng...@inai.de>

ML: d94e5a61357a04938ce14d6033b4d33a3c5fd780

target_core_sbc's compare_and_write functionality suffers from taking
data at the wrong memory location when writing a CAW request to disk
when a SGL offset is non-zero.

This can happen with loopback and vhost-scsi fabric drivers when
SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC is used to map existing user-space
SGL memory into COMPARE_AND_WRITE READ/WRITE payload buffers.

Given the following sample LIO subtopology,

% targetcli ls /loopback/
o- loopback . [1 Target]
  o- naa.6001405ebb8df14a ... [naa.60014059143ed2b3]
o- luns ... [2 LUNs]
  o- lun0  [iblock/ram0 (/dev/ram0)]
  o- lun1  [iblock/ram1 (/dev/ram1)]
% lsscsi -g
[3:0:1:0]diskLIO-ORG  IBLOCK   4.0   /dev/sdc   /dev/sg3
[3:0:1:1]diskLIO-ORG  IBLOCK   4.0   /dev/sdd   /dev/sg4

the following bug can be observed in Linux 4.3 and 4.4~rc1:

% perl -e 'print chr$_ for 0..255,reverse 0..255' >rand
% perl -e 'print "\0" x 512' >zero
% cat rand >/dev/sdd
% sg_compare_and_write -i rand -D zero --lba 0 /dev/sdd
% sg_compare_and_write -i zero -D rand --lba 0 /dev/sdd
Miscompare reported
% hexdump -Cn 512 /dev/sdd
00000000  0f 0e 0d 0c 0b 0a 09 08  07 06 05 04 03 02 01 00
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00
*
00000200

Rather than writing all-zeroes as instructed with the -D file, it
corrupts the data in the sector by splicing some of the original
bytes in. The page of the first entry of cmd->t_data_sg includes the
CDB, and sg->offset is set to a position past the CDB. I presume that
sg->offset is also the right choice to use for subsequent sglist
members.

Signed-off-by: Jan Engelhardt <jeng...@netitwork.de>
Tested-by: Douglas Gilbert <dgilb...@interlog.com>
Cc: <sta...@vger.kernel.org> # v3.12+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_sbc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 17889bd..1448dce 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -503,11 +503,11 @@ static sense_reason_t compare_and_write_callback(struct 
se_cmd *cmd, bool succes
 
if (block_size < PAGE_SIZE) {
sg_set_page(_sg[i], m.page, block_size,
-   block_size);
+   m.piter.sg->offset + block_size);
} else {
sg_miter_next();
sg_set_page(_sg[i], m.page, block_size,
-   0);
+   m.piter.sg->offset);
}
len -= block_size;
i++;
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 11/23] target/sbc: Add LBPRZ attribute + control CDB emulation

2018-03-27 Thread Andrei Vagin

From: Jamie Pocas <jamie.po...@emc.com>

ML: e6f41633cb79b55ead84b023c02035322c7827e7

This change sets the LBPRZ flag in EVPD page b2h and READ CAPACITY (16)
based on a new unmap_zeroes_data device attribute. This flag is set
automatically for iblock based on underlying block device queue's
discard_zeroes_data flag.

Signed-off-by: Jamie Pocas <jamie.po...@emc.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_iblock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/target/target_core_iblock.c 
b/drivers/target/target_core_iblock.c
index c68e255..02c5ab0a 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -144,6 +144,8 @@ static int iblock_configure_device(struct se_device *dev)
q->limits.discard_granularity >> 9;
dev->dev_attrib.unmap_granularity_alignment =
q->limits.discard_alignment;
+   dev->dev_attrib.unmap_zeroes_data =
+   q->limits.discard_zeroes_data;
 
pr_debug("IBLOCK: BLOCK Discard support available,"
" disabled by default\n");
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 05/23] target: Make EXTENDED_COPY 0xe4 failure return COPY TARGET DEVICE NOT REACHABLE

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: 449a137846c84829a328757cd21fd9ca65c08519

This patch addresses a bug where EXTENDED_COPY across multiple LUNs
results in a CHECK_CONDITION when the source + destination are not
located on the same physical node.

ESX Host environments expect sense COPY_ABORTED w/ COPY TARGET DEVICE
NOT REACHABLE to be returned when this occurs, in order to signal
fallback to local copy method.

As described in section 6.3.3 of spc4r22:

  "If it is not possible to complete processing of a segment because the
   copy manager is unable to establish communications with a copy target
   device, because the copy target device does not respond to INQUIRY,
   or because the data returned in response to INQUIRY indicates
   an unsupported logical unit, then the EXTENDED COPY command shall be
   terminated with CHECK CONDITION status, with the sense key set to
   COPY ABORTED, and the additional sense code set to COPY TARGET DEVICE
   NOT REACHABLE."

Tested on v4.1.y with ESX v5.5u2+ with BlockCopy across multiple nodes.

Reported-by: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Tested-by: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Cc: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Tested-by: Dinesh Israni <d...@datera.io>
Signed-off-by: Dinesh Israni <d...@datera.io>
Cc: Dinesh Israni <d...@datera.io>
Cc: sta...@vger.kernel.org # 3.14+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_transport.c |  7 +++
 drivers/target/target_core_xcopy.c | 22 --
 include/target/target_core_base.h  |  1 +
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 8af8049..a4bf418 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1656,6 +1656,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED:
case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
+   case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE:
break;
case TCM_OUT_OF_RESOURCES:
sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -2783,6 +2784,12 @@ static const struct sense_info sense_info_table[] = {
.ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */
.add_sector_info = true,
},
+   [TCM_COPY_TARGET_DEVICE_NOT_REACHABLE] = {
+   .key = COPY_ABORTED,
+   .asc = 0x0d,
+   .ascq = 0x02, /* COPY TARGET DEVICE NOT REACHABLE */
+
+   },
[TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE] = {
/*
 * Returning ILLEGAL REQUEST would cause immediate IO errors on
diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index a7c8974..5cfb382 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -116,7 +116,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd 
*se_cmd, struct xcopy_op
}
mutex_unlock(_device_mutex);
 
-   pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+   pr_debug_ratelimited("Unable to locate 0xe4 descriptor for 
EXTENDED_COPY\n");
return -EINVAL;
 }
 
@@ -197,7 +197,7 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd 
*se_cmd, struct xcopy_op
 
 static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
struct xcopy_op *xop, unsigned char *p,
-   unsigned short tdll)
+   unsigned short tdll, sense_reason_t *sense_ret)
 {
struct se_device *local_dev = se_cmd->se_dev;
unsigned char *desc = p;
@@ -205,6 +205,8 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
unsigned short start = 0;
bool src = true;
 
+   *sense_ret = TCM_INVALID_PARAMETER_LIST;
+
if (offset != 0) {
pr_err("XCOPY target descriptor list length is not"
" multiple of %d\n", XCOPY_TARGET_DESC_LEN);
@@ -255,9 +257,16 @@ static int target_xcopy_parse_target_descriptors(struct 
se_cmd *se_cmd,
rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
else
rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
-
-   if (rc < 0)
+   /*
+* If a matching IEEE NAA 0x83 descriptor for the requested device
+* is not located on this node, return COPY_ABORTED with ASQ/ASQC
+* 0x0d/0x02 - COPY_TARGET_DEVICE_NOT_REACHABLE to request the
+* initiator to fall back to normal copy method.
+*/
+   if (rc < 0) {
+

[Devel] [PATCH 22/23] iscsi-target: fix invalid flags in text response

2018-03-27 Thread Andrei Vagin

From: Varun Prakash <va...@chelsio.com>

ML: 310d40a973c560a24c79f84cb5f16dc540a05686

In case of multiple text responses, iscsi-target
sets both the 'F' and 'C' bits for the final text response
PDU. This issue happens because hdr->flags is not
zeroed out before ORing with the 'F' bit.

This patch replaces the |= operator with a plain assignment to fix this issue.

Signed-off-by: Varun Prakash <va...@chelsio.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/iscsi/iscsi_target.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c 
b/drivers/target/iscsi/iscsi_target.c
index aeba04a..2ec68f2 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3490,9 +3490,9 @@ iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct 
iscsi_conn *conn,
return text_length;
 
if (completed) {
-   hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+   hdr->flags = ISCSI_FLAG_CMD_FINAL;
} else {
-   hdr->flags |= ISCSI_FLAG_TEXT_CONTINUE;
+   hdr->flags = ISCSI_FLAG_TEXT_CONTINUE;
cmd->read_data_done += text_length;
if (cmd->targ_xfer_tag == 0x)
cmd->targ_xfer_tag = session_get_next_ttt(conn->sess);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 23/23] target: Fix max_unmap_lba_count calc overflow

2018-03-27 Thread Andrei Vagin

From: Mike Christie <mchri...@redhat.com>

ML: ea263c7fada4af8ec7fe5fcfd6e7d7705a89351b

max_discard_sectors is only 32 bits, and some non-SCSI backend
devices will set this to the max 0xffffffff, so we can end up
overflowing during the max_unmap_lba_count calculation.

This fixes a regression caused by my patch:

commit 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3
Author: Mike Christie <mchri...@redhat.com>
Date:   Mon Jan 18 14:09:27 2016 -0600

target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors

which can result in extra discards being sent due to the overflow
causing max_unmap_lba_count to be smaller than what the backing
device can actually support.

Signed-off-by: Mike Christie <mchri...@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanass...@sandisk.com>
Cc: sta...@vger.kernel.org
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_device.c  | 8 +---
 drivers/target/target_core_file.c| 3 +--
 drivers/target/target_core_iblock.c  | 3 +--
 include/target/target_core_backend.h | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index 7dfe964..cb17aeb 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1573,13 +1573,15 @@ struct se_device *target_alloc_device(struct se_hba 
*hba, const char *name)
  * in ATA and we need to set TPE=1
  */
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-  struct request_queue *q, int block_size)
+  struct request_queue *q)
 {
+   int block_size = queue_logical_block_size(q);
+
if (!blk_queue_discard(q))
return false;
 
-   attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) /
-   block_size;
+   attrib->max_unmap_lba_count =
+   q->limits.max_discard_sectors >> (ilog2(block_size) - 9);
/*
 * Currently hardcoded to 1 in Linux/SCSI code..
 */
diff --git a/drivers/target/target_core_file.c 
b/drivers/target/target_core_file.c
index 4e87701..ef5ea71 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -166,8 +166,7 @@ static int fd_configure_device(struct se_device *dev)
dev_size, div_u64(dev_size, fd_dev->fd_block_size),
fd_dev->fd_block_size);
 
-   if (target_configure_unmap_from_queue(>dev_attrib, q,
- fd_dev->fd_block_size))
+   if (target_configure_unmap_from_queue(>dev_attrib, q))
pr_debug("IFILE: BLOCK Discard support available,"
 " disabled by default\n");
/*
diff --git a/drivers/target/target_core_iblock.c 
b/drivers/target/target_core_iblock.c
index 00bfe97..55be98b 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -127,8 +127,7 @@ static int iblock_configure_device(struct se_device *dev)
dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
dev->dev_attrib.hw_queue_depth = q->nr_requests;
 
-   if (target_configure_unmap_from_queue(>dev_attrib, q,
- dev->dev_attrib.hw_block_size))
+   if (target_configure_unmap_from_queue(>dev_attrib, q))
pr_debug("IBLOCK: BLOCK Discard support available,"
 " disabled by default\n");
 
diff --git a/include/target/target_core_backend.h 
b/include/target/target_core_backend.h
index 3905c1c7..5e1124d 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -145,6 +145,6 @@ int se_dev_set_block_size(struct se_device *, u32);
 
 sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-  struct request_queue *q, int block_size);
+  struct request_queue *q);
 
 #endif /* TARGET_CORE_BACKEND_H */
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 17/23] target: Fix TAS handling for multi-session se_node_acls

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: ebde1ca5a908b10312db4ecd7553e3ba039319ab

This patch fixes a bug in TMR task aborted status (TAS)
handling when multiple sessions are connected to the
same target WWPN endpoint and se_node_acl descriptor,
resulting in TASK_ABORTED status to not be generated
for aborted se_cmds on the remote port.

This is due to core_tmr_handle_tas_abort() incorrectly
comparing se_node_acl instead of se_session, for which
the multi-session case is expected to be sharing the
same se_node_acl.

Instead, go ahead and update core_tmr_handle_tas_abort()
to compare tmr_sess + cmd->se_sess in order to determine
if the LUN_RESET was received on a different I_T nexus,
and TASK_ABORTED status response needs to be generated.

Reviewed-by: Christoph Hellwig <h...@lst.de>
Cc: Quinn Tran <quinn.t...@qlogic.com>
Cc: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: Sagi Grimberg <sa...@mellanox.com>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_tmr.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 99bc3d4..8f935fb 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -79,7 +79,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
 }
 
 static void core_tmr_handle_tas_abort(
-   struct se_node_acl *tmr_nacl,
+   struct se_session *tmr_sess,
struct se_cmd *cmd,
int tas)
 {
@@ -87,7 +87,7 @@ static void core_tmr_handle_tas_abort(
/*
 * TASK ABORTED status (TAS) bit support
 */
-   if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) {
+   if (tmr_sess && tmr_sess != cmd->se_sess && tas) {
remove = false;
transport_send_task_abort(cmd);
}
@@ -281,7 +281,7 @@ static void core_tmr_drain_tmr_list(
 static void core_tmr_drain_state_list(
struct se_device *dev,
struct se_cmd *prout_cmd,
-   struct se_node_acl *tmr_nacl,
+   struct se_session *tmr_sess,
int tas,
struct list_head *preempt_and_abort_list)
 {
@@ -372,7 +372,7 @@ static void core_tmr_drain_state_list(
cancel_work_sync(>work);
transport_wait_for_tasks(cmd);
 
-   core_tmr_handle_tas_abort(tmr_nacl, cmd, tas);
+   core_tmr_handle_tas_abort(tmr_sess, cmd, tas);
target_put_sess_cmd(cmd);
}
 }
@@ -385,6 +385,7 @@ int core_tmr_lun_reset(
 {
struct se_node_acl *tmr_nacl = NULL;
struct se_portal_group *tmr_tpg = NULL;
+   struct se_session *tmr_sess = NULL;
int tas;
 /*
 * TASK_ABORTED status bit, this is configurable via ConfigFS
@@ -403,8 +404,9 @@ int core_tmr_lun_reset(
 * or struct se_device passthrough..
 */
if (tmr && tmr->task_cmd && tmr->task_cmd->se_sess) {
-   tmr_nacl = tmr->task_cmd->se_sess->se_node_acl;
-   tmr_tpg = tmr->task_cmd->se_sess->se_tpg;
+   tmr_sess = tmr->task_cmd->se_sess;
+   tmr_nacl = tmr_sess->se_node_acl;
+   tmr_tpg = tmr_sess->se_tpg;
if (tmr_nacl && tmr_tpg) {
pr_debug("LUN_RESET: TMR caller fabric: %s"
" initiator port %s\n",
@@ -417,7 +419,7 @@ int core_tmr_lun_reset(
dev->transport->name, tas);
 
core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list);
-   core_tmr_drain_state_list(dev, prout_cmd, tmr_nacl, tas,
+   core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas,
preempt_and_abort_list);
 
/*
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 03/23] target: Inline transport_get_sense_codes()

2018-03-27 Thread Andrei Vagin

From: Bart Van Assche <bart.vanass...@sandisk.com>

ML: 46d5bd62ef9e3d6e2018963cbb725c91f864922d

Inline this function in its call site since it performs a trivial
task and since it is only called once.

Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Sagi Grimberg <sa...@mellanox.com>
Reviewed-by: Hannes Reinecke <h...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Reviewed-by: Martin K. Petersen <martin.peter...@oracle.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_transport.c | 16 ++--
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 495cbb2..66f5438 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2669,17 +2669,6 @@ bool transport_wait_for_tasks(struct se_cmd *cmd)
 }
 EXPORT_SYMBOL(transport_wait_for_tasks);
 
-static int transport_get_sense_codes(
-   struct se_cmd *cmd,
-   u8 *asc,
-   u8 *ascq)
-{
-   *asc = cmd->scsi_asc;
-   *ascq = cmd->scsi_ascq;
-
-   return 0;
-}
-
 static
 void transport_err_sector_info(unsigned char *buffer, sector_t bad_sector)
 {
@@ -2873,9 +2862,8 @@ transport_send_check_condition_and_sense(struct se_cmd 
*cmd,
buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
/* Not Ready */
buffer[SPC_SENSE_KEY_OFFSET] = NOT_READY;
-   transport_get_sense_codes(cmd, , );
-   buffer[SPC_ASC_KEY_OFFSET] = asc;
-   buffer[SPC_ASCQ_KEY_OFFSET] = ascq;
+   buffer[SPC_ASC_KEY_OFFSET] = cmd->scsi_asc;
+   buffer[SPC_ASCQ_KEY_OFFSET] = cmd->scsi_ascq;
break;
case TCM_MISCOMPARE_VERIFY:
/* CURRENT ERROR */
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 04/23] target: Split transport_send_check_condition_and_sense()

2018-03-27 Thread Andrei Vagin

From: Bart Van Assche <bart.vanass...@sandisk.com>

ML: ab78fef4d5f79134042ae0e1e2c259e1226aa5bd

Move the code for translating a sense_reason_t code into a SCSI status
ASC and ASCQ codes from transport_send_check_condition_and_sense() into
the new function translate_sense_reason(). Convert the switch statement
that performs the translation into table-driven code.

Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
Signed-off-by: Sagi Grimberg <sa...@mellanox.com>
Reviewed-by: Hannes Reinecke <h...@suse.de>
Reviewed-by: Christoph Hellwig <h...@lst.de>
Reviewed-by: Martin K. Petersen <martin.peter...@oracle.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_transport.c | 386 +
 1 file changed, 151 insertions(+), 235 deletions(-)

diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 66f5438..8af8049 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2682,13 +2682,158 @@ void transport_err_sector_info(unsigned char *buffer, 
sector_t bad_sector)
put_unaligned_be64(bad_sector, [12]);
 }
 
+struct sense_info {
+   u8 key;
+   u8 asc;
+   u8 ascq;
+   bool add_sector_info;
+};
+
+static const struct sense_info sense_info_table[] = {
+   [TCM_NO_SENSE] = {
+   .key = NOT_READY
+   },
+   [TCM_NON_EXISTENT_LUN] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x25 /* LOGICAL UNIT NOT SUPPORTED */
+   },
+   [TCM_UNSUPPORTED_SCSI_OPCODE] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x20, /* INVALID COMMAND OPERATION CODE */
+   },
+   [TCM_SECTOR_COUNT_TOO_MANY] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x20, /* INVALID COMMAND OPERATION CODE */
+   },
+   [TCM_UNKNOWN_MODE_PAGE] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x24, /* INVALID FIELD IN CDB */
+   },
+   [TCM_CHECK_CONDITION_ABORT_CMD] = {
+   .key = ABORTED_COMMAND,
+   .asc = 0x29, /* BUS DEVICE RESET FUNCTION OCCURRED */
+   .ascq = 0x03,
+   },
+   [TCM_INCORRECT_AMOUNT_OF_DATA] = {
+   .key = ABORTED_COMMAND,
+   .asc = 0x0c, /* WRITE ERROR */
+   .ascq = 0x0d, /* NOT ENOUGH UNSOLICITED DATA */
+   },
+   [TCM_INVALID_CDB_FIELD] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x24, /* INVALID FIELD IN CDB */
+   },
+   [TCM_INVALID_PARAMETER_LIST] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x26, /* INVALID FIELD IN PARAMETER LIST */
+   },
+   [TCM_PARAMETER_LIST_LENGTH_ERROR] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x1a, /* PARAMETER LIST LENGTH ERROR */
+   },
+   [TCM_UNEXPECTED_UNSOLICITED_DATA] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x0c, /* WRITE ERROR */
+   .ascq = 0x0c, /* UNEXPECTED_UNSOLICITED_DATA */
+   },
+   [TCM_SERVICE_CRC_ERROR] = {
+   .key = ABORTED_COMMAND,
+   .asc = 0x47, /* PROTOCOL SERVICE CRC ERROR */
+   .ascq = 0x05, /* N/A */
+   },
+   [TCM_SNACK_REJECTED] = {
+   .key = ABORTED_COMMAND,
+   .asc = 0x11, /* READ ERROR */
+   .ascq = 0x13, /* FAILED RETRANSMISSION REQUEST */
+   },
+   [TCM_WRITE_PROTECTED] = {
+   .key = DATA_PROTECT,
+   .asc = 0x27, /* WRITE PROTECTED */
+   },
+   [TCM_ADDRESS_OUT_OF_RANGE] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x21, /* LOGICAL BLOCK ADDRESS OUT OF RANGE */
+   },
+   [TCM_CHECK_CONDITION_UNIT_ATTENTION] = {
+   .key = UNIT_ATTENTION,
+   },
+   [TCM_CHECK_CONDITION_NOT_READY] = {
+   .key = NOT_READY,
+   },
+   [TCM_MISCOMPARE_VERIFY] = {
+   .key = MISCOMPARE,
+   .asc = 0x1d, /* MISCOMPARE DURING VERIFY OPERATION */
+   .ascq = 0x00,
+   },
+   [TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x10,
+   .ascq = 0x01, /* LOGICAL BLOCK GUARD CHECK FAILED */
+   .add_sector_info = true,
+   },
+   [TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x10,
+   .ascq = 0x02, /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */
+   .add_sector_info = true,
+   },
+   [TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED] = {
+   .key = ILLEGAL_REQUEST,
+   .asc = 0x10,
+   .ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */
+   .add_sector_info = true,
+   },

[Devel] [PATCH 00/23] target: backport bug fixes from the upstream kernel

2018-03-27 Thread Andrei Vagin

Alexei Potashnik (1):
  target/iscsi: Fix double free of a TUR followed by a solicited NOPOUT

Bart Van Assche (2):
  target: Inline transport_get_sense_codes()
  target: Split transport_send_check_condition_and_sense()

Christoph Hellwig (1):
  target: fix DPO and FUA bit checks

Dinesh Israni (1):
  target: Don't override EXTENDED_COPY xcopy_pt_cmd SCSI status code

Himanshu Madhani (1):
  target: Fix target_release_cmd_kref shutdown comp leak

Jamie Pocas (1):
  target/sbc: Add LBPRZ attribute + control CDB emulation

Jan Engelhardt (1):
  target: fix COMPARE_AND_WRITE non zero SGL offset data corruption

Jiang Yi (1):
  target: reject COMPARE_AND_WRITE if emulate_caw is not set

Max Lohrmann (1):
  target: Fix VERIFY_16 handling in sbc_parse_cdb

Mike Christie (2):
  target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors
  target: Fix max_unmap_lba_count calc overflow

Nicholas Bellinger (9):
  target: Make EXTENDED_COPY 0xe4 failure return COPY TARGET DEVICE NOT
REACHABLE
  target: Use correct SCSI status during EXTENDED_COPY exception
  iscsi-target: Fix potential dead-lock during node acl delete
  target: Fix race with SCF_SEND_DELAYED_TAS handling
  target: Fix LUN_RESET active I/O handling for ACK_KREF
  target: Fix LUN_RESET active TMR descriptor handling
  target: Fix TAS handling for multi-session se_node_acls
  target: Fix remote-port TMR ABORT + se_cmd fabric stop
  target: Drop incorrect ABORT_TASK put for completed commands

Varun Prakash (2):
  iscsi-target: fix memory leak in iscsit_setup_text_cmd()
  iscsi-target: fix invalid flags in text response

 drivers/target/iscsi/iscsi_target.c  |   9 +-
 drivers/target/iscsi/iscsi_target_configfs.c |  16 +-
 drivers/target/target_core_device.c  |  76 +++-
 drivers/target/target_core_file.c|  32 +-
 drivers/target/target_core_iblock.c  |  55 +--
 drivers/target/target_core_internal.h|   2 +
 drivers/target/target_core_sbc.c |  25 +-
 drivers/target/target_core_spc.c |  12 +-
 drivers/target/target_core_tmr.c |  89 +++-
 drivers/target/target_core_transport.c   | 636 +++
 drivers/target/target_core_xcopy.c   |  36 +-
 include/target/target_core_backend.h |   4 +
 include/target/target_core_base.h|   9 +-
 13 files changed, 566 insertions(+), 435 deletions(-)

-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 21/23] iscsi-target: fix memory leak in iscsit_setup_text_cmd()

2018-03-27 Thread Andrei Vagin

From: Varun Prakash <va...@chelsio.com>

ML: ea8dc5b4cd2195ee582cae28afa4164c6dea1738

On receiving a text request, iscsi-target allocates a buffer for the
payload in iscsit_handle_text_cmd() and assigns the buffer pointer
to cmd->text_in_ptr. This buffer is currently freed in
iscsit_release_cmd(). If iscsi-target sets the 'C' bit in the text
response, it will receive another text request from the
initiator with ttt != 0xffffffff. In this case iscsi-target
will find cmd using itt and call iscsit_setup_text_cmd(),
which will set cmd->text_in_ptr to NULL without freeing the
previously allocated buffer.

This patch fixes this issue by calling kfree(cmd->text_in_ptr)
in iscsit_setup_text_cmd() before assigning NULL to it.

For the first text request cmd->text_in_ptr is NULL as
cmd is memset to 0 in iscsit_allocate_cmd().

Signed-off-by: Varun Prakash <va...@chelsio.com>
Cc: <sta...@vger.kernel.org> # 4.0+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/iscsi/iscsi_target.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/target/iscsi/iscsi_target.c 
b/drivers/target/iscsi/iscsi_target.c
index ea8573e..aeba04a 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -2164,6 +2164,7 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct 
iscsi_cmd *cmd,
cmd->cmd_sn = be32_to_cpu(hdr->cmdsn);
cmd->exp_stat_sn= be32_to_cpu(hdr->exp_statsn);
cmd->data_direction = DMA_NONE;
+   kfree(cmd->text_in_ptr);
cmd->text_in_ptr= NULL;
 
return 0;
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 14/23] target: fix DPO and FUA bit checks

2018-03-27 Thread Andrei Vagin

From: Christoph Hellwig <h...@lst.de>

ML: 814e5b45182f4aaf6c0b0deac7104bc2cba5109e

Drivers may override the WCE flag, in which case the DPOFUA flag in
MODE SENSE might differ from the check used to reject invalid FUA
bits in sbc_check_dpofua.  Also now that we reject invalid FUA
bits early there is no need to duplicate the same buggy check
down in the fileio code.

As the DPOFUA flag controls the support for FUA bits on read and
write commands as well as DPO, key all the checks off a single
helper, and deprecate the emulate_dpo and emulate_fua_read attributes.

This fixes various failures in the libiscsi testsuite.

Personally I'd prefer to also remove the emulate_fua_write attribute
as there is no good reason to disable it, but I'll leave that for
a separate discussion.

Signed-off-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_device.c| 30 +++---
 drivers/target/target_core_file.c  |  4 +---
 drivers/target/target_core_internal.h  |  2 ++
 drivers/target/target_core_sbc.c   |  5 +++--
 drivers/target/target_core_spc.c   | 12 
 drivers/target/target_core_transport.c | 19 +++
 include/target/target_core_base.h  |  6 --
 7 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index 81156de..7dfe964 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -794,16 +794,8 @@ EXPORT_SYMBOL(se_dev_set_emulate_model_alias);
 
 int se_dev_set_emulate_dpo(struct se_device *dev, int flag)
 {
-   if (flag != 0 && flag != 1) {
-   pr_err("Illegal value %d\n", flag);
-   return -EINVAL;
-   }
-
-   if (flag) {
-   pr_err("dpo_emulated not supported\n");
-   return -EINVAL;
-   }
-
+   printk_once(KERN_WARNING
+   "ignoring deprecated emulate_dpo attribute\n");
return 0;
 }
 EXPORT_SYMBOL(se_dev_set_emulate_dpo);
@@ -833,16 +825,8 @@ EXPORT_SYMBOL(se_dev_set_emulate_fua_write);
 
 int se_dev_set_emulate_fua_read(struct se_device *dev, int flag)
 {
-   if (flag != 0 && flag != 1) {
-   pr_err("Illegal value %d\n", flag);
-   return -EINVAL;
-   }
-
-   if (flag) {
-   pr_err("ua read emulated not supported\n");
-   return -EINVAL;
-   }
-
+   printk_once(KERN_WARNING
+   "ignoring deprecated emulate_fua_read attribute\n");
return 0;
 }
 EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
@@ -1547,9 +1531,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, 
const char *name)
 
dev->dev_attrib.da_dev = dev;
dev->dev_attrib.emulate_model_alias = DA_EMULATE_MODEL_ALIAS;
-   dev->dev_attrib.emulate_dpo = DA_EMULATE_DPO;
-   dev->dev_attrib.emulate_fua_write = DA_EMULATE_FUA_WRITE;
-   dev->dev_attrib.emulate_fua_read = DA_EMULATE_FUA_READ;
+   dev->dev_attrib.emulate_dpo = 1;
+   dev->dev_attrib.emulate_fua_write = 1;
+   dev->dev_attrib.emulate_fua_read = 1;
dev->dev_attrib.emulate_write_cache = DA_EMULATE_WRITE_CACHE;
dev->dev_attrib.emulate_ua_intlck_ctrl = DA_EMULATE_UA_INTLLCK_CTRL;
dev->dev_attrib.emulate_tas = DA_EMULATE_TAS;
diff --git a/drivers/target/target_core_file.c 
b/drivers/target/target_core_file.c
index 5c92a08..4e87701 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -726,9 +726,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, 
u32 sgl_nents,
 * for SCSI WRITEs with Forced Unit Access (FUA) set.
 * Allow this to happen independent of WCE=0 setting.
 */
-   if (ret > 0 &&
-   dev->dev_attrib.emulate_fua_write > 0 &&
-   (cmd->se_cmd_flags & SCF_FUA)) {
+   if (ret > 0 && (cmd->se_cmd_flags & SCF_FUA)) {
struct fd_dev *fd_dev = FD_DEV(dev);
loff_t start = cmd->t_task_lba *
dev->dev_attrib.block_size;
diff --git a/drivers/target/target_core_internal.h 
b/drivers/target/target_core_internal.h
index 60381db..06f372c 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -78,6 +78,8 @@ int   transport_clear_lun_ref(struct se_lun *);
 void   transport_send_task_abort(struct se_cmd *);
 sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size);
 void   target_qf_do_work(struct work_struct *work);
+bool   target_check_wce(struct se_device *dev);
+bool   target_c

[Devel] [PATCH 06/23] target: Don't override EXTENDED_COPY xcopy_pt_cmd SCSI status code

2018-03-27 Thread Andrei Vagin

From: Dinesh Israni <d...@datera.io>

ML: 926317de33998c112c5510301868ea9aa34097e2

This patch addresses a bug where a local EXTENDED_COPY WRITE or READ
backend I/O request would always return SAM_STAT_CHECK_CONDITION,
even if underlying xcopy_pt_cmd->se_cmd generated a different
SCSI status code.

ESX host environments expect to hit SAM_STAT_RESERVATION_CONFLICT
for certain scenarios, and SAM_STAT_CHECK_CONDITION results in
non-retriable status for these cases.

Tested on v4.1.y with ESX v5.5u2+ with local IBLOCK backend copy.

Reported-by: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Tested-by: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Cc: Nixon Vincent <nixon.vinc...@calsoftinc.com>
Tested-by: Dinesh Israni <d...@datera.io>
Signed-off-by: Dinesh Israni <d...@datera.io>
Cc: Dinesh Israni <d...@datera.io>
Cc: sta...@vger.kernel.org # 3.14+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_xcopy.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 5cfb382..9f5a002 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -707,6 +707,7 @@ static int target_xcopy_read_source(
rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, [0],
remote_port, true);
if (rc < 0) {
+   ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
transport_generic_free_cmd(se_cmd, 0);
return rc;
}
@@ -718,6 +719,7 @@ static int target_xcopy_read_source(
 
rc = target_xcopy_issue_pt_cmd(xpt_cmd);
if (rc < 0) {
+   ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
transport_generic_free_cmd(se_cmd, 0);
return rc;
}
@@ -768,6 +770,7 @@ static int target_xcopy_write_destination(
remote_port, false);
if (rc < 0) {
struct se_cmd *src_cmd = >src_pt_cmd->se_cmd;
+   ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
/*
 * If the failure happened before the t_mem_list hand-off in
 * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so 
that
@@ -783,6 +786,7 @@ static int target_xcopy_write_destination(
 
rc = target_xcopy_issue_pt_cmd(xpt_cmd);
if (rc < 0) {
+   ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
transport_generic_free_cmd(se_cmd, 0);
return rc;
@@ -869,10 +873,14 @@ static void target_xcopy_do_work(struct work_struct *work)
 out:
xcopy_pt_undepend_remotedev(xop);
kfree(xop);
-
-   pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting X-COPY 
CHECK_CONDITION"
-   " -> sending response\n", rc);
-   ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+   /*
+* Don't override an error scsi status if it has already been set
+*/
+   if (ec_cmd->scsi_status == SAM_STAT_GOOD) {
+   pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting 
X-COPY"
+   " CHECK_CONDITION -> sending response\n", rc);
+   ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+   }
target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION);
 }
 
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 15/23] target: Fix LUN_RESET active I/O handling for ACK_KREF

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: febe562c20dfa8f33bee7d419c6b517986a5aa33

This patch fixes a NULL pointer se_cmd->cmd_kref < 0
refcount bug during TMR LUN_RESET with active se_cmd
I/O, that can be triggered during se_cmd descriptor
shutdown + release via core_tmr_drain_state_list() code.

To address this bug, add common __target_check_io_state()
helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE
checking, and set CMD_T_ABORTED + obtain ->cmd_kref for
both cases ahead of last target_put_sess_cmd() after
TFO->aborted_task() -> transport_cmd_finish_abort()
callback has completed.

It also introduces SCF_ACK_KREF to determine when
transport_cmd_finish_abort() needs to drop the second
extra reference, ahead of calling target_put_sess_cmd()
for the final kref_put(&se_cmd->cmd_kref).

It also updates transport_cmd_check_stop() to avoid
holding se_cmd->t_state_lock while dropping se_cmd
device state via target_remove_from_state_list(), now
that core_tmr_drain_state_list() is holding the
se_device lock while checking se_cmd state from
within TMR logic.

Finally, move transport_put_cmd() release of SGL +
TMR + extended CDB memory into target_free_cmd_mem()
in order to avoid potential resource leaks in TMR
ABORT_TASK + LUN_RESET code-paths.  Also update
target_release_cmd_kref() accordingly.

Reviewed-by: Quinn Tran <quinn.t...@qlogic.com>
Cc: Himanshu Madhani <himanshu.madh...@qlogic.com>
Cc: Sagi Grimberg <sa...@mellanox.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <mchri...@redhat.com>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_tmr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 4fe985a..e2c9672 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -177,7 +177,6 @@ void core_tmr_abort_task(
cancel_work_sync(_cmd->work);
transport_wait_for_tasks(se_cmd);
 
-
if (se_cmd->se_cmd_flags & SCF_SE_LUN_CMD)
atomic_long_inc(_cmd->se_lun->lun_stats.aborts);
 
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 10/23] iscsi-target: Fix potential dead-lock during node acl delete

2018-03-27 Thread Andrei Vagin

From: Nicholas Bellinger <n...@linux-iscsi.org>

ML: 26a99c19f810b2593410899a5b304b21b47428a6

This patch is an iscsi-target specific bug-fix for a dead-lock
that can occur during explicit struct se_node_acl->acl_group
se_session deletion via configfs rmdir(2), when iscsi-target
time2retain timer is still active.

It changes iscsi-target to obtain se_portal_group->session_lock
internally using spin_is_locked() to check for the specific
se_node_acl configfs shutdown rmdir(2) case.

Note this patch is intended for stable, and the subsequent
v4.5-rc patch converts target_core_tpg.c to use proper
se_sess->sess_kref reference counting for both se_node_acl
deletion + se_node_acl->queue_depth se_session restart.

Reported-by: Sagi Grimberg <sa...@mellanox.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Hannes Reinecke <h...@suse.de>
Cc: Andy Grover <agro...@redhat.com>
Cc: Mike Christie <micha...@cs.wisc.edu>
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/iscsi/iscsi_target_configfs.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c 
b/drivers/target/iscsi/iscsi_target_configfs.c
index 6d754cf..f43e975 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -2016,7 +2016,8 @@ static void lio_tpg_release_fabric_acl(
 }
 
 /*
- * Called with spin_lock_bh(struct se_portal_group->session_lock) held..
+ * Called with spin_lock_irq(struct se_portal_group->session_lock) held
+ * or not held.
  *
  * Also, this function calls iscsit_inc_session_usage_count() on the
  * struct iscsi_session in question.
@@ -2024,19 +2025,32 @@ static void lio_tpg_release_fabric_acl(
 static int lio_tpg_shutdown_session(struct se_session *se_sess)
 {
struct iscsi_session *sess = se_sess->fabric_sess_ptr;
+   struct se_portal_group *se_tpg = se_sess->se_tpg;
+   bool local_lock = false;
+
+   if (!spin_is_locked(_tpg->session_lock)) {
+   spin_lock_irq(_tpg->session_lock);
+   local_lock = true;
+   }
 
spin_lock(>conn_lock);
if (atomic_read(>session_fall_back_to_erl0) ||
atomic_read(>session_logout) ||
(sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) {
spin_unlock(>conn_lock);
+   if (local_lock)
+   spin_unlock_irq(>conn_lock);
return 0;
}
atomic_set(>session_reinstatement, 1);
spin_unlock(>conn_lock);
 
iscsit_stop_time2retain_timer(sess);
+   spin_unlock_irq(_tpg->session_lock);
+
iscsit_stop_session(sess, 1, 1);
+   if (!local_lock)
+   spin_lock_irq(_tpg->session_lock);
 
return 1;
 }
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH] target/iscsi: add an ability to set io limits for iscsi targets

2018-03-21 Thread Andrei Vagin

On Wed, Mar 14, 2018 at 11:01:43AM +0300, Vasily Averin wrote:
> See small comment below
> 
> On 2018-03-14 01:48, Andrei Vagin wrote:
> > This patch adds an ability to set a blkio cgroup for an iscsi target.
> > 
> > When a new client is connected, the kernel creates two kernel threads and
> > runs all io requests from them. This means that we can set a blkio group
> > for these threads and set io limits via this group.
> > 
> > Here is an example of how this works:
> > 
> >   $ TGT_PATH=/sys/kernel/config/target/iscsi/iqn.2014-06.com.vstorage\:test
> >   $ CG_PATH=/sys/fs/cgroup/blkio/system.slice/vstorage-iscsi/
> > 
> >   # create a group
> >   $ mkdir -p $CG_PATH
> > 
> >   # set cgroup for iscsi target
> >   $ bash -c 'echo $$ > $CG_PATH/tasks &&
> >  echo 1 > $TGT_PATH/tpgt_1/param/BlkioCgroup'
> >   $ cat $TGT_PATH/tpgt_1/param/BlkioCgroup
> >   /system.slice/vstorage-iscsi
> > 
> >   # attach iscsi target
> >   $ IQN=iqn.2014-06.com.vstorage:test
> >   $ iscsiadm -m node -T $IQN -l
> >   Logging in to [iface: default, target: iqn.2014-06.com.vstorage:test,
> >   portal: 10.94.120.187,3260] (multiple)
> >   Login to [iface: default, target: iqn.2014-06.com.vstorage:test, portal:
> >   10.94.120.187,3260] successful.
> > 
> >   # check that iscsi threads in the required cgroup
> >   $ ps -C iscsi_ttx
> >PID TTY  TIME CMD
> >4097 ?00:00:00 iscsi_ttx
> >   $ cat /proc/4097/cgroup | grep blkio
> >   1:blkio:/system.slice/vstorage-iscsi
> > 
> >   # set io limits for a target backing store device
> >   $ ploop list
> >   ploop15810 
> > /mnt/vstorage/vols/iscsi/iqn.2014-06.com.vstorage:test/lun1/ploop
> >   $ ls -l /dev/ploop15810
> >   brw-rw 1 root disk 182, 252960 Mar 14 00:59 /dev/ploop15810
> >   $ echo "182:252960 6291456" > $CG_PATH/blkio.throttle.read_bps_device
> > 
> >   # check that limits work as expected
> >   $ dd if=/dev/sda of=/dev/null bs=10M count=10
> >   10+0 records in
> >   10+0 records out
> >   104857600 bytes (105 MB) copied, 17.3534 s, 6.0 MB/s
> > 
> > Signed-off-by: Andrei Vagin <ava...@openvz.org>
> > ---
> >  drivers/target/iscsi/iscsi_target_configfs.c | 46 
> > 
> >  drivers/target/iscsi/iscsi_target_login.c| 34 +++-
> >  drivers/target/iscsi/iscsi_target_tpg.c  | 36 ++
> >  drivers/target/iscsi/iscsi_target_tpg.h  |  2 ++
> >  include/target/iscsi/iscsi_target_core.h |  2 ++
> >  kernel/cgroup.c  |  1 +
> >  6 files changed, 120 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/target/iscsi/iscsi_target_configfs.c 
> > b/drivers/target/iscsi/iscsi_target_configfs.c
> > index e6631ab..6d754cf 100644
> > --- a/drivers/target/iscsi/iscsi_target_configfs.c
> > +++ b/drivers/target/iscsi/iscsi_target_configfs.c
> > @@ -1412,6 +1412,51 @@ TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR);
> >  DEF_TPG_PARAM(OFMarkInt);
> >  TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR);
> >  
> > +static ssize_t iscsi_tpg_param_show_BlkioCgroup(
> > +   struct se_portal_group *se_tpg,
> > +   char *page)
> > +{
> > +   struct iscsi_portal_group *tpg = container_of(se_tpg,
> > +   struct iscsi_portal_group, tpg_se_tpg);
> > +   ssize_t rb;
> > +
> > +   if (iscsit_get_tpg(tpg) < 0)
> > +   return -EINVAL;
> > +
> > +   rb = iscsit_ta_tpg_show_blkcg(tpg, page);
> > +   iscsit_put_tpg(tpg);
> > +   return rb;
> > +}
> > +
> > +static ssize_t iscsi_tpg_param_store_BlkioCgroup(
> > +   struct se_portal_group *se_tpg,
> > +   const char *page,
> > +   size_t count)
> > +{
> > +   struct iscsi_portal_group *tpg = container_of(se_tpg,
> > +   struct iscsi_portal_group, tpg_se_tpg);
> > +   u32 val;
> > +   int ret;
> > +
> > +   if (iscsit_get_tpg(tpg) < 0)
> > +   return -EINVAL;
> > +
> > +   ret = kstrtou32(page, 0, );
> > +   if (ret)
> > +   goto out;
> > +   ret = iscsit_ta_tpg_set_blkcg(tpg, val);
> > +   if (ret < 0)
> > +   goto out;
> > +
> > +   iscsit_put_tpg(tpg);
> > +   return count;
> > +out:
> > +   iscsit_put_tpg(tpg);
> > +   return ret;
> > +}
> > +
> > +TPG_PARAM_ATTR(BlkioCgroup, S_IRUGO | S_IWUSR);
> > +
> >  static struct configfs_

Re: [Devel] [PATCH vz7] net: Skip IP_FREEBIND for ipv6 SOCK_RAW sockets

2018-03-21 Thread Andrei Vagin

On Wed, Mar 21, 2018 at 01:27:35PM +0300, Kirill Tkhai wrote:
> On 21.03.2018 07:43, Andrei Vagin wrote:
> > On Wed, Mar 21, 2018 at 03:07:12AM +0300, Kirill Tkhai wrote:
> >> On 21.03.2018 02:18, Andrei Vagin wrote:
> >>> On Tue, Mar 20, 2018 at 02:07:51PM +0300, Kirill Tkhai wrote:
> >>>> IP_FREEBIND is not supported for SOCK_RAW ipv6 sockets.
> >>>> See kernel rawv6_setsockopt() for the details.
> >>>
> >>> inet_bind() can fail for ipv6 addresses without this option:
> >>>
> >>> /*
> >>>  * ipv6 addresses go through a “tentative” phase and
> >>>  * sockets could not be bound to them in this moment
> >>>  * without setting IP_FREEBIND.
> >>>  */
> >>>
> >>> Maybe we need to add support of this option for raw socket in the kernel?
> >>
> >> There are already: net->ipv4.sysctl_ip_nonlocal_bind and 
> >> net->ipv6.sysctl.ip_nonlocal_bind.
> >> Don't they fulfill our requirements?
> > 
> > Yes, they do.
> > 
> >> We just need to use them in code in general,
> >> when we port vz7 functionality to ml criu.
> > 
> > When are you going to do this?
> 
> I'm not going to port raw socket functionality to vz7, since this is Cyrill's 
> patch.
> 
> I can replace IP_FREEBIND workarounds with sysctl.ip_nonlocal_bind in vanilla 
> criu instead.

Ok.

> 
> Kirill
> 
> >>
> >>>>
> >>>> JFI:
> >>>> For the rest of sockets, where they begin to support it:
> >>>> the actual magic happens in do_ipv6_setsockopt(), and
> >>>> IPV6_ADDRFORM optname changes sk->sk_prot to refer
> >>>> to tcp_prot, udp_prot and udplite_prot, which contains
> >>>> link to ip_setsockopt().
> >>>>
> >>>> Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com>
> >>>> ---
> >>>>  criu/sk-inet.c |2 +-
> >>>>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/criu/sk-inet.c b/criu/sk-inet.c
> >>>> index 941de1863..4fccda48c 100644
> >>>> --- a/criu/sk-inet.c
> >>>> +++ b/criu/sk-inet.c
> >>>> @@ -813,7 +813,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
> >>>>   * sockets could not be bound to them in this moment
> >>>>   * without setting IP_FREEBIND.
> >>>>   */
> >>>> -if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) 
> >>>> {
> >>>> +if (ii->ie->family == AF_INET6 && ii->ie->type != SOCK_RAW) {
> >>>>  int yes = 1;
> >>>>  
> >>>>  if (restore_opt(sk, SOL_IP, IP_FREEBIND, ))
> >>>>
> >>>> ___
> >>>> Devel mailing list
> >>>> Devel@openvz.org
> >>>> https://lists.openvz.org/mailman/listinfo/devel
> >>
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH vz7] net: Skip IP_FREEBIND for ipv6 SOCK_RAW sockets

2018-03-20 Thread Andrei Vagin

On Wed, Mar 21, 2018 at 03:07:12AM +0300, Kirill Tkhai wrote:
> On 21.03.2018 02:18, Andrei Vagin wrote:
> > On Tue, Mar 20, 2018 at 02:07:51PM +0300, Kirill Tkhai wrote:
> >> IP_FREEBIND is not supported for SOCK_RAW ipv6 sockets.
> >> See kernel rawv6_setsockopt() for the details.
> > 
> > inet_bind() can fail for ipv6 addresses without this option:
> > 
> > /*
> >  * ipv6 addresses go through a “tentative” phase and
> >  * sockets could not be bound to them in this moment
> >  * without setting IP_FREEBIND.
> >  */
> > 
> > Maybe we need to add support of this option for raw socket in the kernel?
> 
> There are already: net->ipv4.sysctl_ip_nonlocal_bind and 
> net->ipv6.sysctl.ip_nonlocal_bind.
> Don't they fulfill our requirements?

Yes, they do.

> We just need to use them in code in general,
> when we port vz7 functionality to ml criu.

When are you going to do this?

> 
> >>
> >> JFI:
> >> For the rest of sockets, where they begin to support it:
> >> the actual magic happens in do_ipv6_setsockopt(), and
> >> IPV6_ADDRFORM optname changes sk->sk_prot to refer
> >> to tcp_prot, udp_prot and udplite_prot, which contains
> >> link to ip_setsockopt().
> >>
> >> Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com>
> >> ---
> >>  criu/sk-inet.c |2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/criu/sk-inet.c b/criu/sk-inet.c
> >> index 941de1863..4fccda48c 100644
> >> --- a/criu/sk-inet.c
> >> +++ b/criu/sk-inet.c
> >> @@ -813,7 +813,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
> >> * sockets could not be bound to them in this moment
> >> * without setting IP_FREEBIND.
> >> */
> >> -  if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
> >> +  if (ii->ie->family == AF_INET6 && ii->ie->type != SOCK_RAW) {
> >>int yes = 1;
> >>  
> >>if (restore_opt(sk, SOL_IP, IP_FREEBIND, ))
> >>
> >> ___
> >> Devel mailing list
> >> Devel@openvz.org
> >> https://lists.openvz.org/mailman/listinfo/devel
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH vz7] net: Skip IP_FREEBIND for ipv6 SOCK_RAW sockets

2018-03-20 Thread Andrei Vagin

On Tue, Mar 20, 2018 at 02:07:51PM +0300, Kirill Tkhai wrote:
> IP_FREEBIND is not supported for SOCK_RAW ipv6 sockets.
> See kernel rawv6_setsockopt() for the details.

inet_bind() can fail for ipv6 addresses without this option:

/*
 * ipv6 addresses go through a “tentative” phase and
 * sockets could not be bound to them in this moment
 * without setting IP_FREEBIND.
 */

Maybe we need to add support of this option for raw socket in the kernel?

> 
> JFI:
> For the rest of sockets, where they begin to support it:
> the actual magic happens in do_ipv6_setsockopt(), and
> IPV6_ADDRFORM optname changes sk->sk_prot to refer
> to tcp_prot, udp_prot and udplite_prot, which contains
> link to ip_setsockopt().
> 
> Signed-off-by: Kirill Tkhai 
> ---
>  criu/sk-inet.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/criu/sk-inet.c b/criu/sk-inet.c
> index 941de1863..4fccda48c 100644
> --- a/criu/sk-inet.c
> +++ b/criu/sk-inet.c
> @@ -813,7 +813,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
>* sockets could not be bound to them in this moment
>* without setting IP_FREEBIND.
>*/
> - if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
> + if (ii->ie->family == AF_INET6 && ii->ie->type != SOCK_RAW) {
>   int yes = 1;
>  
>   if (restore_opt(sk, SOL_IP, IP_FREEBIND, ))
> 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target/iscsi: add an ability to set io limits for iscsi targets

2018-03-13 Thread Andrei Vagin

This patch adds an ability to set a blkio cgroup for an iscsi target.

When a new client is connected, the kernel creates two kernel threads and
runs all io requests from them. This means that we can set a blkio group
for these threads and set io limits via this group.

Here is an example of how this works:

  $ TGT_PATH=/sys/kernel/config/target/iscsi/iqn.2014-06.com.vstorage\:test
  $ CG_PATH=/sys/fs/cgroup/blkio/system.slice/vstorage-iscsi/

  # create a group
  $ mkdir -p $CG_PATH

  # set cgroup for iscsi target
  $ bash -c 'echo $$ > $CG_PATH/tasks &&
 echo 1 > $TGT_PATH/tpgt_1/param/BlkioCgroup'
  $ cat $TGT_PATH/tpgt_1/param/BlkioCgroup
  /system.slice/vstorage-iscsi

  # attach iscsi target
  $ IQN=iqn.2014-06.com.vstorage:test
  $ iscsiadm -m node -T $IQN -l
  Logging in to [iface: default, target: iqn.2014-06.com.vstorage:test,
  portal: 10.94.120.187,3260] (multiple)
  Login to [iface: default, target: iqn.2014-06.com.vstorage:test, portal:
  10.94.120.187,3260] successful.

  # check that iscsi threads in the required cgroup
  $ ps -C iscsi_ttx
   PID TTY  TIME CMD
   4097 ?00:00:00 iscsi_ttx
  $ cat /proc/4097/cgroup | grep blkio
  1:blkio:/system.slice/vstorage-iscsi

  # set io limits for a target backing store device
  $ ploop list
  ploop15810 /mnt/vstorage/vols/iscsi/iqn.2014-06.com.vstorage:test/lun1/ploop
  $ ls -l /dev/ploop15810
  brw-rw 1 root disk 182, 252960 Mar 14 00:59 /dev/ploop15810
  $ echo "182:252960 6291456" > $CG_PATH/blkio.throttle.read_bps_device

  # check that limits work as expected
  $ dd if=/dev/sda of=/dev/null bs=10M count=10
  10+0 records in
  10+0 records out
  104857600 bytes (105 MB) copied, 17.3534 s, 6.0 MB/s

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/iscsi/iscsi_target_configfs.c | 46 
 drivers/target/iscsi/iscsi_target_login.c| 34 +++-
 drivers/target/iscsi/iscsi_target_tpg.c  | 36 ++
 drivers/target/iscsi/iscsi_target_tpg.h  |  2 ++
 include/target/iscsi/iscsi_target_core.h |  2 ++
 kernel/cgroup.c  |  1 +
 6 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/drivers/target/iscsi/iscsi_target_configfs.c 
b/drivers/target/iscsi/iscsi_target_configfs.c
index e6631ab..6d754cf 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1412,6 +1412,51 @@ TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR);
 DEF_TPG_PARAM(OFMarkInt);
 TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR);
 
+static ssize_t iscsi_tpg_param_show_BlkioCgroup(
+   struct se_portal_group *se_tpg,
+   char *page)
+{
+   struct iscsi_portal_group *tpg = container_of(se_tpg,
+   struct iscsi_portal_group, tpg_se_tpg);
+   ssize_t rb;
+
+   if (iscsit_get_tpg(tpg) < 0)
+   return -EINVAL;
+
+   rb = iscsit_ta_tpg_show_blkcg(tpg, page);
+   iscsit_put_tpg(tpg);
+   return rb;
+}
+
+static ssize_t iscsi_tpg_param_store_BlkioCgroup(
+   struct se_portal_group *se_tpg,
+   const char *page,
+   size_t count)
+{
+   struct iscsi_portal_group *tpg = container_of(se_tpg,
+   struct iscsi_portal_group, tpg_se_tpg);
+   u32 val;
+   int ret;
+
+   if (iscsit_get_tpg(tpg) < 0)
+   return -EINVAL;
+
+   ret = kstrtou32(page, 0, );
+   if (ret)
+   goto out;
+   ret = iscsit_ta_tpg_set_blkcg(tpg, val);
+   if (ret < 0)
+   goto out;
+
+   iscsit_put_tpg(tpg);
+   return count;
+out:
+   iscsit_put_tpg(tpg);
+   return ret;
+}
+
+TPG_PARAM_ATTR(BlkioCgroup, S_IRUGO | S_IWUSR);
+
 static struct configfs_attribute *lio_target_tpg_param_attrs[] = {
_tpg_param_AuthMethod.attr,
_tpg_param_HeaderDigest.attr,
@@ -1434,6 +1479,7 @@ static struct configfs_attribute 
*lio_target_tpg_param_attrs[] = {
_tpg_param_OFMarker.attr,
_tpg_param_IFMarkInt.attr,
_tpg_param_OFMarkInt.attr,
+   _tpg_param_BlkioCgroup.attr,
NULL,
 };
 
diff --git a/drivers/target/iscsi/iscsi_target_login.c 
b/drivers/target/iscsi/iscsi_target_login.c
index c20b561..4f59416 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -712,8 +713,18 @@ static void iscsi_post_login_start_timers(struct 
iscsi_conn *conn)
 
 int iscsit_start_kthreads(struct iscsi_conn *conn)
 {
+   struct iscsi_portal_group *tpg = conn->tpg;
+   struct cgroup_subsys_state *blk_css = NULL;
int ret = 0;
 
+   if (iscsit_get_tpg(tpg) < 0)
+   return -EINVAL;
+   if (tpg->blk_css) {
+   blk_css = tpg->blk_css;
+   css_get(blk_css);
+   }
+   iscsit_put_tpg(tpg);
+

Re: [Devel] [PATCH] ploop: don't forget to dec preq->io_count on a error path

2018-03-02 Thread Andrei Vagin

On Thu, Mar 01, 2018 at 01:19:10PM +0300, Vasily Averin wrote:
> Andrey,
> please take look at comment below.
> 
> On 2018-02-15 04:27, Andrei Vagin wrote:
> > [This sender failed our fraud detection checks and may not be who they 
> > appear to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]
> > 
> > preq->io_count is incremented before calling aio_kernel_submit()
> > and it is decremented in kaio_rw_aio_complete().
> > 
> > But if aio_kernel_submit() fails, preq->io_count has to be decremented
> > before exiting from the function.
> > ---
> >  drivers/block/ploop/io_kaio.c | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
> > index 2e48d13..6d922f5 100644
> > --- a/drivers/block/ploop/io_kaio.c
> > +++ b/drivers/block/ploop/io_kaio.c
> > @@ -686,6 +686,7 @@ kaio_io_page(struct ploop_io * io, int op, struct 
> > ploop_request * preq,
> >err, (op == IOCB_CMD_WRITE_ITER) ? "WRITE" : "READ",
> >preq->eng_state, preq->state, pos);
> > PLOOP_REQ_SET_ERROR(preq, err);
> > +   ploop_complete_io_request(preq);
> 
> Kostja pointed me this patch for vz6.
> 
> vz6 seems to be affected too,
> however I think it's better to use atomic_dec(&preq->io_count) here:
> it should balance atomic_inc called before aio_kernel_submit()
> and should not double ploop_complete_io_request called below.

There is nothing wrong with calling ploop_complete_io_request() twice. In a
success case, this reference is dropped by kaio_rw_kreq_complete(),
which calls ploop_complete_io_request() too.

> 
> What do you think?
> 
> > }
> > 
> >  out:
> > --
> > 1.8.3.1
> > 
> > 
> > ___
> > Devel mailing list
> > Devel@openvz.org
> > https://lists.openvz.org/mailman/listinfo/devel
> > 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] SPAM [PATCH 3/4] ploop: allow to set discard granularity and alignment attributes

2018-02-27 Thread Andrei Vagin

On Fri, Feb 16, 2018 at 11:11:54AM +0300, Konstantin Khorenko wrote:
> On 02/15/2018 04:36 AM, Andrei Vagin wrote:
> > 
> > Different backing stores can have different values for these parameters.
> > 
> > Signed-off-by: Andrei Vagin <ava...@openvz.org>
> > ---
> >  drivers/block/ploop/sysfs.c | 48 
> > +
> >  1 file changed, 48 insertions(+)
> > 
> > diff --git a/drivers/block/ploop/sysfs.c b/drivers/block/ploop/sysfs.c
> > index acd18ff..835558a 100644
> > --- a/drivers/block/ploop/sysfs.c
> > +++ b/drivers/block/ploop/sysfs.c
> > @@ -362,6 +362,51 @@ static int store_aborted(struct ploop_device * plo, 
> > u32 val)
> > return 0;
> >  }
> > 
> > +static u32 show_discard_granularity(struct ploop_device * plo)
> > +{
> > +   return plo->queue->limits.discard_granularity;
> > +}
> > +
> > +static int store_discard_granularity(struct ploop_device * plo, u32 val)
> > +{
> > +   int q = ilog2(val);
> > +
> > +   /* look at kaio_fill_zero_submit */
> > +   if (1 << q != val || val > PAGE_SIZE || val < 512)
> > +   return -EINVAL;
> > +
> > +   plo->queue->limits.discard_granularity = val;
> > +   return 0;
> > +}
> > +
> > +static u32 show_discard_alignment(struct ploop_device * plo)
> > +{
> > +   return plo->queue->limits.discard_alignment;
> > +}
> > +
> > +static int store_discard_alignment(struct ploop_device * plo, u32 val)
> > +{
> > +   int q = ilog2(val);
> > +
> > +   /* look at kaio_fill_zero_submit */
> > +   if (1 << q != val || val > PAGE_SIZE || val < 512)
> > +   return -EINVAL;
> > +
> > +   plo->queue->limits.discard_alignment = val;
> > +   return 0;
> > +}
> > +
> > +static u32 show_discard_zeroes_data(struct ploop_device * plo)
> > +{
> > +   return plo->queue->limits.discard_zeroes_data;
> > +}
> > +
> > +static int store_discard_zeroes_data(struct ploop_device * plo, u32 val)
> > +{
> > +   plo->queue->limits.discard_zeroes_data = !!val;
> > +   return 0;
> > +}
> > +
> >  static u32 show_top(struct ploop_device * plo)
> >  {
> > int top = -1;
> > @@ -550,6 +595,9 @@ static struct attribute *state_attributes[] = {
> > _A(blockable_reqs),
> > _A(blocked_bios),
> > _A(freeze_state),
> > +   _A2(discard_granularity),
> > +   _A2(discard_alignment),
> > +   _A2(discard_zeroes_data),
> > NULL
> >  };
> 
> BTW, why are these new parameters in state_attributes (the others are read-only,
> except for "aborted"),
> and not in tune_attributes like the other rw parameters?

It was done by mistake. I have sent a fix. Thank you!
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] ploop: move discard_* attributes in the tune_attributes group

2018-02-27 Thread Andrei Vagin

state_attributes contains only read-only attributes,
all read-write attributes are in tune_attributes.

Reported-by: Konstantin Khorenko 
---
 drivers/block/ploop/sysfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/ploop/sysfs.c b/drivers/block/ploop/sysfs.c
index 835558a..4899837 100644
--- a/drivers/block/ploop/sysfs.c
+++ b/drivers/block/ploop/sysfs.c
@@ -595,9 +595,6 @@ static struct attribute *state_attributes[] = {
_A(blockable_reqs),
_A(blocked_bios),
_A(freeze_state),
-   _A2(discard_granularity),
-   _A2(discard_alignment),
-   _A2(discard_zeroes_data),
NULL
 };
 
@@ -619,6 +616,9 @@ static struct attribute *tune_attributes[] = {
_A2(congestion_low_watermark),
_A2(max_active_requests),
_A2(push_backup_timeout),
+   _A2(discard_granularity),
+   _A2(discard_alignment),
+   _A2(discard_zeroes_data),
NULL
 };
 
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH vz7] net/vhost: Replace kmalloc with kvmalloc for order>=3

2018-02-27 Thread Andrei Vagin

On Tue, Feb 27, 2018 at 11:27:31AM +0300, Andrey Ryabinin wrote:
> 
> 
> On 02/27/2018 01:27 AM, Andrei Vagin wrote:
> > On Mon, Feb 26, 2018 at 03:29:51PM +0300, Oleg Babin wrote:
> >> Currently we allocate more than eight pages of memory in
> >> vhost_net_set_ubuf_info() function and we do not need
> >> them to be physically contiguous, so it is feasible to
> >> replace a call to kmalloc() with a call to kvmalloc().
> > 
> > I see only 6 pages
> > 
> 
> Round up these 6 pages to the nearest power of two and you'll get 2^3 = 8 pages

It isn't "more than eight pages of memory"

> 
> > UIO_MAXIOV = 1024
> > 
> > struct ubuf_info {
> > void (*callback)(struct ubuf_info *, bool zerocopy_success);
> > void *ctx;
> > unsigned long desc;
> > };
> > 
> > sizeof(struct ubuf_info) = 24
> > 
> > 1024 * 24 / 4096
> > 6
> > 
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 2/2] target: set ALUA_TG_PT_STANDBY if a backing store is in standby mode

2018-02-26 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

HA can decide to switch the current target into standby mode and switch
another target into active mode. In this case, an image lease will be
granted to the new target, and the current target has to complete all
commands and set the ASCQ_04H_ALUA_TG_PT_STANDBY bit in their status.

https://pmc.acronis.com/browse/VSTOR-7879

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_alua.c  |  5 +
 drivers/target/target_core_alua.h  |  1 +
 drivers/target/target_core_iblock.c| 10 --
 drivers/target/target_core_transport.c |  8 ++--
 include/target/target_core_base.h  |  1 +
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c
index f1c1733..827de6f 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -473,6 +473,11 @@ static inline void set_ascq(struct se_cmd *cmd, u8 
alua_ascq)
cmd->scsi_ascq = alua_ascq;
 }

+void core_alua_set_ascq(struct se_cmd *cmd, u8 alua_ascq)
+{
+   set_ascq(cmd, alua_ascq);
+}
+
 static inline void core_alua_state_nonoptimized(
struct se_cmd *cmd,
unsigned char *cdb,
diff --git a/drivers/target/target_core_alua.h 
b/drivers/target/target_core_alua.h
index df1cf49..5f79d4b 100644
--- a/drivers/target/target_core_alua.h
+++ b/drivers/target/target_core_alua.h
@@ -161,5 +161,6 @@ extern ssize_t 
core_alua_store_secondary_write_metadata(struct se_lun *,
const char *, size_t);
 extern int core_setup_alua(struct se_device *);
 extern sense_reason_t target_alua_state_check(struct se_cmd *cmd);
+extern void core_alua_set_ascq(struct se_cmd *cmd, u8 alua_ascq);

 #endif /* TARGET_CORE_ALUA_H */
diff --git a/drivers/target/target_core_iblock.c 
b/drivers/target/target_core_iblock.c
index b451916..c68e255 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -297,9 +297,15 @@ static void iblock_complete_cmd(struct se_cmd *cmd)
if (!atomic_dec_and_test(>pending))
return;

-   if (atomic_read(>ib_bio_err_cnt))
+   if (atomic_read(>ib_bio_err_cnt)) {
+   struct iblock_dev *ib_dev = IBLOCK_DEV(cmd->se_dev);
+   struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd);
+
+   if (blk_queue_standby(q))
+   cmd->transport_state |= CMD_T_STANDBY;
+
status = SAM_STAT_CHECK_CONDITION;
-   else
+   } else
status = SAM_STAT_GOOD;

target_complete_cmd(cmd, status);
diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index 1682d60..5d2487b 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -629,6 +629,7 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int 
remove)
 static void target_complete_failure_work(struct work_struct *work)
 {
struct se_cmd *cmd = container_of(work, struct se_cmd, work);
+   sense_reason_t sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;

switch (cmd->data_direction) {
case DMA_FROM_DEVICE:
@@ -643,8 +644,11 @@ static void target_complete_failure_work(struct 
work_struct *work)
break;
}

-   transport_generic_request_failure(cmd,
-   TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+   if (cmd->transport_state & CMD_T_STANDBY) {
+   core_alua_set_ascq(cmd, ASCQ_04H_ALUA_TG_PT_STANDBY);
+   sense_reason = TCM_CHECK_CONDITION_NOT_READY;
+   }
+   transport_generic_request_failure(cmd, sense_reason);
 }

 /*
diff --git a/include/target/target_core_base.h 
b/include/target/target_core_base.h
index 196d4df..2889350 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -538,6 +538,7 @@ struct se_cmd {
 #define CMD_T_DEV_ACTIVE   (1 << 7)
 #define CMD_T_REQUEST_STOP (1 << 8)
 #define CMD_T_BUSY (1 << 9)
+#define CMD_T_STANDBY  (1 << 31)
spinlock_t  t_state_lock;
struct completion   t_transport_stop_comp;

--
1.8.3.1


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 1/2] ploop: add a standby mode

2018-02-26 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

This mode shows that a delta lease was stolen and it is impossible to
handle any requests.

We want to know about this situation from the iscsi target. When HA
decides that the current target is broken, it can initialize another
target with the same delta. In this case, the first target has to complete
all in-progress commands and set the ASCQ_04H_ALUA_TG_PT_STANDBY bit in
their status.

In Linux, bio-s are always completed with EIO in error cases, so we need
another way to determine this state. This patch adds a new block
queue flag QUEUE_FLAG_STANDBY.

https://pmc.acronis.com/browse/VSTOR-7879
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/block/ploop/dev.c | 11 +++
 drivers/block/ploop/io_kaio.c | 20 +++-
 include/linux/blkdev.h|  3 +++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 6c0b3c0..cf257a2 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -854,6 +854,11 @@ static void ploop_make_request(struct request_queue *q, 
struct bio *bio)
part_stat_add(cpu, part, sectors[rw], bio_sectors(bio));
part_stat_unlock();

+   if (blk_queue_standby(plo->queue)) {
+   BIO_ENDIO(q, bio, -EIO);
+   return;
+   }
+
if (unlikely(bio->bi_size == 0)) {
/* Is it possible? This makes sense if the request is
 * marked as FLUSH, otherwise just warn and complete. */
@@ -3325,6 +3330,11 @@ static int ploop_replace_delta(struct ploop_device * 
plo, unsigned long arg)
ploop_quiesce(plo);
ploop_map_destroy(>map);
list_replace_init(_delta->list, >list);
+
+   spin_lock_irq(plo->queue->queue_lock);
+   queue_flag_clear(QUEUE_FLAG_STANDBY, plo->queue);
+   spin_unlock_irq(plo->queue->queue_lock);
+
ploop_relax(plo);

old_delta->ops->stop(old_delta);
@@ -3988,6 +3998,7 @@ static int ploop_start(struct ploop_device * plo, struct 
block_device *bdev)

blk_queue_max_discard_sectors(plo->queue, INT_MAX);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, plo->queue);
+   queue_flag_clear_unlocked(QUEUE_FLAG_STANDBY, plo->queue);

set_capacity(plo->disk, plo->bd_size);
bd_set_size(bdev, (loff_t)plo->bd_size << 9);
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 42ba13b..c360f2d 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -118,15 +118,33 @@ static void kaio_rw_aio_complete(u64 data, long res)

if (unlikely(res < 0)) {
struct bio *b = preq->aux_bio;
+
printk("kaio_rw_aio_complete: kaio failed with err=%ld "
   "(rw=%s; state=%ld/0x%lx; clu=%d; iblk=%d; aux=%ld)\n",
   res, (preq->req_rw & REQ_WRITE) ? "WRITE" : "READ",
   preq->eng_state, preq->state, preq->req_cluster,
   preq->iblock, b ? b->bi_sector : -1);
+
bio_list_for_each(b, >bl)
printk(" bio=%p: bi_sector=%ld bi_size=%d\n",
   b, b->bi_sector, b->bi_size);
-   PLOOP_REQ_SET_ERROR(preq, res);
+
+   if (res == -EBUSY) { /* a delta lease was stolen */
+   struct request_queue *q = preq->plo->queue;
+   int prev;
+
+   spin_lock_irq(q->queue_lock);
+   prev = queue_flag_test_and_set(QUEUE_FLAG_STANDBY, q);
+   spin_unlock_irq(q->queue_lock);
+
+   if (!prev)
+   printk("ploop%d was switched into "
+   "the standby mode\n", preq->plo->index);
+
+   ploop_req_set_error(preq, res);
+   } else {
+   PLOOP_REQ_SET_ERROR(preq, res);
+   }
}

kaio_complete_io_request(preq);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24cb38b..7e92275 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -571,6 +571,8 @@ struct request_queue {
 #define QUEUE_FLAG_SG_GAPS 23  /* queue doesn't support SG gaps */
 #define QUEUE_FLAG_DAX 24  /* device supports DAX */

+#define QUEUE_FLAG_STANDBY 31  /* unable to handle read/write requests 
*/
+
 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) |\
 (1 << QUEUE_FLAG_STACKABLE)|   \
 (1 << QUEUE_FLAG

[Devel] [PATCH 3/3] target: Fix LUN_RESET active I/O handling for ACK_KREF

2018-02-26 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

From: Nicholas Bellinger 

Here is a backport of the mainline commit:
ML: febe562c20dfa8f33bee7d419c6b517986a5aa33

https://pmc.acronis.com/browse/VSTOR-7973

This patch fixes a NULL pointer se_cmd->cmd_kref < 0
refcount bug during TMR LUN_RESET with active se_cmd
I/O, that can be triggered during se_cmd descriptor
shutdown + release via core_tmr_drain_state_list() code.

To address this bug, add common __target_check_io_state()
helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE
checking, and set CMD_T_ABORTED + obtain ->cmd_kref for
both cases ahead of last target_put_sess_cmd() after
TFO->aborted_task() -> transport_cmd_finish_abort()
callback has completed.

It also introduces SCF_ACK_KREF to determine when
transport_cmd_finish_abort() needs to drop the second
extra reference, ahead of calling target_put_sess_cmd()
for the final kref_put(&se_cmd->cmd_kref).

It also updates transport_cmd_check_stop() to avoid
holding se_cmd->t_state_lock while dropping se_cmd
device state via target_remove_from_state_list(), now
that core_tmr_drain_state_list() is holding the
se_device lock while checking se_cmd state from
within TMR logic.

Finally, move transport_put_cmd() release of SGL +
TMR + extended CDB memory into target_free_cmd_mem()
in order to avoid potential resource leaks in TMR
ABORT_TASK + LUN_RESET code-paths.  Also update
target_release_cmd_kref() accordingly.

Reviewed-by: Quinn Tran 
Cc: Himanshu Madhani 
Cc: Sagi Grimberg 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Andy Grover 
Cc: Mike Christie 
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger 
---
 drivers/target/target_core_tmr.c   | 70 +++---
 drivers/target/target_core_transport.c | 67 ++--
 2 files changed, 77 insertions(+), 60 deletions(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index ae5c3e5..4fe985a 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -110,6 +110,36 @@ static int target_check_cdb_and_preempt(struct list_head 
*list,
return 1;
 }

+static bool __target_check_io_state(struct se_cmd *se_cmd)
+{
+   struct se_session *sess = se_cmd->se_sess;
+
+   assert_spin_locked(>sess_cmd_lock);
+   WARN_ON_ONCE(!irqs_disabled());
+   /*
+* If command already reached CMD_T_COMPLETE state within
+* target_complete_cmd(), this se_cmd has been passed to
+* fabric driver and will not be aborted.
+*
+* Otherwise, obtain a local se_cmd->cmd_kref now for TMR
+* ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as
+* long as se_cmd->cmd_kref is still active unless zero.
+*/
+   spin_lock(_cmd->t_state_lock);
+   if (se_cmd->transport_state & CMD_T_COMPLETE) {
+   int ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd);
+
+   pr_debug("Attempted to abort io tag: %u already complete,"
+   " skipping\n", ref_tag);
+   spin_unlock(_cmd->t_state_lock);
+   return false;
+   }
+   se_cmd->transport_state |= CMD_T_ABORTED;
+   spin_unlock(_cmd->t_state_lock);
+
+   return kref_get_unless_zero(_cmd->cmd_kref);
+}
+
 void core_tmr_abort_task(
struct se_device *dev,
struct se_tmr_req *tmr,
@@ -133,37 +163,26 @@ void core_tmr_abort_task(
if (tmr->ref_task_tag != ref_tag)
continue;

-   if (!kref_get_unless_zero(_cmd->cmd_kref))
-   continue;
-
printk("ABORT_TASK: Found referenced %s task_tag: %u\n",
se_cmd->se_tfo->get_fabric_name(), ref_tag);

-   spin_lock(_cmd->t_state_lock);
-   if (se_cmd->transport_state & CMD_T_COMPLETE) {
-   printk("ABORT_TASK: ref_tag: %u already complete, 
skipping\n", ref_tag);
-   spin_unlock(_cmd->t_state_lock);
+   if (!__target_check_io_state(se_cmd)) {
spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);
-
target_put_sess_cmd(se_cmd);
-
goto out;
}
-   se_cmd->transport_state |= CMD_T_ABORTED;
-   spin_unlock(_cmd->t_state_lock);
-
list_del_init(_cmd->se_cmd_list);
spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);

cancel_work_sync(_cmd->work);
transport_wait_for_tasks(se_cmd);

-   target_put_sess_cmd(se_cmd);

if (se_cmd->se_cmd_flags & SCF_SE_LUN_CMD)

[Devel] [PATCH 1/3] target: Invoke release_cmd() callback without holding a spinlock

2018-02-26 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

From: Bart Van Assche 

Here is a backport of the mainline commit:
ML: 9ff9d15eddd13ecdd41876c5e1f31ddbb127101c

This patch fixes the following kernel warning because it avoids that
IRQs are disabled while ft_release_cmd() is invoked (fc_seq_set_resp()
invokes spin_unlock_bh()):

WARNING: CPU: 3 PID: 117 at kernel/softirq.c:150 
__local_bh_enable_ip+0xaa/0x110()
Call Trace:
 [] dump_stack+0x4f/0x7b
 [] warn_slowpath_common+0x8a/0xc0
 [] warn_slowpath_null+0x1a/0x20
 [] __local_bh_enable_ip+0xaa/0x110
 [] _raw_spin_unlock_bh+0x39/0x40
 [] fc_seq_set_resp+0xe4/0x100 [libfc]
 [] ft_free_cmd+0x4a/0x90 [tcm_fc]
 [] ft_release_cmd+0x12/0x20 [tcm_fc]
 [] target_release_cmd_kref+0x56/0x90 [target_core_mod]
 [] target_put_sess_cmd+0xc0/0x110 [target_core_mod]
 [] transport_release_cmd+0x41/0x70 [target_core_mod]
 [] transport_generic_free_cmd+0x35/0x420 [target_core_mod]

Signed-off-by: Bart Van Assche 
Acked-by: Joern Engel 
Reviewed-by: Andy Grover 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Sagi Grimberg 
Signed-off-by: Nicholas Bellinger 
---
 drivers/target/target_core_tmr.c   |  7 ++-
 drivers/target/target_core_transport.c | 11 ++-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 7723d11..ae5c3e5 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -133,6 +133,9 @@ void core_tmr_abort_task(
if (tmr->ref_task_tag != ref_tag)
continue;

+   if (!kref_get_unless_zero(_cmd->cmd_kref))
+   continue;
+
printk("ABORT_TASK: Found referenced %s task_tag: %u\n",
se_cmd->se_tfo->get_fabric_name(), ref_tag);

@@ -141,13 +144,15 @@ void core_tmr_abort_task(
printk("ABORT_TASK: ref_tag: %u already complete, 
skipping\n", ref_tag);
spin_unlock(_cmd->t_state_lock);
spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);
+
+   target_put_sess_cmd(se_cmd);
+
goto out;
}
se_cmd->transport_state |= CMD_T_ABORTED;
spin_unlock(_cmd->t_state_lock);

list_del_init(_cmd->se_cmd_list);
-   kref_get(_cmd->cmd_kref);
spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);

cancel_work_sync(_cmd->work);
diff --git a/drivers/target/target_core_transport.c 
b/drivers/target/target_core_transport.c
index b27c17b..b0aa9ac 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2494,19 +2494,21 @@ static void target_release_cmd_kref(struct kref *kref)
 {
struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
struct se_session *se_sess = se_cmd->se_sess;
+   unsigned long flags;

+   spin_lock_irqsave(_sess->sess_cmd_lock, flags);
if (list_empty(_cmd->se_cmd_list)) {
-   spin_unlock(_sess->sess_cmd_lock);
+   spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);
se_cmd->se_tfo->release_cmd(se_cmd);
return;
}
if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
-   spin_unlock(_sess->sess_cmd_lock);
+   spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);
complete(_cmd->cmd_wait_comp);
return;
}
list_del(_cmd->se_cmd_list);
-   spin_unlock(_sess->sess_cmd_lock);
+   spin_unlock_irqrestore(_sess->sess_cmd_lock, flags);

se_cmd->se_tfo->release_cmd(se_cmd);
 }
@@ -2522,8 +2524,7 @@ int target_put_sess_cmd(struct se_cmd *se_cmd)
se_cmd->se_tfo->release_cmd(se_cmd);
return 1;
}
-   return kref_put_spinlock_irqsave(_cmd->cmd_kref, 
target_release_cmd_kref,
-   _sess->sess_cmd_lock);
+   return kref_put(_cmd->cmd_kref, target_release_cmd_kref);
 }
 EXPORT_SYMBOL(target_put_sess_cmd);

--
1.8.3.1


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 2/3] target: remove the unused SCF_CMD_XCOPY_PASSTHROUGH flag

2018-02-26 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

From: Christoph Hellwig 

Here is a backport of the mainline commit:
ML: c3d0a7c21db219ef87679c2a667aba9f138524db

Signed-off-by: Christoph Hellwig 
Signed-off-by: Nicholas Bellinger 
---
 drivers/target/target_core_xcopy.c | 4 ++--
 include/target/target_core_base.h  | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c 
b/drivers/target/target_core_xcopy.c
index 1d2421a..a7c8974 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -561,7 +561,7 @@ static int target_xcopy_init_pt_lun(
 * target_xcopy_setup_pt_port()
 */
if (!remote_port) {
-   pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | 
SCF_CMD_XCOPY_PASSTHROUGH;
+   pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
return 0;
}

@@ -569,7 +569,7 @@ static int target_xcopy_init_pt_lun(
pt_cmd->se_dev = se_dev;

pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev);
-   pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
+   pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;

pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n",
pt_cmd->se_lun->lun_se_dev);
diff --git a/include/target/target_core_base.h 
b/include/target/target_core_base.h
index 7b6699c..196d4df 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -171,8 +171,7 @@ enum se_cmd_flags_table {
SCF_ACK_KREF= 0x0004,
SCF_COMPARE_AND_WRITE   = 0x0008,
SCF_COMPARE_AND_WRITE_POST  = 0x0010,
-   SCF_CMD_XCOPY_PASSTHROUGH   = 0x0020,
-   SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x0040,
+   SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x0020,
 };

 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
--
1.8.3.1


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH vz7] net/vhost: Replace kmalloc with kvmalloc for order>=3

2018-02-26 Thread Andrei Vagin

On Mon, Feb 26, 2018 at 03:29:51PM +0300, Oleg Babin wrote:
> Currently we allocate more than eight pages of memory in
> vhost_net_set_ubuf_info() function and we do not need
> them to be physically contiguous, so it is feasible to
> replace a call to kmalloc() with a call to kvmalloc().

I see only 6 pages

UIO_MAXIOV = 1024

struct ubuf_info {
void (*callback)(struct ubuf_info *, bool zerocopy_success);
void *ctx;
unsigned long desc;
};

sizeof(struct ubuf_info) = 24

1024 * 24 / 4096
6

> 
> https://jira.sw.ru/browse/PSBM-81803
> Signed-off-by: Oleg Babin 
> ---
>  drivers/vhost/net.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 1076a46..d397ceb 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -179,8 +179,8 @@ int vhost_net_set_ubuf_info(struct vhost_net *n)
>   zcopy = vhost_net_zcopy_mask & (0x1 << i);
>   if (!zcopy)
>   continue;
> - n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
> -   UIO_MAXIOV, GFP_KERNEL);
> + n->vqs[i].ubuf_info = kvmalloc(sizeof(*n->vqs[i].ubuf_info) *
> +UIO_MAXIOV, GFP_KERNEL);
>   if  (!n->vqs[i].ubuf_info)
>   goto err;
>   }
> -- 
> 1.8.3.1
> 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH v2] ploop: Discard bios of size not aligned at 512 bytes

2018-02-21 Thread Andrei Vagin

On Wed, Feb 21, 2018 at 09:45:17PM +0300, Kirill Tkhai wrote:
> Currently, we have BUG_ON() on this place. But it's
> easy to generate 511 bytes request from userspace,
> as it was found by Andrey Vagin via iscsi tests:
> 
> sg_raw -r511 /dev/sda 28 0 0 0 0 0 0 0 9 0
> 
> This patch replaces BUG_ON() with bio discarding
> like other drivers do when they see a size above
> their logical block size.
> 
> https://jira.sw.ru/browse/PSBM-81576
>

Acked-by: Andrei Vagin <ava...@virtuozzo.com>

> Reported-by: Andrey Vagin <ava...@virtuozzo.com>
> Signed-off-by: Kirill Tkhai <ktk...@virtuozzo.com>
> ---
>  drivers/block/ploop/dev.c |6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 6c0b3c0252c0..82d986922b4b 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -846,7 +846,11 @@ static void ploop_make_request(struct request_queue *q, 
> struct bio *bio)
>   plo->st.bio_in++;
>  
>   BUG_ON(bio->bi_idx);
> - BUG_ON(bio->bi_size & 511);
> + if (bio->bi_size & 511) {
> + pr_err_once("ploop%d: dropped bio: bi_size=%u\n", plo->index, 
> bio->bi_size);
> + BIO_ENDIO(q, bio, -EIO);
> + return;
> + }
>  
>   cpu = part_stat_lock();
>   part = disk_map_sector_rcu(plo->disk, bio->bi_sector);
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH] ploop: Discard bios of size not aligned at 512 bytes

2018-02-21 Thread Andrei Vagin

On Wed, Feb 21, 2018 at 02:41:47PM +0300, Kirill Tkhai wrote:
> Currently, we have BUG_ON() on this place. But it's
> easy to generate 511 bytes request from userspace,
> as it was found by Andrey Vagin via iscsi tests:
> 
> sg_raw -r511 /dev/sda 28 0 0 0 0 0 0 0 9 0
> 
> This patch replaces BUG_ON() with bio discarding
> like other drivers do when they see a size above
> their logical block size.
> 
> https://jira.sw.ru/browse/PSBM-81576
> 
> Reported-by: Andrey Vagin 
> Signed-off-by: Kirill Tkhai 
> ---
>  drivers/block/ploop/dev.c |6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
> index 6c0b3c0252c0..7f079aea0e88 100644
> --- a/drivers/block/ploop/dev.c
> +++ b/drivers/block/ploop/dev.c
> @@ -846,7 +846,11 @@ static void ploop_make_request(struct request_queue *q, 
> struct bio *bio)
>   plo->st.bio_in++;
>  
>   BUG_ON(bio->bi_idx);
> - BUG_ON(bio->bi_size & 511);
> + if (bio->bi_size & 511) {
> + pr_err_once("ploop: dropped bio: bi_size=%u\n", bio->bi_size);

I think it would be better if we knew which ploop produced this
message:
pr_err_once("ploop%d: dropped bio: bi_size=%u\n", dev->index, 
bio->bi_size);

> + BIO_ENDIO(q, bio, -EIO);
> + return;
> + }
>  
>   cpu = part_stat_lock();
>   part = disk_map_sector_rcu(plo->disk, bio->bi_sector);
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 3/4] ploop: allow to set discard granularity and alignment attributes

2018-02-15 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

Different backing stores can have different values for these parameters.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/block/ploop/sysfs.c | 48 +
 1 file changed, 48 insertions(+)

diff --git a/drivers/block/ploop/sysfs.c b/drivers/block/ploop/sysfs.c
index acd18ff..835558a 100644
--- a/drivers/block/ploop/sysfs.c
+++ b/drivers/block/ploop/sysfs.c
@@ -362,6 +362,51 @@ static int store_aborted(struct ploop_device * plo, u32 
val)
return 0;
 }

+static u32 show_discard_granularity(struct ploop_device * plo)
+{
+   return plo->queue->limits.discard_granularity;
+}
+
+static int store_discard_granularity(struct ploop_device * plo, u32 val)
+{
+   int q = ilog2(val);
+
+   /* look at kaio_fill_zero_submit */
+   if (1 << q != val || val > PAGE_SIZE || val < 512)
+   return -EINVAL;
+
+   plo->queue->limits.discard_granularity = val;
+   return 0;
+}
+
+static u32 show_discard_alignment(struct ploop_device * plo)
+{
+   return plo->queue->limits.discard_alignment;
+}
+
+static int store_discard_alignment(struct ploop_device * plo, u32 val)
+{
+   int q = ilog2(val);
+
+   /* look at kaio_fill_zero_submit */
+   if (1 << q != val || val > PAGE_SIZE || val < 512)
+   return -EINVAL;
+
+   plo->queue->limits.discard_alignment = val;
+   return 0;
+}
+
+static u32 show_discard_zeroes_data(struct ploop_device * plo)
+{
+   return plo->queue->limits.discard_zeroes_data;
+}
+
+static int store_discard_zeroes_data(struct ploop_device * plo, u32 val)
+{
+   plo->queue->limits.discard_zeroes_data = !!val;
+   return 0;
+}
+
 static u32 show_top(struct ploop_device * plo)
 {
int top = -1;
@@ -550,6 +595,9 @@ static struct attribute *state_attributes[] = {
_A(blockable_reqs),
_A(blocked_bios),
_A(freeze_state),
+   _A2(discard_granularity),
+   _A2(discard_alignment),
+   _A2(discard_zeroes_data),
NULL
 };

--
1.8.3.1


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH rh7] target/sbc: Add LBPRZ attribute + control CDB emulation

2018-02-14 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

From: Jamie Pocas <jamie.po...@emc.com>

This patch is back-ported from the upstream kernel:
ML e6f41633cb79b55ead84b023c02035322c7827e7

We need it to support the WRITE_SAME UNMAP command.

This change sets the LBPRZ flag in EVPD page b2h and READ CAPACITY (16)
based on a new unmap_zeroes_data device attribute. This flag is set
automatically for iblock based on underlying block device queue's
discard_zeroes_data flag.

Signed-off-by: Jamie Pocas <jamie.po...@emc.com>
Signed-off-by: Nicholas Bellinger <n...@linux-iscsi.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_device.c   | 34 +++
 drivers/target/target_core_file.c |  1 +
 drivers/target/target_core_iblock.c   |  1 +
 drivers/target/target_core_rd.c   |  1 +
 drivers/target/target_core_sbc.c  | 11 -
 drivers/target/target_core_spc.c  | 12 ++
 include/target/target_core_backend.h  |  1 +
 include/target/target_core_backend_configfs.h |  2 ++
 include/target/target_core_base.h |  3 +++
 9 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_device.c 
b/drivers/target/target_core_device.c
index c928bd1..e561823 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -708,6 +708,38 @@ int se_dev_set_unmap_granularity_alignment(
 }
 EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment);

+int se_dev_set_unmap_zeroes_data(
+   struct se_device *dev,
+   u32 flag)
+{
+   struct se_dev_attrib *da = >dev_attrib;
+
+   if (flag > 1)
+   return -EINVAL;
+
+   if (da->da_dev->export_count) {
+   pr_err("dev[%p]: Unable to change SE Device"
+  " unmap_zeroes_data while export_count is %d\n",
+  da->da_dev, da->da_dev->export_count);
+   return -EINVAL;
+   }
+   /*
+* We expect this value to be non-zero when generic Block Layer
+* Discard supported is detected iblock_configure_device().
+*/
+   if (flag && !da->max_unmap_block_desc_count) {
+   pr_err("dev[%p]: Thin Provisioning LBPRZ will not be set"
+  " because max_unmap_block_desc_count is zero\n",
+  da->da_dev);
+  return -ENOSYS;
+   }
+   da->unmap_zeroes_data = flag;
+   pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n",
+da->da_dev, flag);
+   return 0;
+}
+EXPORT_SYMBOL(se_dev_set_unmap_zeroes_data);
+
 int se_dev_set_max_write_same_len(
struct se_device *dev,
u32 max_write_same_len)
@@ -1534,6 +1566,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, 
const char *name)
dev->dev_attrib.unmap_granularity = DA_UNMAP_GRANULARITY_DEFAULT;
dev->dev_attrib.unmap_granularity_alignment =
DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT;
+   dev->dev_attrib.unmap_zeroes_data =
+   DA_UNMAP_ZEROES_DATA_DEFAULT;
dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN;

xcopy_lun = >xcopy_lun;
diff --git a/drivers/target/target_core_file.c 
b/drivers/target/target_core_file.c
index 350a3b6..de165f5 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -1008,6 +1008,7 @@ static struct configfs_attribute 
*fileio_backend_dev_attrs[] = {
_dev_attrib_max_unmap_block_desc_count.attr,
_dev_attrib_unmap_granularity.attr,
_dev_attrib_unmap_granularity_alignment.attr,
+   _dev_attrib_unmap_zeroes_data.attr,
_dev_attrib_max_write_same_len.attr,
NULL,
 };
diff --git a/drivers/target/target_core_iblock.c 
b/drivers/target/target_core_iblock.c
index 60ff434..b451916 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -895,6 +895,7 @@ static struct configfs_attribute 
*iblock_backend_dev_attrs[] = {
_dev_attrib_max_unmap_block_desc_count.attr,
_dev_attrib_unmap_granularity.attr,
_dev_attrib_unmap_granularity_alignment.attr,
+   _dev_attrib_unmap_zeroes_data.attr,
_dev_attrib_max_write_same_len.attr,
NULL,
 };
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index d27d588..fa35bcf 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -664,6 +664,7 @@ static struct configfs_attribute 
*rd_mcp_backend_dev_attrs[] = {
_mcp_dev_attrib_max_unmap_block_desc_count.attr,
_mcp_dev_attrib_unmap_granularity.attr,
_mcp_dev_attrib_unmap_granula

[Devel] [PATCH 1/4] fuse: add a new async operation to unmap regions

2018-02-14 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

The fuse interface allows any operation to be run asynchronously, because
the kernel redirects all operations to a user daemon and then waits for an
answer.

In ploop, we want to handle discard requests via fallocate, and
the simplest way to do this is to run fallocate(FALLOC_FL_PUNCH_HOLE)
asynchronously, like the write command.

This patch adds a new async command IOCB_CMD_UNMAP_ITER, which sends
fallocate(FALLOC_FL_PUNCH_HOLE) to a fuse user daemon.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 fs/aio.c |  1 +
 fs/fuse/file.c   | 65 ++--
 fs/fuse/fuse_i.h |  3 ++
 include/uapi/linux/aio_abi.h |  1 +
 4 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 3a6a9b0..cdc7558 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1492,6 +1492,7 @@ rw_common:
ret = aio_read_iter(req);
break;

+   case IOCB_CMD_UNMAP_ITER:
case IOCB_CMD_WRITE_ITER:
ret = aio_write_iter(req);
break;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 877c41f..cf268f9 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -920,6 +920,19 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, 
struct fuse_req *req)
if (!req->bvec)
fuse_release_user_pages(req, !io->write);

+   if (req->in.h.opcode == FUSE_FALLOCATE) {
+   if (req->out.h.error)
+   printk("fuse_aio_complete_req: request (fallocate 
fh=0x%llx "
+  "offset=%lld length=%lld mode=%x) completed with 
err=%d\n",
+  req->misc.fallocate.in.fh,
+  req->misc.fallocate.in.offset,
+  req->misc.fallocate.in.length,
+  req->misc.fallocate.in.mode,
+  req->out.h.error);
+   fuse_aio_complete(io, req->out.h.error, -1);
+   return;
+   }
+
if (io->write) {
if (req->misc.write.in.size != req->misc.write.out.size)
pos = req->misc.write.in.offset - io->offset +
@@ -1322,6 +1335,35 @@ static void fuse_write_fill(struct fuse_req *req, struct 
fuse_file *ff,
req->out.args[0].value = outarg;
 }

+static size_t fuse_send_unmap(struct fuse_req *req, struct fuse_io_priv *io,
+ loff_t pos, size_t count, fl_owner_t owner)
+{
+   struct file *file = io->file;
+   struct fuse_file *ff = file->private_data;
+   struct fuse_conn *fc = ff->fc;
+   struct fuse_fallocate_in *inarg = >misc.fallocate.in;
+
+   inarg->fh = ff->fh;
+   inarg->offset = pos;
+   inarg->length = count;
+   inarg->mode = FALLOC_FL_KEEP_SIZE |
+ FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_ZERO_RANGE;
+   req->in.h.opcode = FUSE_FALLOCATE;
+   req->in.h.nodeid = ff->nodeid;
+   req->in.numargs = 1;
+   req->in.args[0].size = sizeof(struct fuse_fallocate_in);
+   req->in.args[0].value = inarg;
+
+   fuse_account_request(fc, count);
+
+   if (io->async)
+   return fuse_async_req_send(fc, req, count, io);
+
+   fuse_request_send(fc, req);
+   return count;
+}
+
 static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
  loff_t pos, size_t count, fl_owner_t owner)
 {
@@ -3455,7 +3497,7 @@ static ssize_t fuse_direct_IO_bvec(int rw, struct kiocb 
*iocb,
req->bvec = bvec;
}

-   if (filled + bvec->bv_len <= nmax) {
+   if (bvec_len && filled + bvec->bv_len <= nmax) {
filled += bvec->bv_len;
req->num_bvecs++;
bvec++;
@@ -3465,14 +3507,21 @@ static ssize_t fuse_direct_IO_bvec(int rw, struct kiocb 
*iocb,
continue;
}

-   BUG_ON(!filled);

-   if (rw == WRITE)
-   nres = fuse_send_write(req, io, pos,
-   filled, NULL);
-   else
-   nres = fuse_send_read(req, io, pos,
-   filled, NULL);
+   if (iocb->ki_opcode == IOCB_CMD_UNMAP_ITER) {
+   req->in.argbvec = 0;
+   nres = fuse_send_unmap(req, io, pos,
+   iocb->ki_nbytes, NULL);
+   filled = nres;
+   } else {
+   BUG_ON(!filled);
+

[Devel] [PATCH 2/4] ploop: handle discard requests via fallocate

2018-02-14 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

Currently ploop can be compacted, but in this case discard requests are
handled with the help of a userspace tool.

This patch adds native support for discard requests, with a few
restrictions.

Currently this works only for raw images on a fuse file system, for
these reasons:
* Only the fuse file system allows fallocate to be executed asynchronously.
* pio_direct (ext4) requires that all blocks in the image be allocated
  and initialized.
* The ploop1 format requires more changes to handle an index table.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/block/ploop/dev.c| 18 ++
 drivers/block/ploop/fmt_ploop1.c |  2 ++
 drivers/block/ploop/io_direct.c  |  1 +
 drivers/block/ploop/io_kaio.c|  8 +++-
 include/linux/ploop/ploop.h  |  1 +
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 12fde00..6c0b3c0 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -517,7 +517,8 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
ploop_pb_check_and_clear_bit(plo->pbd, preq->req_cluster))
ploop_set_blockable(plo, preq);

-   if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+   if (test_bit(PLOOP_S_DISCARD, >state) &&
+   unlikely(bio->bi_rw & REQ_DISCARD)) {
int clu_size = 1 << plo->cluster_log;
int i = (clu_size - 1) & bio->bi_sector;
int err = 0;
@@ -570,13 +571,13 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * 
bio,

__TRACE("A %p %u\n", preq, preq->req_cluster);

-   if (unlikely(bio->bi_rw & REQ_DISCARD))
+   if (unlikely(preq->state & (1 << PLOOP_REQ_DISCARD)))
plo->bio_discard_qlen--;
else
plo->bio_qlen--;
ploop_entry_add(plo, preq);

-   if (bio->bi_size && !(bio->bi_rw & REQ_DISCARD))
+   if (bio->bi_size && !(preq->state & (1 << PLOOP_REQ_DISCARD)))
insert_entry_tree(plo, preq, drop_list);

trace_bio_queue(preq);
@@ -1487,7 +1488,7 @@ void ploop_complete_io_state(struct ploop_request * preq)

spin_lock_irqsave(>lock, flags);
__TRACE("C %p %u\n", preq, preq->req_cluster);
-   if (preq->error)
+   if (preq->error && !(preq->req_rw & REQ_DISCARD))
set_bit(PLOOP_S_ABORT, >state);

list_add_tail(>list, >ready_queue);
@@ -2570,6 +2571,15 @@ restart:
break;
}

+   if ((preq->req_rw & REQ_DISCARD) &&
+   !test_bit(PLOOP_REQ_DISCARD, >state) &&
+   test_bit(PLOOP_S_NO_FALLOC_DISCARD, >state)) {
+   preq->eng_state = PLOOP_E_COMPLETE;
+   preq->error = -EOPNOTSUPP;
+   ploop_complete_io_state(preq);
+   return;
+   }
+
ploop_entry_request(preq);
break;

diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c
index 0034216..c2be627 100644
--- a/drivers/block/ploop/fmt_ploop1.c
+++ b/drivers/block/ploop/fmt_ploop1.c
@@ -189,6 +189,8 @@ ploop1_open(struct ploop_delta * delta)
((u64)ph->bd_size + ph->l1_off) << 9)
delta->flags |= PLOOP_FMT_PREALLOCATED;

+   set_bit(PLOOP_S_NO_FALLOC_DISCARD, >plo->state);
+
return 0;

 out_err:
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index d6b1118..3c57aca 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -1032,6 +1032,7 @@ dio_init(struct ploop_io * io)
init_timer(>fsync_timer);
io->fsync_timer.function = fsync_timeout;
io->fsync_timer.data = (unsigned long)io;
+   set_bit(PLOOP_S_NO_FALLOC_DISCARD, >plo->state);

return 0;
 }
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index ee9ba26..543f98b 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -167,7 +167,9 @@ static int kaio_kernel_submit(struct file *file, struct 
kaio_req *kreq,
if (!iocb)
return -ENOMEM;

-   if (rw & REQ_WRITE)
+   if (rw & REQ_DISCARD)
+   op = IOCB_CMD_UNMAP_ITER;
+   else if (rw & REQ_WRITE)
op = IOCB_CMD_WRITE_ITER;
else
op = IOCB_CMD_READ_ITER;
@@ -207,6 +209,10 @@ static size_t kaio_kreq_pack(struct kaio_req *kreq, int 
*nr_segs,

BUG_ON(b->bi_idx);

+   if (b->bi_vcnt == 0) { /* RE

[Devel] [PATCH 4/4] ploop: give aligned regions into fuse fallocate()

2018-02-14 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

fuse fallocate() can have requirements about granularity and alignment
for regions. For example, vstorage requires that all regions be
aligned to 4096.

A block device has optimal values of granularity and alignment for
discard requests, but it has to handle unaligned requests too.

Each block device has the discard_zeroes_data attribute, which says
whether the block device returns zeros for discarded blocks or not.

If we set it to 1, unaligned parts have to be filled with zeros;
otherwise we can ignore them.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/block/ploop/io_kaio.c | 79 +++
 1 file changed, 79 insertions(+)

diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 543f98b..2e48d13 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -246,6 +246,79 @@ static size_t kaio_kreq_pack(struct kaio_req *kreq, int 
*nr_segs,
return copy;
 }

+static int kaio_fill_zero_submit(struct file *file,
+   struct ploop_request *preq, loff_t off, size_t size)
+{
+   struct page *zero_page = ZERO_PAGE(0);
+   int nr_segs = 1, err = -ENOMEM;
+   struct kaio_req *kreq;
+
+   BUG_ON(size > PAGE_SIZE);
+
+   if (size == 0)
+   return 0;
+
+   kreq = kaio_kreq_alloc(preq, _segs);
+   if (!kreq) {
+   PLOOP_REQ_SET_ERROR(preq, -ENOMEM);
+   return err;
+   }
+
+   kreq->bvecs[0].bv_page = zero_page;
+   kreq->bvecs[0].bv_len = size;
+   kreq->bvecs[0].bv_offset = 0;
+   atomic_inc(>io_count);
+
+   err = kaio_kernel_submit(file, kreq, 1, size, off, REQ_WRITE);
+   if (err) {
+   PLOOP_REQ_SET_ERROR(preq, err);
+   ploop_complete_io_request(preq);
+   kfree(kreq);
+   return err;
+   }
+
+   return 0;
+}
+
+static int preprocess_discard_req(struct file *file, struct ploop_request 
*preq,
+   loff_t *poff, size_t *psize)
+{
+   unsigned int alignment, granularity, zeroes_data;
+   loff_t off = *poff, off_align;
+   size_t size = *psize;
+
+   alignment   = preq->plo->queue->limits.discard_alignment;
+   granularity = preq->plo->queue->limits.discard_granularity;
+   zeroes_data = preq->plo->queue->limits.discard_zeroes_data;
+
+   if (alignment) {
+   off_align = round_up(off, alignment);
+
+   if (zeroes_data &&
+   kaio_fill_zero_submit(file, preq,
+   off, off_align - off))
+   return -1;
+
+   size -= (off_align - off);
+   off = off_align;
+   }
+
+   if (granularity) {
+   size_t size_align;
+
+   size_align = round_down(size, granularity);
+   if (zeroes_data &&
+   kaio_fill_zero_submit(file, preq,
+   off + size_align, size - size_align))
+   return -1;
+
+   size = size_align;
+   }
+
+   *poff = off;
+   *psize = size;
+   return 0;
+}
 /*
  * WRITE case:
  *
@@ -284,6 +357,11 @@ static void kaio_sbl_submit(struct file *file, struct 
ploop_request *preq,
ploop_prepare_io_request(preq);

size <<= 9;
+
+   if ((rw & REQ_DISCARD) &&
+   preprocess_discard_req(file, preq, , ))
+   goto out;
+
while (size > 0) {
struct kaio_req *kreq;
int nr_segs;
@@ -311,6 +389,7 @@ static void kaio_sbl_submit(struct file *file, struct 
ploop_request *preq,
size -= copy;
}

+out:
kaio_complete_io_request(preq);
 }

--
1.8.3.1


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] ploop: don't forget to dec preq->io_count on a error path

2018-02-14 Thread Andrei Vagin

[This sender failed our fraud detection checks and may not be who they appear 
to be. Learn about spoofing at http://aka.ms/LearnAboutSpoofing]

preq->io_count is incremented before calling aio_kernel_submit()
and it is decremented in kaio_rw_aio_complete().

But if aio_kernel_submit() fails, preq->io_count has to be decremented
before exiting from the function.
---
 drivers/block/ploop/io_kaio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index 2e48d13..6d922f5 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -686,6 +686,7 @@ kaio_io_page(struct ploop_io * io, int op, struct 
ploop_request * preq,
   err, (op == IOCB_CMD_WRITE_ITER) ? "WRITE" : "READ",
   preq->eng_state, preq->state, pos);
PLOOP_REQ_SET_ERROR(preq, err);
+   ploop_complete_io_request(preq);
}

 out:
--
1.8.3.1


___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH 1/2] fuse: add a new async operation to unmap regions

2018-02-06 Thread Andrei Vagin

On Tue, Feb 06, 2018 at 11:49:30PM +0300, Konstantin Khorenko wrote:
> Andrey, this seems to be a feature and it should be tested.
> 
> Please post here a jira id with the feature description, QA task, etc.

1. Feature

Add support of discard requests via punch-holes for plain ploops
https://pmc.acronis.com/browse/VSTOR-6962

2. Description

When ploop receives a discard request, it calls fallocate() to punch a
hole in the ploop image file. This allows dropping useless data from
the storage.

4. Testing

[root@localhost ploop]# cat test/ploop-fdiscard.sh
set -e -x

path=$1
mkdir -p $path
ploop init $path/root -s 1G -f raw --sparse -t none
out=$(ploop mount $path/DiskDescriptor.xml)
echo $out
dev=$(echo $out | sed "s/.*dev=\(\S*\).*/\1/")
echo $dev
filefrag -sv $path/root
dd if=/dev/urandom of=$dev bs=1M count=1
dd if=/dev/urandom of=$dev bs=1M count=1 seek=512
fout1="$(filefrag -sv $path/root | wc -l)"
filefrag -sv $path/root
blkdiscard -l 1M -o 512M $dev
filefrag -sv $path/root
fout2="$(filefrag -sv $path/root | wc -l)"
if [ "$fout1" -le "$fout2" ]; then
echo FAIL
exit 1
fi
blkdiscard $dev
filefrag -sv $path/root
fout3="$(filefrag -sv $path/root | wc -l)"
if [ "$fout2" -le "$fout3" ]; then
echo FAIL
exit 1
fi
ploop umount -d $dev
rm -rf $path

5. Known issues

Works only for raw images on a fuse file system (vstorage)

7. Feature owner
Andrei Vagin (avagin@)


> 
> And whom to review?

Dima, could you review this patch set?

> 
> --
> Best regards,
> 
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
> 
> On 02/06/2018 03:25 AM, Andrei Vagin wrote:
> > The fuse interface allows running any operation asynchronously, because
> > the kernel redirects all operations to a user daemon and then waits for
> > an answer.
> > 
> > In ploop, we want to handle discard requests via fallocate, and
> > the simplest way to do this is to run fallocate(FALLOC_FL_PUNCH_HOLE)
> > asynchronously like the write command.
> > 
> > This patch adds a new async command IOCB_CMD_UNMAP_ITER, which sends
> > fallocate(FALLOC_FL_PUNCH_HOLE) to a fuse user daemon.
> > 
> > Signed-off-by: Andrei Vagin <ava...@openvz.org>
> > ---
> >  fs/aio.c |  1 +
> >  fs/fuse/file.c   | 63 
> > ++--
> >  fs/fuse/fuse_i.h |  3 +++
> >  include/uapi/linux/aio_abi.h |  1 +
> >  4 files changed, 60 insertions(+), 8 deletions(-)
> > 
> > diff --git a/fs/aio.c b/fs/aio.c
> > index 3a6a9b0..cdc7558 100644
> > --- a/fs/aio.c
> > +++ b/fs/aio.c
> > @@ -1492,6 +1492,7 @@ rw_common:
> > ret = aio_read_iter(req);
> > break;
> > 
> > +   case IOCB_CMD_UNMAP_ITER:
> > case IOCB_CMD_WRITE_ITER:
> > ret = aio_write_iter(req);
> > break;
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index 877c41f..83ea9da 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -920,6 +920,19 @@ static void fuse_aio_complete_req(struct fuse_conn 
> > *fc, struct fuse_req *req)
> > if (!req->bvec)
> > fuse_release_user_pages(req, !io->write);
> > 
> > +   if (req->in.h.opcode == FUSE_FALLOCATE) {
> > +   if (req->out.h.error)
> > +   printk("fuse_aio_complete_req: request (fallocate 
> > fh=0x%llx "
> > +  "offset=%lld length=%lld mode=%x) completed with 
> > err=%d\n",
> > +  req->misc.fallocate.in.fh,
> > +  req->misc.fallocate.in.offset,
> > +  req->misc.fallocate.in.length,
> > +  req->misc.fallocate.in.mode,
> > +  req->out.h.error);
> > +   fuse_aio_complete(io, req->out.h.error, -1);
> > +   return;
> > +   }
> > +
> > if (io->write) {
> > if (req->misc.write.in.size != req->misc.write.out.size)
> > pos = req->misc.write.in.offset - io->offset +
> > @@ -1322,6 +1335,33 @@ static void fuse_write_fill(struct fuse_req *req, 
> > struct fuse_file *ff,
> > req->out.args[0].value = outarg;
> >  }
> > 
> > +static size_t fuse_send_unmap(struct fuse_req *req, struct fuse_io_priv 
> > *io,
> > + loff_t pos, size_t count, fl_owner_t owner)
> > +{
> > +   struct file *file = io->file;
> > +   struct fuse_file *ff = file->private_

[Devel] [PATCH 2/2] ploop: handle discard requests via fallocate

2018-02-05 Thread Andrei Vagin

Currently ploop can be compacted, but in this case discard requests are
handled with the help of a userspace tool.

This patch adds native support for discard requests with a few
restrictions.

Currently this works only for raw images on a fuse file system for
these reasons:
* Only the fuse file system allows executing fallocate asynchronously.
* pio_direct (ext4) requires that all blocks in an image are allocated
  and initialized.
* The ploop1 format requires more changes to handle an index table.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/block/ploop/dev.c| 19 +++
 drivers/block/ploop/fmt_ploop1.c |  2 ++
 drivers/block/ploop/io_direct.c  |  1 +
 drivers/block/ploop/io_kaio.c|  8 +++-
 include/linux/ploop/ploop.h  |  1 +
 5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c
index 12fde00..4519d14 100644
--- a/drivers/block/ploop/dev.c
+++ b/drivers/block/ploop/dev.c
@@ -517,7 +517,7 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * bio,
ploop_pb_check_and_clear_bit(plo->pbd, preq->req_cluster))
ploop_set_blockable(plo, preq);
 
-   if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+   if (plo->fbd && unlikely(bio->bi_rw & REQ_DISCARD)) {
int clu_size = 1 << plo->cluster_log;
int i = (clu_size - 1) & bio->bi_sector;
int err = 0;
@@ -570,13 +570,13 @@ ploop_bio_queue(struct ploop_device * plo, struct bio * 
bio,
 
__TRACE("A %p %u\n", preq, preq->req_cluster);
 
-   if (unlikely(bio->bi_rw & REQ_DISCARD))
+   if (unlikely(preq->state & (1 << PLOOP_REQ_DISCARD)))
plo->bio_discard_qlen--;
else
plo->bio_qlen--;
ploop_entry_add(plo, preq);
 
-   if (bio->bi_size && !(bio->bi_rw & REQ_DISCARD))
+   if (bio->bi_size && !(preq->state & (1 << PLOOP_REQ_DISCARD)))
insert_entry_tree(plo, preq, drop_list);
 
trace_bio_queue(preq);
@@ -1487,7 +1487,7 @@ void ploop_complete_io_state(struct ploop_request * preq)
 
spin_lock_irqsave(>lock, flags);
__TRACE("C %p %u\n", preq, preq->req_cluster);
-   if (preq->error)
+   if (preq->error && !(preq->req_rw & REQ_DISCARD))
set_bit(PLOOP_S_ABORT, >state);
 
list_add_tail(>list, >ready_queue);
@@ -2570,6 +2570,15 @@ restart:
break;
}
 
+   if ((preq->req_rw & REQ_DISCARD) &&
+   !test_bit(PLOOP_REQ_DISCARD, >state) &&
+   test_bit(PLOOP_S_NO_FALLOC_DISCARD, >state)) {
+   preq->eng_state = PLOOP_E_COMPLETE;
+   preq->error = -EOPNOTSUPP;
+   ploop_complete_io_state(preq);
+   return;
+   }
+
ploop_entry_request(preq);
break;
 
@@ -3978,6 +3987,8 @@ static int ploop_start(struct ploop_device * plo, struct 
block_device *bdev)
 
blk_queue_max_discard_sectors(plo->queue, INT_MAX);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, plo->queue);
+   plo->queue->limits.discard_granularity = 4096;
+   plo->queue->limits.discard_alignment = 4096;
 
set_capacity(plo->disk, plo->bd_size);
bd_set_size(bdev, (loff_t)plo->bd_size << 9);
diff --git a/drivers/block/ploop/fmt_ploop1.c b/drivers/block/ploop/fmt_ploop1.c
index 0034216..c2be627 100644
--- a/drivers/block/ploop/fmt_ploop1.c
+++ b/drivers/block/ploop/fmt_ploop1.c
@@ -189,6 +189,8 @@ ploop1_open(struct ploop_delta * delta)
((u64)ph->bd_size + ph->l1_off) << 9)
delta->flags |= PLOOP_FMT_PREALLOCATED;
 
+   set_bit(PLOOP_S_NO_FALLOC_DISCARD, >plo->state);
+
return 0;
 
 out_err:
diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c
index d6b1118..3c57aca 100644
--- a/drivers/block/ploop/io_direct.c
+++ b/drivers/block/ploop/io_direct.c
@@ -1032,6 +1032,7 @@ dio_init(struct ploop_io * io)
init_timer(>fsync_timer);
io->fsync_timer.function = fsync_timeout;
io->fsync_timer.data = (unsigned long)io;
+   set_bit(PLOOP_S_NO_FALLOC_DISCARD, >plo->state);
 
return 0;
 }
diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c
index ee9ba26..543f98b 100644
--- a/drivers/block/ploop/io_kaio.c
+++ b/drivers/block/ploop/io_kaio.c
@@ -167,7 +167,9 @@ static int kaio_kernel_submit(struct file *file, struct 
kaio_req *kreq,
if (!iocb)
return -ENOMEM;
 
-   if (rw & REQ_WRITE)
+   if (rw & REQ_DISCARD)
+   op = IOCB

[Devel] [PATCH 1/2] fuse: add a new async operation to unmap regions

2018-02-05 Thread Andrei Vagin

The fuse interface allows running any operation asynchronously, because
the kernel redirects all operations to a user daemon and then waits for
an answer.

In ploop, we want to handle discard requests via fallocate, and
the simplest way to do this is to run fallocate(FALLOC_FL_PUNCH_HOLE)
asynchronously like the write command.

This patch adds a new async command IOCB_CMD_UNMAP_ITER, which sends
fallocate(FALLOC_FL_PUNCH_HOLE) to a fuse user daemon.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 fs/aio.c |  1 +
 fs/fuse/file.c   | 63 ++--
 fs/fuse/fuse_i.h |  3 +++
 include/uapi/linux/aio_abi.h |  1 +
 4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 3a6a9b0..cdc7558 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1492,6 +1492,7 @@ rw_common:
ret = aio_read_iter(req);
break;
 
+   case IOCB_CMD_UNMAP_ITER:
case IOCB_CMD_WRITE_ITER:
ret = aio_write_iter(req);
break;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 877c41f..83ea9da 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -920,6 +920,19 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, 
struct fuse_req *req)
if (!req->bvec)
fuse_release_user_pages(req, !io->write);
 
+   if (req->in.h.opcode == FUSE_FALLOCATE) {
+   if (req->out.h.error)
+   printk("fuse_aio_complete_req: request (fallocate 
fh=0x%llx "
+  "offset=%lld length=%lld mode=%x) completed with 
err=%d\n",
+  req->misc.fallocate.in.fh,
+  req->misc.fallocate.in.offset,
+  req->misc.fallocate.in.length,
+  req->misc.fallocate.in.mode,
+  req->out.h.error);
+   fuse_aio_complete(io, req->out.h.error, -1);
+   return;
+   }
+
if (io->write) {
if (req->misc.write.in.size != req->misc.write.out.size)
pos = req->misc.write.in.offset - io->offset +
@@ -1322,6 +1335,33 @@ static void fuse_write_fill(struct fuse_req *req, struct 
fuse_file *ff,
req->out.args[0].value = outarg;
 }
 
+static size_t fuse_send_unmap(struct fuse_req *req, struct fuse_io_priv *io,
+ loff_t pos, size_t count, fl_owner_t owner)
+{
+   struct file *file = io->file;
+   struct fuse_file *ff = file->private_data;
+   struct fuse_conn *fc = ff->fc;
+   struct fuse_fallocate_in *inarg = >misc.fallocate.in;
+
+   inarg->fh = ff->fh;
+   inarg->offset = pos;
+   inarg->length = count;
+   inarg->mode = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
+   req->in.h.opcode = FUSE_FALLOCATE;
+   req->in.h.nodeid = ff->nodeid;
+   req->in.numargs = 1;
+   req->in.args[0].size = sizeof(struct fuse_fallocate_in);
+   req->in.args[0].value = inarg;
+
+   fuse_account_request(fc, count);
+
+   if (io->async)
+   return fuse_async_req_send(fc, req, count, io);
+
+   fuse_request_send(fc, req);
+   return count;
+}
+
 static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io,
  loff_t pos, size_t count, fl_owner_t owner)
 {
@@ -3455,7 +3495,7 @@ static ssize_t fuse_direct_IO_bvec(int rw, struct kiocb 
*iocb,
req->bvec = bvec;
}
 
-   if (filled + bvec->bv_len <= nmax) {
+   if (bvec_len && filled + bvec->bv_len <= nmax) {
filled += bvec->bv_len;
req->num_bvecs++;
bvec++;
@@ -3465,14 +3505,21 @@ static ssize_t fuse_direct_IO_bvec(int rw, struct kiocb 
*iocb,
continue;
}
 
-   BUG_ON(!filled);
 
-   if (rw == WRITE)
-   nres = fuse_send_write(req, io, pos,
-   filled, NULL);
-   else
-   nres = fuse_send_read(req, io, pos,
-   filled, NULL);
+   if (iocb->ki_opcode == IOCB_CMD_UNMAP_ITER) {
+   req->in.argbvec = 0;
+   nres = fuse_send_unmap(req, io, pos,
+   iocb->ki_nbytes, NULL);
+   filled = nres;
+   } else {
+   BUG_ON(!filled);
+   if (rw == WRITE)
+   nres = fuse_send_write(req, io, pos,
+   filled, NULL);
+   else
+

[Devel] [PATCH 2/2] target: call alua helper before reporting group states to initiator

2018-01-31 Thread Andrei Vagin

An alua helper is called with the same set of arguments as it is called
when a group state is changed, but the fourth argument will be "Read".

For example:
 default_tg_pt_gp 0 Active/Optimized Read implicit 
iqn.2014-06.com.vstorage:test-1

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_alua.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c
index 46c8beb..f1c1733 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -138,6 +138,8 @@ target_emulate_report_referrals(struct se_cmd *cmd)
return 0;
 }
 
+static int core_alua_usermode_helper(struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *l_dev, int new_state, int explicit);
 /*
  * REPORT_TARGET_PORT_GROUPS
  *
@@ -173,6 +175,8 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
if (!buf)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
+   core_alua_usermode_helper(dev->t10_alua.default_tg_pt_gp, dev, -1, 0);
+
spin_lock(>t10_alua.tg_pt_gps_lock);
list_for_each_entry(tg_pt_gp, >t10_alua.tg_pt_gps_list,
tg_pt_gp_list) {
@@ -1046,7 +1050,7 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp,
argv[1] = config_item_name(_pt_gp->tg_pt_gp_group.cg_item);
argv[2] = str_id;
argv[3] = core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_access_state);
-   argv[4] = core_alua_dump_state(new_state);
+   argv[4] = new_state < 0 ? "Read" : core_alua_dump_state(new_state);
argv[5] = (explicit) ? "explicit" : "implicit";
argv[6] = config_item_name(_dev->dev_group.cg_item);
argv[7] = NULL;
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 1/2] target: move alua user helper from group to device

2018-01-31 Thread Andrei Vagin

We added this helper to tune a device backing store (to set a correct
delta for a ploop device). It is executed when a group state is changed.
In this case, it makes no difference where it is placed. But now we
understand that we need to run this helper before reporting group
states to an initiator. It will be used to sync groups with other
targets in a cluster. We have to guarantee that only one target reports
the Active/Optimized state. In this case, it is better for the user
helper to be set per device.

How to use:
echo -n /usr/sbin/vstorage-alua-helper > \
/sys/kernel/config/target/core/iblock_0/iqn.2014-06.com.vstorage\:test-1/alua_user_helper

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_alua.c | 49 +++
 drivers/target/target_core_alua.h |  4 +--
 drivers/target/target_core_configfs.c | 44 +++
 include/target/target_core_base.h |  3 ++-
 4 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c
index c59bf69..46c8beb 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1038,20 +1038,10 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp,
char *argv[8] = {}, str_id[6];
int ret;
 
-   if (!tg_pt_gp->tg_pt_gp_usermode_helper)
+   if (!l_dev->alua_user_helper[0])
return 0;
 
-   mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
-   if (!tg_pt_gp->tg_pt_gp_usermode_helper) {
-   mutex_unlock(_pt_gp->tg_pt_gp_transition_mutex);
-   return 0;
-   }
-   argv[0] = kstrdup(tg_pt_gp->tg_pt_gp_usermode_helper, GFP_KERNEL);
-   mutex_unlock(_pt_gp->tg_pt_gp_transition_mutex);
-
-   if (argv[0] == NULL)
-   return -ENOMEM;
-
+   argv[0] = l_dev->alua_user_helper;
snprintf(str_id, sizeof(str_id), "%hu", tg_pt_gp->tg_pt_gp_id);
argv[1] = config_item_name(_pt_gp->tg_pt_gp_group.cg_item);
argv[2] = str_id;
@@ -1064,7 +1054,6 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp,
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC | 
UMH_KILLABLE);
pr_debug("helper command: %s exit code %u (0x%x)\n",
argv[0], (ret >> 8) & 0xff, ret);
-   kfree(argv[0]);
return ret;
 }
 
@@ -1727,7 +1716,6 @@ struct t10_alua_tg_pt_gp 
*core_alua_allocate_tg_pt_gp(struct se_device *dev,
tg_pt_gp->tg_pt_gp_nonop_delay_msecs = ALUA_DEFAULT_NONOP_DELAY_MSECS;
tg_pt_gp->tg_pt_gp_trans_delay_msecs = ALUA_DEFAULT_TRANS_DELAY_MSECS;
tg_pt_gp->tg_pt_gp_implicit_trans_secs = 
ALUA_DEFAULT_IMPLICIT_TRANS_SECS;
-   tg_pt_gp->tg_pt_gp_usermode_helper = NULL;
 
/*
 * Enable all supported states
@@ -1885,8 +1873,6 @@ void core_alua_free_tg_pt_gp(
}
spin_unlock(_pt_gp->tg_pt_gp_lock);
 
-   kfree(tg_pt_gp->tg_pt_gp_usermode_helper);
-
kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp);
 }
 
@@ -2216,41 +2202,26 @@ ssize_t core_alua_store_trans_delay_msecs(
 }
 
 ssize_t core_alua_show_user_helper(
-   struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *dev,
char *page)
 {
-   ssize_t len;
-
-   mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
-   if (!tg_pt_gp->tg_pt_gp_usermode_helper)
-   len = 0;
-   else
-   len = sprintf(page, "%s\n", tg_pt_gp->tg_pt_gp_usermode_helper);
-   mutex_unlock(_pt_gp->tg_pt_gp_transition_mutex);
-
-   return len;
+   return sprintf(page, "%s\n", dev->alua_user_helper);
 }
 
 ssize_t core_alua_store_user_helper(
-   struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *dev,
const char *page,
size_t count)
 {
-   char *h;
-
if (count == 1 && page[0] == '-') {
-   h = NULL;
+   dev->alua_user_helper[0] = 0;
+   } else if (count > ALUA_USER_HELPER_LEN - 1) {
+   return -EINVAL;
} else {
-   h = kstrndup(page, count, GFP_KERNEL);
-   if (h == NULL)
-   return -ENOMEM;
+   memcpy(dev->alua_user_helper, page, count);
+   dev->alua_user_helper[count] = 0;
}
 
-   mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
-   kfree(tg_pt_gp->tg_pt_gp_usermode_helper);
-   tg_pt_gp->tg_pt_gp_usermode_helper = h;
-   mutex_unlock(_pt_gp->tg_pt_gp_transition_mutex);
-
return count;
 }
 
diff --git a/drivers/target/target_core_alua.h 
b/drivers/target/target_core_alua.h
index 7673ffe..df1cf49 100644
--- a/drivers/target/target_core_alua.h
+++ b/drivers/target/target_core_alua.h
@@ -137

[Devel] [PATCH] target: pass a device name to an alua user helper

2018-01-29 Thread Andrei Vagin

Now the helper is executed with this list of arguments:
TG_PT_Group ID prev_state new_state {explicit/implicit} dev_name

For example:
default_tg_pt_gp 0 Active/Optimized Standby explicit 
iqn.2014-06.com.vstorage:test-2

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_alua.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c
index a88a51d..c59bf69 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1028,13 +1028,14 @@ static void core_alua_do_transition_ua(struct 
t10_alua_tg_pt_gp *tg_pt_gp)
spin_unlock(_pt_gp->tg_pt_gp_lock);
 }
 
-static int core_alua_usermode_helper(struct t10_alua_tg_pt_gp *tg_pt_gp, int 
new_state, int explicit)
+static int core_alua_usermode_helper(struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *l_dev, int new_state, int explicit)
 {
char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };
-   char *argv[7] = {}, str_id[6];
+   char *argv[8] = {}, str_id[6];
int ret;
 
if (!tg_pt_gp->tg_pt_gp_usermode_helper)
@@ -1057,7 +1058,8 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp, int new
argv[3] = core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_access_state);
argv[4] = core_alua_dump_state(new_state);
argv[5] = (explicit) ? "explicit" : "implicit";
-   argv[6] = NULL;
+   argv[6] = config_item_name(_dev->dev_group.cg_item);
+   argv[7] = NULL;
 
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC | 
UMH_KILLABLE);
pr_debug("helper command: %s exit code %u (0x%x)\n",
@@ -1068,12 +1070,13 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp, int new
 
 static int core_alua_do_transition_tg_pt(
struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *l_dev,
int new_state,
int explicit)
 {
int prev_state;
 
-   if (core_alua_usermode_helper(tg_pt_gp, new_state, explicit))
+   if (core_alua_usermode_helper(tg_pt_gp, l_dev, new_state, explicit))
return -EAGAIN;
 
mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
@@ -1181,7 +1184,7 @@ int core_alua_do_port_transition(
 */
l_tg_pt_gp->tg_pt_gp_alua_port = l_port;
l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl;
-   rc = core_alua_do_transition_tg_pt(l_tg_pt_gp,
+   rc = core_alua_do_transition_tg_pt(l_tg_pt_gp, l_dev,
   new_state, explicit);
atomic_dec_mb(_gp->lu_gp_ref_cnt);
return rc;
@@ -1230,7 +1233,7 @@ int core_alua_do_port_transition(
 * core_alua_do_transition_tg_pt() will always return
 * success.
 */
-   rc = core_alua_do_transition_tg_pt(tg_pt_gp,
+   rc = core_alua_do_transition_tg_pt(tg_pt_gp, l_dev,
new_state, explicit);
 
spin_lock(>t10_alua.tg_pt_gps_lock);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target: pass a device name to an alua user helper

2018-01-29 Thread Andrei Vagin

Now the helper is executed with this list of arguments:
TG_PT_Group ID prev_state new_state {explicit/implicit} dev_name

For example:
default_tg_pt_gp 0 Active/Optimized Standby explicit 
iqn.2014-06.com.vstorage:test-2

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_alua.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c
index a88a51d..c59bf69 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -1028,13 +1028,14 @@ static void core_alua_do_transition_ua(struct 
t10_alua_tg_pt_gp *tg_pt_gp)
spin_unlock(_pt_gp->tg_pt_gp_lock);
 }
 
-static int core_alua_usermode_helper(struct t10_alua_tg_pt_gp *tg_pt_gp, int 
new_state, int explicit)
+static int core_alua_usermode_helper(struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *l_dev, int new_state, int explicit)
 {
char *envp[] = { "HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };
-   char *argv[7] = {}, str_id[6];
+   char *argv[8] = {}, str_id[6];
int ret;
 
if (!tg_pt_gp->tg_pt_gp_usermode_helper)
@@ -1057,7 +1058,8 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp, int new
argv[3] = core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_access_state);
argv[4] = core_alua_dump_state(new_state);
argv[5] = (explicit) ? "explicit" : "implicit";
-   argv[6] = NULL;
+   argv[6] = config_item_name(_dev->dev_group.cg_item);
+   argv[7] = NULL;
 
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC | 
UMH_KILLABLE);
pr_debug("helper command: %s exit code %u (0x%x)\n",
@@ -1068,12 +1070,13 @@ static int core_alua_usermode_helper(struct 
t10_alua_tg_pt_gp *tg_pt_gp, int new
 
 static int core_alua_do_transition_tg_pt(
struct t10_alua_tg_pt_gp *tg_pt_gp,
+   struct se_device *l_dev,
int new_state,
int explicit)
 {
int prev_state;
 
-   if (core_alua_usermode_helper(tg_pt_gp, new_state, explicit))
+   if (core_alua_usermode_helper(tg_pt_gp, l_dev, new_state, explicit))
return -EAGAIN;
 
mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
@@ -1181,7 +1184,7 @@ int core_alua_do_port_transition(
 */
l_tg_pt_gp->tg_pt_gp_alua_port = l_port;
l_tg_pt_gp->tg_pt_gp_alua_nacl = l_nacl;
-   rc = core_alua_do_transition_tg_pt(l_tg_pt_gp,
+   rc = core_alua_do_transition_tg_pt(l_tg_pt_gp, l_dev,
   new_state, explicit);
atomic_dec_mb(_gp->lu_gp_ref_cnt);
return rc;
@@ -1230,7 +1233,7 @@ int core_alua_do_port_transition(
 * core_alua_do_transition_tg_pt() will always return
 * success.
 */
-   rc = core_alua_do_transition_tg_pt(tg_pt_gp,
+   rc = core_alua_do_transition_tg_pt(tg_pt_gp, l_dev,
new_state, explicit);
 
spin_lock(>t10_alua.tg_pt_gps_lock);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target: add an user-mode helper to handle changes of a group state

2018-01-16 Thread Andrei Vagin

A user-mode helper is a user command, which is executed before changing
a group state. It allows userspace to run required actions to handle a
new state.

For example, our storage doesn't allow opening one file from several
nodes, so when a target is switched from standby to active, we have to
get a lease on an image file and attach it to a ploop device.

The usermode helper is executed with this set of arguments:

TG_PT_Group ID prev_state new_state {explicit/implicit}

for example:
default_tg_pt_gp 0 Standby Active/NonOptimized explicit

Here is an example how to set and cleanup a helper:
$ cd /sys/kernel/config/target/core/
$ echo -n /usr/sbin/vstrorage_iscsi_alua > \

fileio_1/iqn.2014-06.com.vstorage:test-2/alua/default_tg_pt_gp/user_helper
$ echo -n - > 
fileio_1/iqn.2014-06.com.vstorage:test-2/alua/default_tg_pt_gp/user_helper

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 drivers/target/target_core_alua.c | 84 +++
 drivers/target/target_core_alua.h |  4 ++
 drivers/target/target_core_configfs.c | 17 +++
 include/target/target_core_base.h |  1 +
 4 files changed, 106 insertions(+)

diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c
index a735425..a88a51d 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1027,6 +1028,44 @@ static void core_alua_do_transition_ua(struct 
t10_alua_tg_pt_gp *tg_pt_gp)
spin_unlock(_pt_gp->tg_pt_gp_lock);
 }
 
+static int core_alua_usermode_helper(struct t10_alua_tg_pt_gp *tg_pt_gp, int 
new_state, int explicit)
+{
+   char *envp[] = { "HOME=/",
+   "TERM=linux",
+   "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+   NULL };
+   char *argv[7] = {}, str_id[6];
+   int ret;
+
+   if (!tg_pt_gp->tg_pt_gp_usermode_helper)
+   return 0;
+
+   mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
+   if (!tg_pt_gp->tg_pt_gp_usermode_helper) {
+   mutex_unlock(_pt_gp->tg_pt_gp_transition_mutex);
+   return 0;
+   }
+   argv[0] = kstrdup(tg_pt_gp->tg_pt_gp_usermode_helper, GFP_KERNEL);
+   mutex_unlock(_pt_gp->tg_pt_gp_transition_mutex);
+
+   if (argv[0] == NULL)
+   return -ENOMEM;
+
+   snprintf(str_id, sizeof(str_id), "%hu", tg_pt_gp->tg_pt_gp_id);
+   argv[1] = config_item_name(_pt_gp->tg_pt_gp_group.cg_item);
+   argv[2] = str_id;
+   argv[3] = core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_access_state);
+   argv[4] = core_alua_dump_state(new_state);
+   argv[5] = (explicit) ? "explicit" : "implicit";
+   argv[6] = NULL;
+
+   ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC | 
UMH_KILLABLE);
+   pr_debug("helper command: %s exit code %u (0x%x)\n",
+   argv[0], (ret >> 8) & 0xff, ret);
+   kfree(argv[0]);
+   return ret;
+}
+
 static int core_alua_do_transition_tg_pt(
struct t10_alua_tg_pt_gp *tg_pt_gp,
int new_state,
@@ -1034,6 +1073,9 @@ static int core_alua_do_transition_tg_pt(
 {
int prev_state;
 
+   if (core_alua_usermode_helper(tg_pt_gp, new_state, explicit))
+   return -EAGAIN;
+
mutex_lock(_pt_gp->tg_pt_gp_transition_mutex);
/* Nothing to be done here */
if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
@@ -1682,6 +1724,7 @@ struct t10_alua_tg_pt_gp 
*core_alua_allocate_tg_pt_gp(struct se_device *dev,
tg_pt_gp->tg_pt_gp_nonop_delay_msecs = ALUA_DEFAULT_NONOP_DELAY_MSECS;
tg_pt_gp->tg_pt_gp_trans_delay_msecs = ALUA_DEFAULT_TRANS_DELAY_MSECS;
tg_pt_gp->tg_pt_gp_implicit_trans_secs = 
ALUA_DEFAULT_IMPLICIT_TRANS_SECS;
+   tg_pt_gp->tg_pt_gp_usermode_helper = NULL;
 
/*
 * Enable all supported states
@@ -1839,6 +1882,8 @@ void core_alua_free_tg_pt_gp(
}
spin_unlock(_pt_gp->tg_pt_gp_lock);
 
+   kfree(tg_pt_gp->tg_pt_gp_usermode_helper);
+
kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp);
 }
 
@@ -2167,6 +2212,45 @@ ssize_t core_alua_store_trans_delay_msecs(
return count;
 }
 
+ssize_t core_alua_show_user_helper(
+   struct t10_alua_tg_pt_gp *tg_pt_gp,
+   char *page)
+{
+   ssize_t len;
+
+   mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+   if (!tg_pt_gp->tg_pt_gp_usermode_helper)
+   len = 0;
+   else
+   len = sprintf(page, "%s\n", tg_pt_gp->tg_pt_gp_usermode_helper);
+   mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+
+   return len;
+}
+
+ssize_t core_alua_store_user_helper(
+   struct t10_alua_tg_pt_gp *tg_pt_gp,
+   const char *page,
+   size_t

Re: [Devel] [PATCH 1/4] target: Move scsi_port_stats from se_port to se_lun

2017-12-27 Thread Andrei Vagin

For all patches:

Acked-by: Andrei Vagin <ava...@virtuozzo.com>

On Wed, Dec 20, 2017 at 01:10:11PM +0300, Andrey Grafin wrote:
> This patch moves scsi_port_stats from se_port to se_lun
> and changes stats counters type to atomic_long_t.
> 
> This changes remove the next superfluous actions in collecting stats:
> - the check for the existence se_port;
> - spin_lock usage.
> 
> This patch is based on the mainstream patches adf653f92f38e and
> 4cc987eaff914 that can't be backported directly because there are
> too many changes before them. But the idea of that patches
> simplifies stats collecting.
> 
> Signed-off-by: Andrey Grafin <andrey.gra...@acronis.com>
> ---
>  drivers/target/target_core_stat.c  | 41 
> ++
>  drivers/target/target_core_tpg.c   |  3 +++
>  drivers/target/target_core_transport.c | 30 +++--
>  include/target/target_core_base.h  | 15 +++--
>  4 files changed, 31 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/target/target_core_stat.c 
> b/drivers/target/target_core_stat.c
> index 59830a27f50..8dacf57620f 100644
> --- a/drivers/target/target_core_stat.c
> +++ b/drivers/target/target_core_stat.c
> @@ -794,17 +794,12 @@ static ssize_t 
> target_stat_scsi_tgt_port_show_attr_in_cmds(
>   struct se_port_stat_grps *pgrps, char *page)
>  {
>   struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
> - struct se_port *sep;
> - ssize_t ret;
> + ssize_t ret = -ENODEV;
>  
>   spin_lock(>lun_sep_lock);
> - sep = lun->lun_sep;
> - if (!sep) {
> - spin_unlock(>lun_sep_lock);
> - return -ENODEV;
> - }
> -
> - ret = snprintf(page, PAGE_SIZE, "%llu\n", sep->sep_stats.cmd_pdus);
> + if (lun->lun_sep)
> + ret = snprintf(page, PAGE_SIZE, "%lu\n",
> + atomic_long_read(>lun_stats.cmd_pdus));
>   spin_unlock(>lun_sep_lock);
>   return ret;
>  }
> @@ -814,18 +809,12 @@ static ssize_t 
> target_stat_scsi_tgt_port_show_attr_write_mbytes(
>   struct se_port_stat_grps *pgrps, char *page)
>  {
>   struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
> - struct se_port *sep;
> - ssize_t ret;
> + ssize_t ret = -ENODEV;
>  
>   spin_lock(>lun_sep_lock);
> - sep = lun->lun_sep;
> - if (!sep) {
> - spin_unlock(>lun_sep_lock);
> - return -ENODEV;
> - }
> -
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(sep->sep_stats.rx_data_octets >> 20));
> + if (lun->lun_sep)
> + ret = snprintf(page, PAGE_SIZE, "%lu\n",
> + atomic_long_read(>lun_stats.rx_data_octets) >> 20);
>   spin_unlock(>lun_sep_lock);
>   return ret;
>  }
> @@ -835,18 +824,12 @@ static ssize_t 
> target_stat_scsi_tgt_port_show_attr_read_mbytes(
>   struct se_port_stat_grps *pgrps, char *page)
>  {
>   struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
> - struct se_port *sep;
> - ssize_t ret;
> + ssize_t ret = -ENODEV;
>  
>   spin_lock(>lun_sep_lock);
> - sep = lun->lun_sep;
> - if (!sep) {
> - spin_unlock(>lun_sep_lock);
> - return -ENODEV;
> - }
> -
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(sep->sep_stats.tx_data_octets >> 20));
> + if (lun->lun_sep)
> + ret = snprintf(page, PAGE_SIZE, "%lu\n",
> + atomic_long_read(>lun_stats.tx_data_octets) >> 20);
>   spin_unlock(>lun_sep_lock);
>   return ret;
>  }
> diff --git a/drivers/target/target_core_tpg.c 
> b/drivers/target/target_core_tpg.c
> index 0696de9553d..7ee2a94463b 100644
> --- a/drivers/target/target_core_tpg.c
> +++ b/drivers/target/target_core_tpg.c
> @@ -832,6 +832,9 @@ int core_tpg_add_lun(
>   }
>  
>   spin_lock(>tpg_lun_lock);
> + atomic_long_set(>lun_stats.cmd_pdus, 0);
> + atomic_long_set(>lun_stats.rx_data_octets, 0);
> + atomic_long_set(>lun_stats.tx_data_octets, 0);
>   lun->lun_access = lun_access;
>   lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE;
>   spin_unlock(>tpg_lun_lock);
> diff --git a/drivers/target/target_core_transport.c 
> b/drivers/target/target_core_transport.c
> index 25b5581ad78..4675bcc70cb 100644
> --- a/drivers/target/target_core_transport.c
> +++ b/drivers/target/target_core_transport.c
> @@ -1243,10

Re: [Devel] [PATCH 3/4] target: add histogram for LUN statistics

2017-12-27 Thread Andrei Vagin

On Wed, Dec 20, 2017 at 01:10:13PM +0300, Andrey Grafin wrote:
> This patch adds histogram statistics to scsi_ports_stats.
> Histogram can be obtained and configured via config_fs.
> Histogram measurement unit is usec.
> 
> Histogram usage:
> 1. Configure histogram '| 1 ms | 10 ms | 15 ms | largest values |'
> `echo "1000 1 15000" > 
> target/iscsi/iqn.2003-01.org.linux-iscsi.localhost.x8664\:sn.fdee138936b9/tpgt_1/lun/lun_3/statistics/scsi_tgt_port/write_hist`
> 
> 2. Obtain histogram stats
> `cat 
> /target/iscsi/iqn.2003-01.org.linux-iscsi.localhost.x8664\:sn.fdee138936b9/tpgt_1/lun/lun_3/statistics/scsi_tgt_port/write_hist`
> 
> 3. Stop histogram stats
> `echo "" > 
> /target/iscsi/iqn.2003-01.org.linux-iscsi.localhost.x8664\:sn.fdee138936b9/tpgt_1/lun/lun_3/statistics/scsi_tgt_port/write_hist`
> 
> Signed-off-by: Andrey Grafin 
> ---
>  drivers/target/target_core_stat.c  | 207 
> +
>  drivers/target/target_core_tpg.c   |  27 +
>  drivers/target/target_core_transport.c |  23 
>  include/target/target_core_base.h  |  16 +++
>  4 files changed, 273 insertions(+)
> 
> diff --git a/drivers/target/target_core_stat.c 
> b/drivers/target/target_core_stat.c
> index e88e29612db..6445869a22b 100644
> --- a/drivers/target/target_core_stat.c
> +++ b/drivers/target/target_core_stat.c
> @@ -33,6 +33,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -699,6 +700,209 @@ static ssize_t  
> target_stat_scsi_tgt_port_show_attr_##_name(\
>   return ret; \
>  }
>  
> +#define DEV_STAT_SCSI_TGT_PORT_SHOW_HIST(_name)  
> \
> +static ssize_t target_stat_scsi_tgt_port_show_attr_##_name(  \
> + struct se_port_stat_grps *pgrps, char *page)\
> +{\
> + ssize_t size = -ENODEV; \
> + struct se_lun *lun = container_of(pgrps,\
> + struct se_lun, port_stat_grps); \
> + \
> + spin_lock(>lun_sep_lock);  \
> + if (lun->lun_sep) { \
> + rcu_read_lock();\
> + size = snprintf_histogram(page, PAGE_SIZE,  \
> + rcu_dereference(lun->lun_stats._name)); \
> + rcu_read_unlock();  \
> + }   \
> + spin_unlock(>lun_sep_lock);\
> + return size;\
> +}
> +
> +#define DEV_STAT_SCSI_TGT_PORT_STORE_HIST(_name) \
> +static ssize_t target_stat_scsi_tgt_port_store_attr_##_name( \
> + struct se_port_stat_grps *pgrps, const char *page, size_t size) \
> +{\
> + struct se_lun *lun = container_of(pgrps,\
> + struct se_lun, port_stat_grps); \
> + struct scsi_port_stats_hist *old, *new; \
> + ssize_t ret;\
> + \
> + new = kzalloc(sizeof(*new), GFP_KERNEL);\
> + if (!new)   \
> + return -ENOMEM; \
> + \
> + ret = read_histogram_items(page,\
> + size, new->items, TCM_SE_PORT_STATS_HIST_MAX - 1);  \
> + \
> + if (ret < 0)\
> + goto err;   \
> + \
> + if (ret == 0) { \
> + kfree(new); \
> + new = NULL; \
> + } else  {   \
> + new->items[ret] = U64_MAX;  \
> + new->count = ret + 1;   \
> + }   \
> + \
> + spin_lock(>lun_sep_lock);

Re: [Devel] [PATCH v2 0/7] spfs: duplicate socket before sending it from usernsd

2017-12-21 Thread Andrei Vagin

Reviewed-by: Andrei Vagin <ava...@virtuozzo.com>

On Thu, Dec 21, 2017 at 02:06:53PM +0300, Stanislav Kinsburskiy wrote:
> Usernsd closes the socket once it has been sent.
> So, it should be duplicated before sending, if the socket is expected to be
> sent multiple times.
> 
> https://jira.sw.ru/browse/PSBM-79462
> 
> The following series implements...
> 
> ---
> 
> Stanislav Kinsburskiy (7):
>   spfs: introduce request_spfs_mngr_sock() helper
>   spfs: improve error and debug output for spfs_mount()
>   spfs: remove redundant spfs_service_fd() helper
>   spfs: improve SPFS manager start debug and error output
>   spfs: improve prints in spfs_set_mode() and spfs_release_replace()
>   spfs: return duplicated socket from usernsd
>   spfs: switch mounts mode to STUB after root yard depopulation
> 
> 
>  criu/cr-restore.c |   12 +++
>  criu/spfs.c   |   92 
> +++--
>  2 files changed, 59 insertions(+), 45 deletions(-)
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH 6/7] spfs: return duplicated socket from usernsd

2017-12-20 Thread Andrei Vagin

On Wed, Dec 20, 2017 at 07:07:16PM +0300, Stanislav Kinsburskiy wrote:
> Usernsd closes socket when sent.
> 
> https://jira.sw.ru/browse/PSBM-79462
> 
> Signed-off-by: Stanislav Kinsburskiy 
> ---
>  criu/spfs.c |   10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/criu/spfs.c b/criu/spfs.c
> index da19179..857a167 100644
> --- a/criu/spfs.c
> +++ b/criu/spfs.c
> @@ -162,11 +162,19 @@ static int start_spfs_manager(void)
>  static int get_spfs_mngr_sock(void *start, int fd, pid_t pid)
>  {
>   int sock;
> + int sfd;
>  
>   sock = get_service_fd(SPFS_MNGR_SK);
>   if (sock < 0 && start)
>   sock = start_spfs_manager();
> - return sock;
> + if (sock < 0)
> + return sock;
> +

start_spfs_manager() returns a descriptor which has to be closed.

You need to dup a descriptor, only if it is a service one.

> + sfd = dup(sock);
> + if (sfd < 0)
> + pr_perror("failed to duplicate socket %d", sock);
> +
> + return sfd;
>  }
>  
>  static int request_spfs_mngr_sock(bool *start_mngr)
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH 1/7] spfs: introduce request_spfs_mngr_sock)() helper

2017-12-20 Thread Andrei Vagin

On Wed, Dec 20, 2017 at 07:06:51PM +0300, Stanislav Kinsburskiy wrote:
> This generic helper will be responsible for both SPFS manager start via
> usernsd and socket request.
> 
> Signed-off-by: Stanislav Kinsburskiy 
> ---
>  criu/spfs.c |   45 ++---
>  1 file changed, 26 insertions(+), 19 deletions(-)
> 
> diff --git a/criu/spfs.c b/criu/spfs.c
> index a5f6031..fff7b9f 100644
> --- a/criu/spfs.c
> +++ b/criu/spfs.c
> @@ -153,16 +153,38 @@ static int start_spfs_manager(void)
>   return sock;
>  }
>  
> -static int get_spfs_mngr_sock(void *arg, int fd, pid_t pid)
> +static int get_spfs_mngr_sock(void *start, int fd, pid_t pid)
>  {
>   int sock;
>  
>   sock = get_service_fd(SPFS_MNGR_SK);
> - if (sock < 0)
> + if (sock < 0 && start)

What does this start mean? You don't use its value.

>   sock = start_spfs_manager();
>   return sock;
>  }
>  
> +static int request_spfs_mngr_sock(bool *start_mngr)
> +{
> + int ns_fd;
> + int sock;
> +
> + ns_fd = open_proc(PROC_SELF, "ns");
> + if (ns_fd < 0)
> + return ns_fd;

Why do you need this ns fd? ^^^
> +
> + sock = userns_call(get_spfs_mngr_sock, UNS_FDOUT, start_mngr, 0, ns_fd);
> +
> + close(ns_fd);
> + return sock;
> +}
> +
> +static int start_spfs_mngr(void)
> +{
> + bool start;

^ 

> +
> + return request_spfs_mngr_sock();
> +}
> +
>  static int spfs_request_mount(int sock, struct mount_info *mi, const char 
> *source,
> const char *type, unsigned long mountflags)
>  {
> @@ -268,15 +290,9 @@ int spfs_mount(struct mount_info *mi, const char *source,
>  const char *filesystemtype, unsigned long mountflags)
>  {
>   int ret;
> - int ns_fd;
>   int sock;
>  
> - ns_fd = open_proc(PROC_SELF, "ns");
> - if (ns_fd < 0)
> - return ns_fd;
> -
> - sock = userns_call(get_spfs_mngr_sock, UNS_FDOUT, NULL, 0, ns_fd);
> - close(ns_fd);
> + sock = start_spfs_mngr();
>   if (sock < 0) {
>   pr_err("failed to mount NFS to path %s\n", mi->mountpoint);
>   return sock;
> @@ -345,16 +361,7 @@ int spfs_mngr_status(bool *active)
>  
>  int spfs_mngr_sock(void)
>  {
> - int ns_fd, fd;
> -
> - ns_fd = open_proc(PROC_SELF, "ns");
> - if (ns_fd < 0)
> - return ns_fd;
> -
> - fd = userns_call(spfs_service_fd, UNS_FDOUT, NULL, 0, ns_fd);
> -
> - close(ns_fd);
> - return fd;
> + return request_spfs_mngr_sock(NULL);
>  }
>  
>  int spfs_set_mode(int sock, const char *mode)
> 
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] target: don't call an unmap callback if a range length is zero

2017-12-13 Thread Andrei Vagin

If a length of a range is zero, it means there is nothing to unmap
and we can skip this range.

Here is one more reason, why we have to skip such ranges.  An unmap
callback calls file_operations->fallocate(), but the man page for the
fallocate syscall says that fallocate(fd, mode, offset, len) returns
EINVAL, if len is zero. It means that file_operations->fallocate() isn't
obligated to handle zero ranges either.

Cc: alexey.kuznet...@acronis.com
---
 drivers/target/target_core_sbc.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 59a1235..99fb25f 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1132,9 +1132,11 @@ sbc_execute_unmap(struct se_cmd *cmd,
goto err;
}
 
-   ret = do_unmap_fn(cmd, priv, lba, range);
-   if (ret)
-   goto err;
+   if (range) {
+   ret = do_unmap_fn(cmd, priv, lba, range);
+   if (ret)
+   goto err;
+   }
 
ptr += 16;
size -= 16;
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH 1/3] target: Move scsi_port_stats from se_port to se_lun

2017-12-11 Thread Andrei Vagin

I haven't got the second patch and it isn't listed in the mailing list
archive:
https://lists.openvz.org/pipermail/devel/2017-December/thread.html

On Mon, Dec 11, 2017 at 08:53:03PM +0300, Andrey Grafin wrote:
> This patch moves scsi_port_stats from se_port to se_lun
> and changes stats counters type to atomic_long_t.
> 
> This changes remove the next superfluous actions in collecting stats:
> - the check for the existence se_port;
> - spin_lock usage.
> 
> This patch is based on the mainstream patches adf653f92f38e and
> 4cc987eaff914 that can't be backported directly because there are
> too many changes before them. But the idea of that patches
> simplifies stats collecting.
> 
> Signed-off-by: Andrey Grafin 
> ---
>  drivers/target/target_core_stat.c  | 41 
> ++
>  drivers/target/target_core_tpg.c   |  3 +++
>  drivers/target/target_core_transport.c | 30 +++--
>  include/target/target_core_base.h  | 15 +++--
>  4 files changed, 31 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/target/target_core_stat.c 
> b/drivers/target/target_core_stat.c
> index 59830a27f50..8dacf57620f 100644
> --- a/drivers/target/target_core_stat.c
> +++ b/drivers/target/target_core_stat.c
> @@ -794,17 +794,12 @@ static ssize_t 
> target_stat_scsi_tgt_port_show_attr_in_cmds(
>   struct se_port_stat_grps *pgrps, char *page)
>  {
>   struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
> - struct se_port *sep;
> - ssize_t ret;
> + ssize_t ret = -ENODEV;
>  
>   spin_lock(>lun_sep_lock);
> - sep = lun->lun_sep;
> - if (!sep) {
> - spin_unlock(>lun_sep_lock);
> - return -ENODEV;
> - }
> -
> - ret = snprintf(page, PAGE_SIZE, "%llu\n", sep->sep_stats.cmd_pdus);
> + if (lun->lun_sep)
> + ret = snprintf(page, PAGE_SIZE, "%lu\n",
> + atomic_long_read(>lun_stats.cmd_pdus));
>   spin_unlock(>lun_sep_lock);
>   return ret;
>  }
> @@ -814,18 +809,12 @@ static ssize_t 
> target_stat_scsi_tgt_port_show_attr_write_mbytes(
>   struct se_port_stat_grps *pgrps, char *page)
>  {
>   struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
> - struct se_port *sep;
> - ssize_t ret;
> + ssize_t ret = -ENODEV;
>  
>   spin_lock(>lun_sep_lock);
> - sep = lun->lun_sep;
> - if (!sep) {
> - spin_unlock(>lun_sep_lock);
> - return -ENODEV;
> - }
> -
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(sep->sep_stats.rx_data_octets >> 20));
> + if (lun->lun_sep)
> + ret = snprintf(page, PAGE_SIZE, "%lu\n",
> + atomic_long_read(>lun_stats.rx_data_octets) >> 20);
>   spin_unlock(>lun_sep_lock);
>   return ret;
>  }
> @@ -835,18 +824,12 @@ static ssize_t 
> target_stat_scsi_tgt_port_show_attr_read_mbytes(
>   struct se_port_stat_grps *pgrps, char *page)
>  {
>   struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
> - struct se_port *sep;
> - ssize_t ret;
> + ssize_t ret = -ENODEV;
>  
>   spin_lock(>lun_sep_lock);
> - sep = lun->lun_sep;
> - if (!sep) {
> - spin_unlock(>lun_sep_lock);
> - return -ENODEV;
> - }
> -
> - ret = snprintf(page, PAGE_SIZE, "%u\n",
> - (u32)(sep->sep_stats.tx_data_octets >> 20));
> + if (lun->lun_sep)
> + ret = snprintf(page, PAGE_SIZE, "%lu\n",
> + atomic_long_read(>lun_stats.tx_data_octets) >> 20);
>   spin_unlock(>lun_sep_lock);
>   return ret;
>  }
> diff --git a/drivers/target/target_core_tpg.c 
> b/drivers/target/target_core_tpg.c
> index 0696de9553d..7ee2a94463b 100644
> --- a/drivers/target/target_core_tpg.c
> +++ b/drivers/target/target_core_tpg.c
> @@ -832,6 +832,9 @@ int core_tpg_add_lun(
>   }
>  
>   spin_lock(>tpg_lun_lock);
> + atomic_long_set(>lun_stats.cmd_pdus, 0);
> + atomic_long_set(>lun_stats.rx_data_octets, 0);
> + atomic_long_set(>lun_stats.tx_data_octets, 0);
>   lun->lun_access = lun_access;
>   lun->lun_status = TRANSPORT_LUN_STATUS_ACTIVE;
>   spin_unlock(>tpg_lun_lock);
> diff --git a/drivers/target/target_core_transport.c 
> b/drivers/target/target_core_transport.c
> index 25b5581ad78..4675bcc70cb 100644
> --- a/drivers/target/target_core_transport.c
> +++ b/drivers/target/target_core_transport.c
> @@ -1243,10 +1243,7 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned 
> char *cdb)
>  
>   cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE;
>  
> - spin_lock(>se_lun->lun_sep_lock);
> - if (cmd->se_lun->lun_sep)
> - cmd->se_lun->lun_sep->sep_stats.cmd_pdus++;
> - spin_unlock(>se_lun->lun_sep_lock);
> + atomic_long_inc(>se_lun->lun_stats.cmd_pdus);
>   return 0;
>  }
>  EXPORT_SYMBOL(target_setup_cmd_from_cdb);
> @@

[Devel] [PATCH] criu: print a criu version with the info level

2017-11-21 Thread Andrei Vagin

We always ask users what version of criu they use to investigate a problem,
so it is better to have it in a log.

Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 criu/crtools.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/criu/crtools.c b/criu/crtools.c
index 741b7b2eb..de74e483c 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -713,7 +713,7 @@ int main(int argc, char *argv[], char *envp[])
libsoccr_set_log(log_level, soccr_print_on_level);
compel_log_init(vprint_on_level, log_get_loglevel());
 
-   pr_debug("Version: %s (gitid %s)\n", CRIU_VERSION, CRIU_GITID);
+   pr_info("Version: %s (gitid %s)\n", CRIU_VERSION, CRIU_GITID);
if (opts.deprecated_ok)
pr_debug("DEPRECATED ON\n");
 
-- 
2.13.6

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] criu: remap soccr log levels to criu levels

2017-11-21 Thread Andrei Vagin

criu and soccr have different values for log levels, so
someone has to remap them.

Cc: Cyrill Gorcunov <gorcu...@openvz.org>
Reported-by: Cyrill Gorcunov <gorcu...@openvz.org>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 criu/crtools.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/criu/crtools.c b/criu/crtools.c
index ebfd1d6a7..741b7b2eb 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -208,6 +208,28 @@ bool deprecated_ok(char *what)
return false;
 }
 
+static void soccr_print_on_level(unsigned int loglevel, const char *format, 
...)
+{
+   va_list args;
+   int lv;
+
+   switch (loglevel) {
+   case SOCCR_LOG_DBG:
+   lv = LOG_DEBUG;
+   break;
+   case SOCCR_LOG_ERR:
+   lv = LOG_ERROR;
+   break;
+   default:
+   lv = LOG_INFO;
+   break;
+   }
+
+   va_start(args, format);
+   vprint_on_level(lv, format, args);
+   va_end(args);
+}
+
 int main(int argc, char *argv[], char *envp[])
 {
 
@@ -688,7 +710,7 @@ int main(int argc, char *argv[], char *envp[])
 
if (log_init(opts.output))
return 1;
-   libsoccr_set_log(log_level, print_on_level);
+   libsoccr_set_log(log_level, soccr_print_on_level);
compel_log_init(vprint_on_level, log_get_loglevel());
 
pr_debug("Version: %s (gitid %s)\n", CRIU_VERSION, CRIU_GITID);
-- 
2.13.6

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH] nfs: abort delegation in dying VE

2017-11-15 Thread Andrei Vagin

On Wed, Nov 15, 2017 at 11:41:00AM -0800, Andrei Vagin wrote:
> On Wed, Nov 15, 2017 at 07:55:02PM +0300, Kirill Tkhai wrote:
> > On 15.11.2017 19:50, Stanislav Kinsburskiy wrote:
> > > Don't queue delegation request, if ve init is exiting.
> > > 
> > > https://jira.sw.ru/browse/PSBM-77061
> > > 
> > > Inspired-by: Kirill Tkhai <ktk...@virtuozzo.com>
> > > Signed-off-by: Stanislav Kinsburskiy <skinsbur...@virtuozzo.com>
> > > ---
> > >  fs/nfs/delegation.c |   16 +++-
> > >  1 file changed, 15 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> > > index 66af497..2422754 100644
> > > --- a/fs/nfs/delegation.c
> > > +++ b/fs/nfs/delegation.c
> > > @@ -189,15 +189,29 @@ void nfs_inode_reclaim_delegation(struct inode 
> > > *inode, struct rpc_cred *cred,
> > >   nfs_inode_set_delegation(inode, cred, res);
> > >  }
> > >  
> > > +static bool ve_abort_delegation(struct inode *inode)
> > > +{
> > > + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
> > > + struct rpc_xprt *xprt;
> > > +
> > > + rcu_read_lock();
> > > + xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
> > > + rcu_read_unlock();
> > > +
> > > + return xprt->xprt_net->owner_ve->ve_netns == NULL;
> > 
> > Usually, memory pointed by a pointer, which was obtained via rcu, has to be 
> > used
> > in rcu_read_* brackets:
> > 
> > rcu_read_lock();
> > xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
> > ret = (xprt->xprt_net->owner_ve->ve_netns == NULL);
> > rcu_read_unlock();
> > 
> > return ret;
> > 
> > If there is no an exception, we have to do something like above.
> 
> It is true when you want to dereference this pointer, otherwise I don't
> see any reason to take rcu_read_lock().

Oops, here is exactly this case. I didn't read the proposed code. Sorry.
> 
> > 
> > > +}
> > > +
> > >  static int nfs_do_return_delegation(struct inode *inode, struct 
> > > nfs_delegation *delegation, int issync)
> > >  {
> > >   int res = 0;
> > >  
> > > - if (!test_bit(NFS_DELEGATION_REVOKED, >flags))
> > > + if (!test_bit(NFS_DELEGATION_REVOKED, >flags) &&
> > > + !ve_abort_delegation(inode)) {
> > >   res = nfs4_proc_delegreturn(inode,
> > >   delegation->cred,
> > >   >stateid,
> > >   issync);
> > > + }
> > >   nfs_free_delegation(delegation);
> > >   return res;
> > >  }
> > > 
> > ___
> > Devel mailing list
> > Devel@openvz.org
> > https://lists.openvz.org/mailman/listinfo/devel
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH] nfs: abort delegation in dying VE

2017-11-15 Thread Andrei Vagin

On Wed, Nov 15, 2017 at 07:55:02PM +0300, Kirill Tkhai wrote:
> On 15.11.2017 19:50, Stanislav Kinsburskiy wrote:
> > Don't queue delegation request, if ve init is exiting.
> > 
> > https://jira.sw.ru/browse/PSBM-77061
> > 
> > Inspired-by: Kirill Tkhai 
> > Signed-off-by: Stanislav Kinsburskiy 
> > ---
> >  fs/nfs/delegation.c |   16 +++-
> >  1 file changed, 15 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
> > index 66af497..2422754 100644
> > --- a/fs/nfs/delegation.c
> > +++ b/fs/nfs/delegation.c
> > @@ -189,15 +189,29 @@ void nfs_inode_reclaim_delegation(struct inode 
> > *inode, struct rpc_cred *cred,
> > nfs_inode_set_delegation(inode, cred, res);
> >  }
> >  
> > +static bool ve_abort_delegation(struct inode *inode)
> > +{
> > +   struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
> > +   struct rpc_xprt *xprt;
> > +
> > +   rcu_read_lock();
> > +   xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
> > +   rcu_read_unlock();
> > +
> > +   return xprt->xprt_net->owner_ve->ve_netns == NULL;
> 
> Usually, memory pointed by a pointer, which was obtained via rcu, has to be 
> used
> in rcu_read_* brackets:
> 
> rcu_read_lock();
> xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
> ret = (xprt->xprt_net->owner_ve->ve_netns == NULL);
> rcu_read_unlock();
> 
> return ret;
> 
> If there is no an exception, we have to do something like above.

It is true when you want to dereference this pointer, otherwise I don't
see any reason to take rcu_read_lock().

> 
> > +}
> > +
> >  static int nfs_do_return_delegation(struct inode *inode, struct 
> > nfs_delegation *delegation, int issync)
> >  {
> > int res = 0;
> >  
> > -   if (!test_bit(NFS_DELEGATION_REVOKED, >flags))
> > +   if (!test_bit(NFS_DELEGATION_REVOKED, >flags) &&
> > +   !ve_abort_delegation(inode)) {
> > res = nfs4_proc_delegreturn(inode,
> > delegation->cred,
> > >stateid,
> > issync);
> > +   }
> > nfs_free_delegation(delegation);
> > return res;
> >  }
> > 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH p.haul] Increate a limit for opened files for criu pre-dump and page-server

2017-11-13 Thread Andrei Vagin

criu restore has to be run with a standard limit, because the kernel
doesn't shrink fdtable, when a limit is reduced. fdtable-s are charged
to kmem, so if we run criu restore with a big limit, all restored
processes are forked with this limit and only then they restore their
limits, but fdtable-s are allocated for the initial limit, so they eat
much more kernel memory than they have to.

https://jira.sw.ru/browse/PSBM-67194

Cc: Cyrill Gorcunov <gorcu...@gmail.com>
Cc: Pavel Vokhmyanin <pvokhmya...@virtuozzo.com>
Signed-off-by: Andrei Vagin <ava...@openvz.org>
---
 phaul/criu_api.py | 8 
 1 file changed, 8 insertions(+)

diff --git a/phaul/criu_api.py b/phaul/criu_api.py
index 73c642a..4627d5f 100644
--- a/phaul/criu_api.py
+++ b/phaul/criu_api.py
@@ -9,6 +9,7 @@ import re
 import socket
 import subprocess
 import util
+import resource
 
 import pycriu
 
@@ -36,9 +37,16 @@ class criu_conn(object):
util.set_cloexec(css[1])
logging.info("Passing (ctl:%d, data:%d) pair to CRIU",
css[0].fileno(), mem_sk.fileno())
+
+# criu uses a lot of pipes to pre-dump memory, so we need to
+# increate a limit for opened files.
+   fileno_max = int(open("/proc/sys/fs/nr_open").read())
+   fileno_old = resource.getrlimit(resource.RLIMIT_NOFILE)
+   resource.setrlimit(resource.RLIMIT_NOFILE, (fileno_max, 
fileno_max))
self._swrk = subprocess.Popen([criu_binary,
"swrk", 
"%d" % css[0].fileno()])
css[0].close()
+   resource.setrlimit(resource.RLIMIT_NOFILE, fileno_old)
self._cs = css[1]
self._last_req = -1
self._mem_fd = mem_sk.fileno()
-- 
2.13.6

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH] vz7: service: Allow keep_open flag for a WAIT_PID command #PSBM-58198

2017-11-10 Thread Andrei Vagin

Acked-by: Andrei Vagin <ava...@virtuozzo.com>

On Fri, Nov 10, 2017 at 10:45:55AM +, Pavel Vokhmyanin wrote:
> WAIT_PID rpc command is necessary to wait on a page server and retrieve
> its exit code. Keep_open flag is essential for this command - we need
> RPC server to keep running after processing WAIT_PID in order to handle
> next iteration of a pre-dump. Patch whitelists WAIT_PID command for
> keep_open flag.
> 
> Signed-off-by: Pavel Vokhmyanin <pvokhmya...@virtuozzo.com>
> 
> Pavel Vokhmyanin
> Software Developer, Virtualization Maintenance
> 
> Otradnaya street 2b/9, "Otradnoe" Techno Park | Moscow | Russia
> Phone: +7 (495) 139 80 17, ext 77449  | 
> pvokhmya...@virtuozzo.com<mailto:pvokhmya...@virtuozzo.com>
> Skype: pvokhmyanin
> 
> Virtuozzo.com<https://virtuozzo.com/>
> 


> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH 2/2 criu] test: check ipv6 sockets which handle ipv4 connections

2017-11-01 Thread Andrei Vagin

From: Andrei Vagin <ava...@virtuozzo.com>

A server socket is created with AF_INET6, but a client
socket is created with AF_INET.

Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>
---
 test/zdtm/static/Makefile  | 18 ++
 test/zdtm/static/socket-tcp-close-wait.c   |  9 +++--
 test/zdtm/static/socket-tcp-closed.c   |  9 +++--
 test/zdtm/static/socket-tcp-closing.c  |  9 +++--
 test/zdtm/static/socket-tcp-fin-wait1.c|  9 +++--
 test/zdtm/static/socket-tcp-last-ack.desc  |  2 +-
 test/zdtm/static/socket-tcp-reseted.c  | 10 +++---
 test/zdtm/static/socket-tcp-syn-sent.c |  9 +++--
 test/zdtm/static/socket-tcp-unconn.c   |  9 +++--
 test/zdtm/static/socket-tcp.c  |  9 +++--
 test/zdtm/static/socket-tcp4v6-close-wait.c|  1 +
 test/zdtm/static/socket-tcp4v6-close-wait.desc |  1 +
 test/zdtm/static/socket-tcp4v6-closed.c|  1 +
 test/zdtm/static/socket-tcp4v6-closed.desc |  1 +
 test/zdtm/static/socket-tcp4v6-closing.c   |  1 +
 test/zdtm/static/socket-tcp4v6-closing.desc|  1 +
 test/zdtm/static/socket-tcp4v6-fin-wait1.c |  1 +
 test/zdtm/static/socket-tcp4v6-fin-wait1.desc  |  1 +
 test/zdtm/static/socket-tcp4v6-fin-wait2.c |  1 +
 test/zdtm/static/socket-tcp4v6-fin-wait2.desc  |  1 +
 test/zdtm/static/socket-tcp4v6-last-ack.c  |  1 +
 test/zdtm/static/socket-tcp4v6-last-ack.desc   |  1 +
 test/zdtm/static/socket-tcp4v6-local.c |  1 +
 test/zdtm/static/socket-tcp4v6-local.desc  |  1 +
 test/zdtm/static/socket-tcp4v6.c   |  1 +
 test/zdtm/static/socket-tcp4v6.desc|  1 +
 test/zdtm/static/socket_listen.c   |  9 +++--
 test/zdtm/static/socket_listen4v6.c|  1 +
 28 files changed, 99 insertions(+), 20 deletions(-)
 create mode 12 test/zdtm/static/socket-tcp4v6-close-wait.c
 create mode 12 test/zdtm/static/socket-tcp4v6-close-wait.desc
 create mode 12 test/zdtm/static/socket-tcp4v6-closed.c
 create mode 12 test/zdtm/static/socket-tcp4v6-closed.desc
 create mode 12 test/zdtm/static/socket-tcp4v6-closing.c
 create mode 12 test/zdtm/static/socket-tcp4v6-closing.desc
 create mode 12 test/zdtm/static/socket-tcp4v6-fin-wait1.c
 create mode 12 test/zdtm/static/socket-tcp4v6-fin-wait1.desc
 create mode 12 test/zdtm/static/socket-tcp4v6-fin-wait2.c
 create mode 12 test/zdtm/static/socket-tcp4v6-fin-wait2.desc
 create mode 12 test/zdtm/static/socket-tcp4v6-last-ack.c
 create mode 12 test/zdtm/static/socket-tcp4v6-last-ack.desc
 create mode 12 test/zdtm/static/socket-tcp4v6-local.c
 create mode 12 test/zdtm/static/socket-tcp4v6-local.desc
 create mode 12 test/zdtm/static/socket-tcp4v6.c
 create mode 12 test/zdtm/static/socket-tcp4v6.desc
 create mode 12 test/zdtm/static/socket_listen4v6.c

diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile
index 4afe51cb5..97c97a273 100644
--- a/test/zdtm/static/Makefile
+++ b/test/zdtm/static/Makefile
@@ -26,6 +26,7 @@ TST_NOFILE:=  \
sched_policy00  \
socket_listen   \
socket_listen6  \
+   socket_listen4v6\
socket_udp  \
socket6_udp \
socket_udp_shutdown \
@@ -65,26 +66,34 @@ TST_NOFILE  :=  \
socket-tcp  \
socket-tcp-reseted  \
socket-tcp6 \
+   socket-tcp4v6   \
socket-tcp-local\
socket-tcp-nfconntrack  \
socket-tcp6-local   \
+   socket-tcp4v6-local \
socket-tcpbuf   \
socket-tcpbuf-local \
socket-tcpbuf6-local\
socket-tcpbuf6  \
socket-tcp-fin-wait1\
socket-tcp6-fin-wait1   \
+   socket-tcp4v6-fin-wait1 \
socket-tcp-fin-wait2\
socket-tcp6-fin-wait2   \
+   socket-tcp4v6-fin-wait2 \
socket-tcp-close-wait   \
socket-tcp6-close-wait  \
+   socket-tcp4v6-close-wait\
socket-tcp-last-ack \
socket-tcp6-last-ack\
+   socket-tcp4v6-last-ack  \
socket-tcp-closing  \
socket-tcp6-closing \
+   socket-tcp4v6-closing   \
socket-tcp-closed   \

[Devel] [PATCH 1/2 criu] soccr: c/r ipv6 sockets which handles ipv4 connections

2017-11-01 Thread Andrei Vagin

From: Andrei Vagin <ava...@virtuozzo.com>

IPv6 listening sockets can accept both ipv4 and ipv6 connections,
in both cases a family of an accepted socket will be AF_INET6.

But we have to send tcp packets according to the connection type.

 grep Error 
(00.002320) 53: Debug:  Will set rcv_wscale to 7
(00.002325) 53: Debug:  Will turn timestamps on
(00.002331) 53: Debug: Will set mss clamp to 65495
(00.002338) 53: Debug:  Restoring TCP 1 queue data 2 bytes
(00.002403) 53: Error (soccr/soccr.c:673): Unable to send a fin packet: 
libnet_write_raw_ipv6(): -1 bytes written (Network is unreachable)

(00.002434) 53: Error (criu/files.c:1191): Unable to open fd=3 id=0x6
(00.002506) Error (criu/cr-restore.c:2171): Restoring FAILED.
 ERROR OVER 

Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>

Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>
---
 soccr/soccr.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/soccr/soccr.c b/soccr/soccr.c
index 394095f00..2d49766dc 100644
--- a/soccr/soccr.c
+++ b/soccr/soccr.c
@@ -580,16 +580,33 @@ static int libsoccr_set_sk_data_noq(struct libsoccr_sk 
*sk,
return 0;
 }
 
+/* IPv4-Mapped IPv6 Addresses */
+static int ipv6_addr_mapped(union libsoccr_addr *addr)
+{
+   return (addr->v6.sin6_addr.s6_addr32[2] == htonl(0x));
+}
+
 static int send_fin(struct libsoccr_sk *sk, struct libsoccr_sk_data *data,
unsigned data_size, uint8_t flags)
 {
-   int ret, exit_code = -1;
+   uint32_t src_v4 = sk->src_addr->v4.sin_addr.s_addr;
+   uint32_t dst_v4 = sk->dst_addr->v4.sin_addr.s_addr;
+   int ret, exit_code = -1, family;
char errbuf[LIBNET_ERRBUF_SIZE];
int mark = SOCCR_MARK;;
int libnet_type;
libnet_t *l;
 
-   if (sk->dst_addr->sa.sa_family == AF_INET6)
+   family = sk->dst_addr->sa.sa_family;
+
+   if (family == AF_INET6 && ipv6_addr_mapped(sk->dst_addr)) {
+   /* TCP over IPv4 */
+   family = AF_INET;
+   dst_v4 = sk->dst_addr->v6.sin6_addr.s6_addr32[3];
+   src_v4 = sk->src_addr->v6.sin6_addr.s6_addr32[3];
+   }
+
+   if (family == AF_INET6)
libnet_type = LIBNET_RAW6;
else
libnet_type = LIBNET_RAW4;
@@ -627,7 +644,7 @@ static int send_fin(struct libsoccr_sk *sk, struct 
libsoccr_sk_data *data,
goto err;
}
 
-   if (sk->dst_addr->sa.sa_family == AF_INET6) {
+   if (family == AF_INET6) {
struct libnet_in6_addr src, dst;
 
memcpy(, >dst_addr->v6.sin6_addr, sizeof(dst));
@@ -644,7 +661,7 @@ static int send_fin(struct libsoccr_sk *sk, struct 
libsoccr_sk_data *data,
0,  /* payload size */
l,  /* libnet handle */
0); /* libnet id */
-   } else if (sk->dst_addr->sa.sa_family == AF_INET)
+   } else if (family == AF_INET)
ret = libnet_build_ipv4(
LIBNET_IPV4_H + LIBNET_TCP_H + 20,  /* length */
0,  /* TOS */
@@ -653,8 +670,8 @@ static int send_fin(struct libsoccr_sk *sk, struct 
libsoccr_sk_data *data,
64, /* TTL */
IPPROTO_TCP,/* protocol */
0,  /* checksum */
-   sk->dst_addr->v4.sin_addr.s_addr,   /* source IP */
-   sk->src_addr->v4.sin_addr.s_addr,   /* destination 
IP */
+   dst_v4, /* source IP */
+   src_v4, /* destination IP */
NULL,   /* payload */
0,  /* payload size */
l,  /* libnet handle */
-- 
2.13.6

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH] sk-inet: restore a value of SO_REUSEADDR

2017-10-31 Thread Andrei Vagin

From: Andrei Vagin <ava...@virtuozzo.com>

The SO_REUSEPORT option allows multiple sockets on the same
host to bind to the same port. This option has to be restored when all
sockets are bound to a port. The same logic is already used to restore
SO_REUSEADDR.

https://jira.sw.ru/browse/PSBM-75515

Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>
---
 criu/sk-inet.c   | 10 --
 criu/sockets.c   |  4 
 images/sk-opts.proto |  1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index 93de1e2ee..06275c922 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -586,14 +586,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
}
 
/* SO_REUSEADDR is set for all sockets */
-   if (ii->ie->opts->reuseaddr)
+   if (ii->ie->opts->reuseaddr && ii->ie->opts->so_reuseport)
return 0;
 
if (atomic_read(>port->users))
return 1;
 
val = ii->ie->opts->reuseaddr;
-   if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, ))
+   if (!val && restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, ))
+   return -1;
+
+   val = ii->ie->opts->so_reuseport;
+   if (!val && restore_opt(sk, SOL_SOCKET, SO_REUSEPORT, ))
return -1;
 
return 0;
@@ -653,6 +657,8 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
 */
if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, ))
goto err;
+   if (restore_opt(sk, SOL_SOCKET, SO_REUSEPORT, ))
+   goto err;
 
if (tcp_connection(ie)) {
if (!opts.tcp_established_ok && !opts.tcp_close) {
diff --git a/criu/sockets.c b/criu/sockets.c
index c2a5cd130..70d57b009 100644
--- a/criu/sockets.c
+++ b/criu/sockets.c
@@ -524,6 +524,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe)
soe->reuseaddr = val ? true : false;
soe->has_reuseaddr = true;
 
+   ret |= dump_opt(sk, SOL_SOCKET, SO_REUSEPORT, );
+   soe->so_reuseport = val ? true : false;
+   soe->has_so_reuseport = true;
+
ret |= dump_opt(sk, SOL_SOCKET, SO_PASSCRED, );
soe->has_so_passcred = true;
soe->so_passcred = val ? true : false;
diff --git a/images/sk-opts.proto b/images/sk-opts.proto
index b5374c976..af61975e9 100644
--- a/images/sk-opts.proto
+++ b/images/sk-opts.proto
@@ -21,6 +21,7 @@ message sk_opts_entry {
optional string so_bound_dev= 15;
 
repeated fixed64so_filter   = 16;
+   optional bool   so_reuseport= 17;
 }
 
 enum sk_shutdown {
-- 
2.13.6

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH rh7] fs/nfs: don't use delayed unmount for nfs.

2017-10-27 Thread Andrei Vagin

On Fri, Oct 27, 2017 at 06:31:18PM +0300, Andrey Ryabinin wrote:
> Delayed nfs unmount causes too much PITA. We must destroy VENET ip after
> unmount, but in that case we can't reuse that IP on restarted container
> because it might be still alive.
> 
> So let's just unmount NFS synchronously and destroy veip after it.

You change a general scenario to fix your small case. For users, it will
be unexpected behaviour. They call umount -l and don't expect any
delays.

How are nfs mounts unmounted when a host is shut down? I think they are
unmounted from init scripts (systemd). Why can't we unmount nfs mounts
with the force flag when we stop a container?

> 
> https://jira.sw.ru/browse/PSBM-76086
> Signed-off-by: Andrey Ryabinin 
> ---
>  drivers/net/venetdev.c | 9 ++---
>  fs/namespace.c | 3 ++-
>  fs/nfs/super.c | 1 +
>  include/linux/fs.h | 4 
>  4 files changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/venetdev.c b/drivers/net/venetdev.c
> index 1c4ae90b7ba8..11f4a66aaf3d 100644
> --- a/drivers/net/venetdev.c
> +++ b/drivers/net/venetdev.c
> @@ -765,7 +765,7 @@ static void venet_dellink(struct net_device *dev, struct 
> list_head *head)
>* has VE_FEATURE_NFS enabled. Thus here we have to destroy veip in
>* this case.
>*/
> - if (env->ve_netns || (env->features & VE_FEATURE_NFS))
> + if (env->ve_netns)
>   veip_shutdown(env);
>  
>   env->_venet_dev = NULL;
> @@ -1182,12 +1182,7 @@ static struct rtnl_link_ops venet_link_ops = {
>  
>  static void veip_shutdown_fini(void *data)
>  {
> - struct ve_struct *ve = data;
> -
> - if (ve->features & VE_FEATURE_NFS)
> - return;
> -
> - veip_shutdown(ve);
> + veip_shutdown(data);
>  }
>  
>  static struct ve_hook veip_shutdown_hook = {
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 2c9824985bc5..c2489dd2f520 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1134,7 +1134,8 @@ static void mntput_no_expire(struct mount *mnt)
>   }
>   unlock_mount_hash();
>  
> - if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
> + if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))
> + && !(mnt->mnt.mnt_sb->s_iflags & SB_I_UMOUNT_SYNC)) {
>   struct task_struct *task = current;
>   if (likely(!(task->flags & PF_KTHREAD))) {
>   init_task_work(>mnt_rcu, __cleanup_mnt);
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index 8f29ad17e29e..65a0ac8a3d16 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -2414,6 +2414,7 @@ static int nfs_set_super(struct super_block *s, void 
> *data)
>   int ret;
>  
>   s->s_flags = sb_mntdata->mntflags;
> + s->s_iflags |= SB_I_UMOUNT_SYNC;
>   s->s_fs_info = server;
>   s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
>   ret = set_anon_super(s, server);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 79011b4bc040..2f3a983741f8 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1526,6 +1526,9 @@ struct mm_struct;
>  #define UMOUNT_NOFOLLOW  0x0008  /* Don't follow symlink on 
> umount */
>  #define UMOUNT_UNUSED0x8000  /* Flag guaranteed to be unused 
> */
>  
> +/* sb->s_iflags */
> +#define SB_I_UMOUNT_SYNC 0x1000 /* don't use delayed unmount 
> */
> +
>  extern struct list_head super_blocks;
>  extern spinlock_t sb_lock;
>  
> @@ -1566,6 +1569,7 @@ struct super_block {
>   const struct quotactl_ops   *s_qcop;
>   const struct export_operations *s_export_op;
>   unsigned long   s_flags;
> + unsigned long   s_iflags;   /* internal SB_I_* flags */
>   unsigned long   s_magic;
>   struct dentry   *s_root;
>   struct rw_semaphore s_umount;
> -- 
> 2.13.6
> 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH criu 2/3] action-scripts: Export mount namespace roots via CRIU_MNT_NS_ROOTS

2017-10-23 Thread Andrei Vagin

On Thu, Oct 12, 2017 at 10:21:30AM +0300, Cyrill Gorcunov wrote:
> In particular we need to process filesystem to restore
> ploop device migration, thus export roots and the script
> can use CRTOOLS_INIT_PID together with nsenter to step
> into prepared file systems and whatever needed.
> 
> https://jira.sw.ru/browse/PSBM-71861
> 
> Signed-off-by: Cyrill Gorcunov 
> ---
>  criu/action-scripts.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/criu/action-scripts.c b/criu/action-scripts.c
> index a07f560..137980c 100644
> --- a/criu/action-scripts.c
> +++ b/criu/action-scripts.c
> @@ -16,6 +16,7 @@
>  #include 
>  #include 
>  #include "common/scm.h"
> +#include "mount.h"
>  #include "spfs.h"
>  
>  static const char *action_names[ACT_MAX] = {
> @@ -76,6 +77,7 @@ static int run_shell_scripts(const char *action)
>   return -1;
>  
>   if (!(env_set & ENV_ROOTPID) && root_item) {
> + char mnt_ns_roots[PATH_MAX];

Should it be declared in a block where it is initialized?

>   int pid;
>   char root_item_pid[16];
>  
> @@ -86,6 +88,11 @@ static int run_shell_scripts(const char *action)
>   pr_perror("Can't set CRTOOLS_INIT_PID=%s", 
> root_item_pid);
>   return -1;
>   }
> + export_mnt_ns_roots(mnt_ns_roots, sizeof(mnt_ns_roots));
> + if (setenv("CRIU_MNT_NS_ROOTS", mnt_ns_roots, 1)) {
> + pr_perror("Can't set CRIU_MNT_ROOTS=%s", 
> mnt_ns_roots);
> + return -1;
> + }

I don't like the idea of passing mnt_ns_roots to all scripts with
ENV_ROOTPID.

It is meaningful only for post-namespace

>   env_set |= ENV_ROOTPID;
>   }
>   }
> -- 
> 2.7.5
> 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH criu 1/3] mount: Add export_mnt_ns_roots helper

2017-10-23 Thread Andrei Vagin

On Thu, Oct 12, 2017 at 10:21:29AM +0300, Cyrill Gorcunov wrote:
> This helper produce space separated list of
> mount namespace roots which can be exported
> then into environment variable.
> 
> https://jira.sw.ru/browse/PSBM-71861
> 
> Signed-off-by: Cyrill Gorcunov 
> ---
>  criu/include/mount.h |  2 ++
>  criu/mount.c | 34 ++
>  2 files changed, 36 insertions(+)
> 
> diff --git a/criu/include/mount.h b/criu/include/mount.h
> index 35e1b49..e19f767 100644
> --- a/criu/include/mount.h
> +++ b/criu/include/mount.h
> @@ -93,6 +93,8 @@ extern int open_mountpoint(struct mount_info *pm);
>  extern struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump);
>  extern int prepare_mnt_ns(void);
>  
> +extern char *export_mnt_ns_roots(char *dst, size_t size);
> +
>  extern int pivot_root(const char *new_root, const char *put_old);
>  
>  extern struct mount_info *lookup_overlayfs(char *rpath, unsigned int s_dev,
> diff --git a/criu/mount.c b/criu/mount.c
> index c483c2e..57df5da 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -2788,6 +2788,40 @@ static inline int print_ns_root(struct ns_id *ns, int 
> remap_id, char *buf, int b
>   return snprintf(buf, bs, "%s/%d-%010d", mnt_roots, ns->id, remap_id);
>  }
>  
> +/*
> + * Construct space separated list of mount namespace roots
> + * so that we could export it via environment variable and
> + * process in restore scripts.
> + */
> +char *export_mnt_ns_roots(char *dst, size_t size)
> +{
> + struct ns_id *nsid;
> + char *p = dst;
> + size_t len;
> +
> + if (size == 0)
> + return dst;
> +
> + dst[0] = '\0';
> + for (nsid = ns_ids; nsid; nsid = nsid->next) {
> + if (nsid->nd != _ns_desc)
> + continue;
> +
> + len = print_ns_root(nsid, 0, p, size);
> + if (len >= (size - 2)) {
> + p[(size - 1)] = '\0';
> + return dst;
> + }
> +
> + size -= len;
> + p += len;
> + p[0] = ' ';
> + p++, size--;
> + }
> +
Did you forget to add \0 to the end?

if (p != dst)
*(p - 1) = 0;

> + return dst;
> +}
> +
>  static int create_mnt_roots(void)
>  {
>   int exit_code = -1;
> -- 
> 2.7.5
> 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH criu 1/3] mount: Add export_mnt_ns_roots helper

2017-10-23 Thread Andrei Vagin

On Thu, Oct 12, 2017 at 10:21:29AM +0300, Cyrill Gorcunov wrote:
> This helper produce space separated list of
> mount namespace roots which can be exported
> then into environment variable.
> 
> https://jira.sw.ru/browse/PSBM-71861
> 
> Signed-off-by: Cyrill Gorcunov 
> ---
>  criu/include/mount.h |  2 ++
>  criu/mount.c | 34 ++
>  2 files changed, 36 insertions(+)
> 
> diff --git a/criu/include/mount.h b/criu/include/mount.h
> index 35e1b49..e19f767 100644
> --- a/criu/include/mount.h
> +++ b/criu/include/mount.h
> @@ -93,6 +93,8 @@ extern int open_mountpoint(struct mount_info *pm);
>  extern struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump);
>  extern int prepare_mnt_ns(void);
>  
> +extern char *export_mnt_ns_roots(char *dst, size_t size);
> +
>  extern int pivot_root(const char *new_root, const char *put_old);
>  
>  extern struct mount_info *lookup_overlayfs(char *rpath, unsigned int s_dev,
> diff --git a/criu/mount.c b/criu/mount.c
> index c483c2e..57df5da 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -2788,6 +2788,40 @@ static inline int print_ns_root(struct ns_id *ns, int 
> remap_id, char *buf, int b
>   return snprintf(buf, bs, "%s/%d-%010d", mnt_roots, ns->id, remap_id);
>  }
>  
> +/*
> + * Construct space separated list of mount namespace roots
> + * so that we could export it via environment variable and
> + * process in restore scripts.
> + */
> +char *export_mnt_ns_roots(char *dst, size_t size)
> +{
> + struct ns_id *nsid;
> + char *p = dst;
> + size_t len;
> +
> + if (size == 0)
> + return dst;
> +
> + dst[0] = '\0';
> + for (nsid = ns_ids; nsid; nsid = nsid->next) {
> + if (nsid->nd != _ns_desc)
> + continue;
> +
> + len = print_ns_root(nsid, 0, p, size);
> + if (len >= (size - 2)) {
> + p[(size - 1)] = '\0';
> + return dst;
> + }
> +
> + size -= len;
> + p += len;
> + p[0] = ' ';
> + p++, size--;
> + }
> +
> + return dst;

This function always returns dst... How will vzctl find the required
mount?
> +}
> +
>  static int create_mnt_roots(void)
>  {
>   int exit_code = -1;
> -- 
> 2.7.5
> 
> ___
> Devel mailing list
> Devel@openvz.org
> https://lists.openvz.org/mailman/listinfo/devel
___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH criu] net: execute iptables-restore in a target network namespace

2017-10-18 Thread Andrei Vagin

otherwise the kernel can return an error, one of these checks
is in xt_owner.c:owner_check():
...
if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
(current_user_ns() != net->user_ns))
return -EINVAL;
...

https://jira.sw.ru/browse/PSBM-75531
---
 criu/net.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/criu/net.c b/criu/net.c
index b90a730..e3f083b 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1807,7 +1807,12 @@ static int do_iptables_restore(bool ipv6, char *buf, int 
size)
}
close_safe([1]);
 
-   ret = cr_system(pfd[0], -1, -1, cmd[0], cmd, 0);
+   /*
+* iptables-restore has to be executed in a network userns,
+* otherwise the kernel can return an error. One of these checks
+* is in xt_owner.c:owner_check().
+*/
+   ret = cr_system_userns(pfd[0], -1, -1, cmd[0], cmd, 0, 
root_item->pid->real);
 err:
close_safe([1]);
close_safe([0]);
-- 
1.8.3.1

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Re: [Devel] [PATCH rh7] netfilter: Allow xt_owner in any user namespace

2017-10-17 Thread Andrei Vagin

On Mon, Oct 16, 2017 at 01:33:12PM +0300, Konstantin Khorenko wrote:
> Stas, please review the patch.
> 
> Andrey, why do we need to support deeper user namespaces at all?
> Someone app tries to create a new userns inside a vz7 CT and use ipt_owner 
> inside it?

The kernel grabs userns when we send START to the "state" file of a
container ve cgroup. But vzctl does this after restoring a container,
so we don't know the ve userns when we are restoring iptables rules.

CRIU can't dump nested userns, so if any app creates a new userns,
criu dump will return an error.

> 
> --
> Best regards,
> 
> Konstantin Khorenko,
> Virtuozzo Linux Kernel Team
> 
> On 10/14/2017 02:20 AM, Andrei Vagin wrote:
> > From: "Eric W. Biederman" <ebied...@xmission.com>
> > 
> > ML: 9847371a84b0be330f4bc4aaa98904101ee8573d
> > https://jira.sw.ru/browse/PSBM-69409?
> > 
> > Making this work is a little tricky as it really isn't kosher to
> > change the xt_owner_match_info in a check function.
> > 
> > Without changing xt_owner_match_info we need to know the user
> > namespace the uids and gids are specified in.  In the common case
> > net->user_ns == current_user_ns().  Verify net->user_ns ==
> > current_user_ns() in owner_check so we can later assume it in
> > owner_mt.
> > 
> > In owner_check also verify that all of the uids and gids specified are
> > in net->user_ns and that the expected min/max relationship exists
> > between the uids and gids in xt_owner_match_info.
> > 
> > In owner_mt get the network namespace from the outgoing socket, as this
> > must be the same network namespace as the netfilter rules, and use that
> > network namespace to find the user namespace the uids and gids in
> > xt_match_owner_info are encoded in.  Then convert from their encoded
> > form into the kernel internal format for uids and gids and perform the
> > owner match.
> > 
> > Similar to ping_group_range, this code does not try to detect
> > noncontiguous UID/GID ranges.
> > 
> > Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com>
> > Signed-off-by: Kevin Cernekee <cerne...@chromium.org>
> > Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
> > Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>
> > ---
> >  net/netfilter/xt_owner.c | 41 +++--
> >  1 file changed, 35 insertions(+), 6 deletions(-)
> > 
> > diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
> > index 31dec4a..1744f78 100644
> > --- a/net/netfilter/xt_owner.c
> > +++ b/net/netfilter/xt_owner.c
> > @@ -80,11 +80,39 @@ owner_mt6_v0(const struct sk_buff *skb, struct 
> > xt_action_param *par)
> >  static int owner_check(const struct xt_mtchk_param *par)
> >  {
> > struct xt_owner_match_info *info = par->matchinfo;
> > +   struct net *net = par->net;
> > 
> > -   /* For now only allow adding matches from the initial user namespace */
> > +   /* Only allow the common case where the userns of the writer
> > +* matches the userns of the network namespace.
> > +*/
> > if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
> > -   !current_user_ns_initial())
> > +   (current_user_ns() != net->user_ns))
> > return -EINVAL;
> > +
> > +   /* Ensure the uids are valid */
> > +   if (info->match & XT_OWNER_UID) {
> > +   kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
> > +   kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
> > +
> > +   if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
> > +   (info->uid_max < info->uid_min) ||
> > +   uid_lt(uid_max, uid_min)) {
> > +   return -EINVAL;
> > +   }
> > +   }
> > +
> > +   /* Ensure the gids are valid */
> > +   if (info->match & XT_OWNER_GID) {
> > +   kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
> > +   kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
> > +
> > +   if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
> > +   (info->gid_max < info->gid_min) ||
> > +   gid_lt(gid_max, gid_min)) {
> > +   return -EINVAL;
> > +   }
> > +   }
> > +
> > return 0;
> >  }
> > 
> > @@ -93,6 +121,7 @@ owner_mt(const struct sk_buff *skb, struct 
> > xt_action_param *par)
> >  {
> >

[Devel] [PATCH rh7] netfilter: Allow xt_owner in any user namespace

2017-10-13 Thread Andrei Vagin

From: "Eric W. Biederman" <ebied...@xmission.com>

ML: 9847371a84b0be330f4bc4aaa98904101ee8573d
https://jira.sw.ru/browse/PSBM-69409?

Making this work is a little tricky as it really isn't kosher to
change the xt_owner_match_info in a check function.

Without changing xt_owner_match_info we need to know the user
namespace the uids and gids are specified in.  In the common case
net->user_ns == current_user_ns().  Verify net->user_ns ==
current_user_ns() in owner_check so we can later assume it in
owner_mt.

In owner_check also verify that all of the uids and gids specified are
in net->user_ns and that the expected min/max relationship exists
between the uids and gids in xt_owner_match_info.

In owner_mt get the network namespace from the outgoing socket, as this
must be the same network namespace as the netfilter rules, and use that
network namespace to find the user namespace the uids and gids in
xt_match_owner_info are encoded in.  Then convert from their encoded
form into the kernel internal format for uids and gids and perform the
owner match.

Similar to ping_group_range, this code does not try to detect
noncontiguous UID/GID ranges.

Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com>
Signed-off-by: Kevin Cernekee <cerne...@chromium.org>
Signed-off-by: Pablo Neira Ayuso <pa...@netfilter.org>
Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>
---
 net/netfilter/xt_owner.c | 41 +++--
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 31dec4a..1744f78 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -80,11 +80,39 @@ owner_mt6_v0(const struct sk_buff *skb, struct 
xt_action_param *par)
 static int owner_check(const struct xt_mtchk_param *par)
 {
struct xt_owner_match_info *info = par->matchinfo;
+   struct net *net = par->net;
 
-   /* For now only allow adding matches from the initial user namespace */
+   /* Only allow the common case where the userns of the writer
+* matches the userns of the network namespace.
+*/
if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
-   !current_user_ns_initial())
+   (current_user_ns() != net->user_ns))
return -EINVAL;
+
+   /* Ensure the uids are valid */
+   if (info->match & XT_OWNER_UID) {
+   kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+   kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
+
+   if (!uid_valid(uid_min) || !uid_valid(uid_max) ||
+   (info->uid_max < info->uid_min) ||
+   uid_lt(uid_max, uid_min)) {
+   return -EINVAL;
+   }
+   }
+
+   /* Ensure the gids are valid */
+   if (info->match & XT_OWNER_GID) {
+   kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+   kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
+
+   if (!gid_valid(gid_min) || !gid_valid(gid_max) ||
+   (info->gid_max < info->gid_min) ||
+   gid_lt(gid_max, gid_min)) {
+   return -EINVAL;
+   }
+   }
+
return 0;
 }
 
@@ -93,6 +121,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param 
*par)
 {
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
+   struct net *net = dev_net(par->in ? par->in : par->out);
 
if (skb->sk == NULL || skb->sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
@@ -109,8 +138,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param 
*par)
   (XT_OWNER_UID | XT_OWNER_GID)) == 0;
 
if (info->match & XT_OWNER_UID) {
-   kuid_t uid_min = make_kuid(ve_init_user_ns(), info->uid_min);
-   kuid_t uid_max = make_kuid(ve_init_user_ns(), info->uid_max);
+   kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
+   kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
 uid_lte(filp->f_cred->fsuid, uid_max)) ^
!(info->invert & XT_OWNER_UID))
@@ -118,8 +147,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param 
*par)
}
 
if (info->match & XT_OWNER_GID) {
-   kgid_t gid_min = make_kgid(ve_init_user_ns(), info->gid_min);
-   kgid_t gid_max = make_kgid(ve_init_user_ns(), info->gid_max);
+   kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
+   kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);

[Devel] [PATCH criu] libsoccr: ignore data in a sent queue, if a connect is closed

2017-10-05 Thread Andrei Vagin

From: Andrei Vagin <ava...@virtuozzo.com>

If a connection was reset, it can have some data in the send queue;
a user can't read this data, so we can ignore it too.

https://jira.sw.ru/browse/PSBM-67026

Signed-off-by: Andrei Vagin <ava...@virtuozzo.com>
---
 soccr/soccr.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/soccr/soccr.c b/soccr/soccr.c
index 48469aaab..bdf92d721 100644
--- a/soccr/soccr.c
+++ b/soccr/soccr.c
@@ -198,6 +198,18 @@ static int refresh_sk(struct libsoccr_sk *sk, struct 
libsoccr_sk_data *data, str
 
data->unsq_len = size;
 
+   if (data->state == TCP_CLOSE) {
+   /* A connection could be reseted. In thise case a sent queue
+* may contain some data. A user can't read this data, so let's
+* ignore them. Otherwise we will need to add a logic whether
+* the send queue contains a fin packet or not and decide 
whether
+* a fin or reset packet has to be sent to restore a state
+*/
+
+   data->unsq_len = 0;
+   data->outq_len = 0;
+   }
+
/* Don't account the fin packet. It doesn't countain real data. */
if ((1 << data->state) & (SNDQ_FIRST_FIN | SNDQ_SECOND_FIN)) {
if (data->outq_len)
-- 
2.13.3

___
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

1 2 >

1 - 100 of 126 matches

Mail list logo