[PATCH v4] can: mcp251xfd: replace sizeof(u32) with val_bytes in regmap

2021-01-22 Thread Su Yanjun
The sizeof(u32) is hardcoded. It's better to use the config value in
regmap.

It increases the size of target object, but it's flexible when new mcp chip
need other val_bytes.

Signed-off-by: Su Yanjun 
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index f07e8b737d31..3dde52669343 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1308,6 +1308,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
   const u8 offset, const u8 len)
 {
const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+   int val_bytes = regmap_get_val_bytes(priv->map_rx);
 
if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
(offset > tx_ring->obj_num ||
@@ -1322,7 +1323,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
return regmap_bulk_read(priv->map_rx,
mcp251xfd_get_tef_obj_addr(offset),
hw_tef_obj,
-   sizeof(*hw_tef_obj) / sizeof(u32) * len);
+   sizeof(*hw_tef_obj) / val_bytes * len);
 }
 
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
@@ -1511,11 +1512,12 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
  const u8 offset, const u8 len)
 {
int err;
+   int val_bytes = regmap_get_val_bytes(priv->map_rx);
 
err = regmap_bulk_read(priv->map_rx,
   mcp251xfd_get_rx_obj_addr(ring, offset),
   hw_rx_obj,
-  len * ring->obj_size / sizeof(u32));
+  len * ring->obj_size / val_bytes);
 
return err;
 }
@@ -2139,6 +2141,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
struct mcp251xfd_priv *priv = dev_id;
irqreturn_t handled = IRQ_NONE;
int err;
+   int val_bytes = regmap_get_val_bytes(priv->map_reg);
 
if (priv->rx_int)
do {
@@ -2162,7 +2165,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT,
   >regs_status,
   sizeof(priv->regs_status) /
-  sizeof(u32));
+  val_bytes);
if (err)
goto out_fail;
 
-- 
2.25.1



[PATCH v1] can: mcp251xfd: Add some sysfs debug interfaces for registers r/w

2021-01-21 Thread Su Yanjun
When i debug mcp2518fd, some method to track registers is
needed. This easy debug interface will be ok.

For example,
read a register at 0xe00:
echo 0xe00 > can_get_reg
cat can_get_reg

write a register at 0xe00:
echo 0xe00,0x60 > can_set_reg

Signed-off-by: Su Yanjun 
---
 .../net/can/spi/mcp251xfd/mcp251xfd-core.c| 132 ++
 1 file changed, 132 insertions(+)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index ab8aad0a7594..d65abe5505d5 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -27,6 +27,131 @@
 
 #define DEVICE_NAME "mcp251xfd"
 
+/* Add sysfs debug interface for easy to debug
+ *
+ * For example,
+ *
+ * - read a register
+ * echo 0xe00 > can_get_reg
+ * cat can_get_reg
+ *
+ * - write a register
+ * echo 0xe00,0x1 > can_set_reg
+ *
+ */
+static int reg_offset;
+
+static int __get_param(const char *buf, char *off, char *val)
+{
+   int len;
+
+   if (!buf || !off || !val)
+   return -EINVAL;
+
+   len = 0;
+   while (*buf != ',') {
+   *off++ = *buf++;
+   len++;
+
+   if (len >= 16)
+   return -EINVAL;
+   }
+
+   buf++;
+
+   *off = '\0';
+
+   len = 0;
+   while (*buf) {
+   *val++ = *buf++;
+   len++;
+
+   if (len >= 16)
+   return -EINVAL;
+   }
+
+   *val = '\0';
+
+   return 0;
+}
+
+static ssize_t can_get_reg_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   int err;
+   u32 val;
+   struct mcp251xfd_priv *priv;
+
+   priv = dev_get_drvdata(dev);
+
+   err = regmap_read(priv->map_reg, reg_offset, );
+   if (err)
+   return 0;
+
+   return sprintf(buf, "reg = 0x%08x, val = 0x%08x\n", reg_offset, val);
+}
+
+static ssize_t can_get_reg_store(struct device *dev,
+struct device_attribute *attr, const char 
*buf, size_t len)
+{
+   u32 off;
+
+   reg_offset = 0;
+
+   if (kstrtouint(buf, 0, ) || (off % 4))
+   return -EINVAL;
+
+   reg_offset = off;
+
+   return len;
+}
+
+static ssize_t can_set_reg_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   return 0;
+}
+
+static ssize_t can_set_reg_store(struct device *dev,
+struct device_attribute *attr, const char 
*buf, size_t len)
+{
+   struct mcp251xfd_priv *priv;
+   u32 off, val;
+   int err;
+
+   char s1[16];
+   char s2[16];
+
+   if (__get_param(buf, s1, s2))
+   return -EINVAL;
+
+   if (kstrtouint(s1, 0, ) || (off % 4))
+   return -EINVAL;
+
+   if (kstrtouint(s2, 0, ))
+   return -EINVAL;
+
+   err = regmap_write(priv->map_reg, off, val);
+   if (err)
+   return -EINVAL;
+
+   return len;
+}
+
+static DEVICE_ATTR_RW(can_get_reg);
+static DEVICE_ATTR_RW(can_set_reg);
+
+static struct attribute *can_attributes[] = {
+   _attr_can_get_reg.attr,
+   _attr_can_set_reg.attr,
+   NULL
+};
+
+static const struct attribute_group can_group = {
+   .attrs = can_attributes,
+   NULL
+};
+
 static const struct mcp251xfd_devtype_data mcp251xfd_devtype_data_mcp2517fd = {
.quirks = MCP251XFD_QUIRK_MAB_NO_WARN | MCP251XFD_QUIRK_CRC_REG |
MCP251XFD_QUIRK_CRC_RX | MCP251XFD_QUIRK_CRC_TX |
@@ -2944,6 +3069,12 @@ static int mcp251xfd_probe(struct spi_device *spi)
if (err)
goto out_free_candev;
 
+   err = sysfs_create_group(>dev.kobj, _group);
+   if (err) {
+   netdev_err(priv->ndev, "Create can group fail.\n");
+   goto out_free_candev;
+   }
+
err = can_rx_offload_add_manual(ndev, >offload,
MCP251XFD_NAPI_WEIGHT);
if (err)
@@ -2972,6 +3103,7 @@ static int mcp251xfd_remove(struct spi_device *spi)
mcp251xfd_unregister(priv);
spi->max_speed_hz = priv->spi_max_speed_hz_orig;
free_candev(ndev);
+   sysfs_remove_group(>dev.kobj, _group);
 
return 0;
 }
-- 
2.25.1



[PATCH v3] can: mcp251xfd: replace sizeof(u32) with val_bytes in regmap

2021-01-21 Thread Su Yanjun
The sizeof(u32) is hardcoded. It's better to use the config value in
regmap.

It increases the size of target object, but it's flexible when new mcp chip
need other val_bytes.

Signed-off-by: Su Yanjun 
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index f07e8b737d31..3dde52669343 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1308,6 +1308,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
   const u8 offset, const u8 len)
 {
const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+   int val_bytes = regmap_get_val_bytes(priv->map_reg);
 
if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
(offset > tx_ring->obj_num ||
@@ -1322,7 +1323,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
return regmap_bulk_read(priv->map_rx,
mcp251xfd_get_tef_obj_addr(offset),
hw_tef_obj,
-   sizeof(*hw_tef_obj) / sizeof(u32) * len);
+   sizeof(*hw_tef_obj) / val_bytes * len);
 }
 
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
@@ -1511,11 +1512,12 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
  const u8 offset, const u8 len)
 {
int err;
+   int val_bytes = regmap_get_val_bytes(priv->map_reg);
 
err = regmap_bulk_read(priv->map_rx,
   mcp251xfd_get_rx_obj_addr(ring, offset),
   hw_rx_obj,
-  len * ring->obj_size / sizeof(u32));
+  len * ring->obj_size / val_bytes);
 
return err;
 }
@@ -2139,6 +2141,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
struct mcp251xfd_priv *priv = dev_id;
irqreturn_t handled = IRQ_NONE;
int err;
+   int val_bytes = regmap_get_val_bytes(priv->map_reg);
 
if (priv->rx_int)
do {
@@ -2162,7 +2165,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT,
   >regs_status,
   sizeof(priv->regs_status) /
-  sizeof(u32));
+  val_bytes);
if (err)
goto out_fail;
 
-- 
2.25.1



[PATCH v1] can: mcp251xfd: use regmap_bulk_write for compatibility

2021-01-21 Thread Su Yanjun
Recently i use mcp2518fd on 4.x kernel which multiple write is not
backported, regmap_raw_write will cause old kernel crash because the
tx buffer in driver is smaller then 2K. Use regmap_bulk_write instead
for compatibility.

Signed-off-by: Su Yanjun 
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 3dde52669343..ab8aad0a7594 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -932,6 +932,7 @@ static int mcp251xfd_chip_ecc_init(struct mcp251xfd_priv 
*priv)
void *ram;
u32 val = 0;
int err;
+   int val_bytes = regmap_get_val_bytes(priv->map_reg);
 
ecc->ecc_stat = 0;
 
@@ -947,8 +948,8 @@ static int mcp251xfd_chip_ecc_init(struct mcp251xfd_priv 
*priv)
if (!ram)
return -ENOMEM;
 
-   err = regmap_raw_write(priv->map_reg, MCP251XFD_RAM_START, ram,
-  MCP251XFD_RAM_SIZE);
+   err = regmap_bulk_write(priv->map_reg, MCP251XFD_RAM_START, ram,
+  MCP251XFD_RAM_SIZE / val_bytes);
kfree(ram);
 
return err;
-- 
2.25.1



[PATCH v2] can: mcp251xfd: replace sizeof(u32) with val_bytes in regmap

2021-01-21 Thread Su Yanjun
No functional effect.

Signed-off-by: Su Yanjun 
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index f07e8b737d31..b15bfd50b863 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -181,6 +181,12 @@ static int mcp251xfd_clks_and_vdd_disable(const struct 
mcp251xfd_priv *priv)
return 0;
 }
 
+static inline int
+mcp251xfd_get_val_bytes(const struct mcp251xfd_priv *priv)
+{
+   return regmap_get_val_bytes(priv->map_reg);
+}
+
 static inline u8
 mcp251xfd_cmd_prepare_write_reg(const struct mcp251xfd_priv *priv,
union mcp251xfd_write_reg_buf *write_reg_buf,
@@ -1308,6 +1314,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
   const u8 offset, const u8 len)
 {
const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+   int val_bytes = mcp251xfd_get_val_bytes(priv);
 
if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
(offset > tx_ring->obj_num ||
@@ -1322,7 +1329,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
return regmap_bulk_read(priv->map_rx,
mcp251xfd_get_tef_obj_addr(offset),
hw_tef_obj,
-   sizeof(*hw_tef_obj) / sizeof(u32) * len);
+   sizeof(*hw_tef_obj) / val_bytes * len);
 }
 
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
@@ -1511,11 +1518,12 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
  const u8 offset, const u8 len)
 {
int err;
+   int val_bytes = mcp251xfd_get_val_bytes(priv);
 
err = regmap_bulk_read(priv->map_rx,
   mcp251xfd_get_rx_obj_addr(ring, offset),
   hw_rx_obj,
-  len * ring->obj_size / sizeof(u32));
+  len * ring->obj_size / val_bytes);
 
return err;
 }
@@ -2139,6 +2147,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
struct mcp251xfd_priv *priv = dev_id;
irqreturn_t handled = IRQ_NONE;
int err;
+   int val_bytes = mcp251xfd_get_val_bytes(priv);
 
if (priv->rx_int)
do {
@@ -2162,7 +2171,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT,
   >regs_status,
   sizeof(priv->regs_status) /
-  sizeof(u32));
+  val_bytes);
if (err)
goto out_fail;
 
-- 
2.25.1



[PATCH v1] can: mcp251xfd: replace sizeof(u32) with val_bytes in regmap

2021-01-21 Thread Su Yanjun
No functional effect.

Signed-off-by: Su Yanjun 
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c 
b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index f07e8b737d31..cc48ccee4694 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -181,6 +181,12 @@ static int mcp251xfd_clks_and_vdd_disable(const struct 
mcp251xfd_priv *priv)
return 0;
 }
 
+static inline int
+mcp251xfd_get_val_bytes(const struct mcp251xfd_priv *priv)
+{
+   return priv->map_reg->format.val_bytes;
+}
+
 static inline u8
 mcp251xfd_cmd_prepare_write_reg(const struct mcp251xfd_priv *priv,
union mcp251xfd_write_reg_buf *write_reg_buf,
@@ -1308,6 +1314,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
   const u8 offset, const u8 len)
 {
const struct mcp251xfd_tx_ring *tx_ring = priv->tx;
+   int val_bytes = mcp251xfd_get_val_bytes(priv);
 
if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) &&
(offset > tx_ring->obj_num ||
@@ -1322,7 +1329,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
return regmap_bulk_read(priv->map_rx,
mcp251xfd_get_tef_obj_addr(offset),
hw_tef_obj,
-   sizeof(*hw_tef_obj) / sizeof(u32) * len);
+   sizeof(*hw_tef_obj) / val_bytes * len);
 }
 
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
@@ -1511,11 +1518,12 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv,
  const u8 offset, const u8 len)
 {
int err;
+   int val_bytes = mcp251xfd_get_val_bytes(priv);
 
err = regmap_bulk_read(priv->map_rx,
   mcp251xfd_get_rx_obj_addr(ring, offset),
   hw_rx_obj,
-  len * ring->obj_size / sizeof(u32));
+  len * ring->obj_size / val_bytes);
 
return err;
 }
@@ -2139,6 +2147,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
struct mcp251xfd_priv *priv = dev_id;
irqreturn_t handled = IRQ_NONE;
int err;
+   int val_bytes = mcp251xfd_get_val_bytes(priv);
 
if (priv->rx_int)
do {
@@ -2162,7 +2171,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT,
   >regs_status,
   sizeof(priv->regs_status) /
-  sizeof(u32));
+  val_bytes);
if (err)
goto out_fail;
 
-- 
2.25.1



About patch NFS: Fix O_DIRECT accounting of number of bytes read/written

2019-10-15 Thread Su, Yanjun
Hi trond,
Because My mail system cant receive nfs mail list’s mails, I reply your patch 
here.
I have some question for the patch.

>No. Basic O_DIRECT does not guarantee atomicity of requests, which is
>why we do not have generic locking at the VFS level when reading and
>writing. The only guarantee being offered is that O_DIRECT and buffered
>writes do not collide.
Do you mean other fs also cant guarantee atomicity of O_DIRECT request or just 
nfs?

>IOW: I think the basic premise for this test is just broken (as I
>commented in the patch series I sent) because it is assuming a
>behaviour that is simply not guaranteed.
So the generic/465 of xfstests can’t apply to nfs for now, am I right?

>BTW: note that buffered writes have the same property. They are ordered
>when being written into the page cache, meaning that reads on the same
>client will see no holes, however if you try to read from another
>client, then you will see the same behaviour, with temporary holes
>magically appearing in the file.
As you say, nfs buffered write also has the hole problem with multiple r/w on 
different clients.
I want to know if the problem exists in other local fs such as xfs,ext4?

Thanks in advance.




Re: [PATCH] NFS: Fix O_DIRECT read problem when another write is going on

2019-10-06 Thread Su Yanjun



在 2019/10/1 2:06, Trond Myklebust 写道:

Hi Su,

On Mon, 2019-09-30 at 17:11 +0800, Su Yanjun wrote:

In xfstests generic/465 tests failed. Because O_DIRECT r/w use
async rpc calls, when r/w rpc calls are running concurrently we
may read partial data which is wrong.

For example as follows.
  user buffer
/\

||

  rpc0 rpc1

When rpc0 runs it encounters eof so return 0, then another writes
something. When rpc1 runs it returns some data. The total data
buffer contains wrong data.

In this patch we check eof mark for each direct request. If
encounters
eof then set eof mark in the request, when we meet it again report
-EAGAIN error. In nfs_direct_complete we convert -EAGAIN as if read
nothing. When the reader issue another read it will read ok.

Signed-off-by: Su Yanjun 
---
  fs/nfs/direct.c | 14 +-
  1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 222d711..7f737a3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -93,6 +93,7 @@ struct nfs_direct_req {
bytes_left, /* bytes left to be
sent */
error;  /* any reported error
*/
struct completion   completion; /* wait for i/o completion */
+   int eof;/* eof mark in the
req */
  
  	/* commit state */

struct nfs_mds_commit_info mds_cinfo;   /* Storage for cinfo
*/
@@ -380,6 +381,12 @@ static void nfs_direct_complete(struct
nfs_direct_req *dreq)
  {
struct inode *inode = dreq->inode;
  
+	/* read partial data just as read nothing */

+   if (dreq->error == -EAGAIN) {
+   dreq->count = 0;
+   dreq->error = 0;
+   }
+
inode_dio_end(inode);
  
  	if (dreq->iocb) {

@@ -413,8 +420,13 @@ static void nfs_direct_read_completion(struct
nfs_pgio_header *hdr)
if (hdr->good_bytes != 0)
nfs_direct_good_bytes(dreq, hdr);
  
-	if (test_bit(NFS_IOHDR_EOF, >flags))

+   if (dreq->eof)
+   dreq->error = -EAGAIN;
+
+   if (test_bit(NFS_IOHDR_EOF, >flags)) {
dreq->error = 0;
+   dreq->eof = 1;
+   }
  
  	spin_unlock(>lock);
  

Thanks for looking into this issue. I agree with your analysis of what
is going wrong in generic/465.

However, I think the problem is greater than just EOF. I think we also
need to look at the generic error handling, and ensure that it handles
a truncated RPC call in the middle of a series of calls correctly.

Please see the two patches I sent you just now and check if they fix
the problem for you.


The patchset you sent works for generic/465.

Thanks a lot





[PATCH] NFS: Fix O_DIRECT read problem when another write is going on

2019-09-30 Thread Su Yanjun
In xfstests generic/465 tests failed. Because O_DIRECT r/w use
async rpc calls, when r/w rpc calls are running concurrently we
may read partial data which is wrong.

For example as follows.
 user buffer
/\
|||
 rpc0 rpc1

When rpc0 runs it encounters eof so return 0, then another writes
something. When rpc1 runs it returns some data. The total data
buffer contains wrong data.

In this patch we check eof mark for each direct request. If encounters
eof then set eof mark in the request, when we meet it again report
-EAGAIN error. In nfs_direct_complete we convert -EAGAIN as if read
nothing. When the reader issue another read it will read ok.

Signed-off-by: Su Yanjun 
---
 fs/nfs/direct.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 222d711..7f737a3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -93,6 +93,7 @@ struct nfs_direct_req {
bytes_left, /* bytes left to be sent */
error;  /* any reported error */
struct completion   completion; /* wait for i/o completion */
+   int eof;/* eof mark in the req */
 
/* commit state */
struct nfs_mds_commit_info mds_cinfo;   /* Storage for cinfo */
@@ -380,6 +381,12 @@ static void nfs_direct_complete(struct nfs_direct_req 
*dreq)
 {
struct inode *inode = dreq->inode;
 
+   /* read partial data just as read nothing */
+   if (dreq->error == -EAGAIN) {
+   dreq->count = 0;
+   dreq->error = 0;
+   }
+
inode_dio_end(inode);
 
if (dreq->iocb) {
@@ -413,8 +420,13 @@ static void nfs_direct_read_completion(struct 
nfs_pgio_header *hdr)
if (hdr->good_bytes != 0)
nfs_direct_good_bytes(dreq, hdr);
 
-   if (test_bit(NFS_IOHDR_EOF, >flags))
+   if (dreq->eof)
+   dreq->error = -EAGAIN;
+
+   if (test_bit(NFS_IOHDR_EOF, >flags)) {
dreq->error = 0;
+   dreq->eof = 1;
+   }
 
spin_unlock(>lock);
 
-- 
2.7.4





[PATCH net] ipv4: fix ifa_flags reuse problem in using ifconfig tool

2019-09-04 Thread Su Yanjun
When NetworkManager has already set ipv4 address then uses
ifconfig set another ipv4 address. It will use previous ifa_flags
that will cause device route not be inserted.

As NetworkManager has already support IFA_F_NOPREFIXROUTE flag [1],
but ifconfig will reuse the ifa_flags. It's weird especially
some old scripts or program [2]  still  use ifconfig.

[1] https://gitlab.freedesktop.org/NetworkManager/NetworkManager/
commit/fec80e7473ad16979af75ed299d68103e7aa3fe9

[2] LTP or TAHI

Signed-off-by: Su Yanjun 
---
 net/ipv4/devinet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a4b5bd4..56ca339 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1159,6 +1159,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, 
struct ifreq *ifr)
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_broadcast = 0;
ifa->ifa_scope = 0;
+   ifa->ifa_flags = 0;
}
 
ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
-- 
2.7.4





[tip:perf/urgent] perf/x86: Fix typo in comment

2019-08-19 Thread tip-bot for Su Yanjun
Commit-ID:  77d760328ee015cf89460c52bfd5a6b0a09b7472
Gitweb: https://git.kernel.org/tip/77d760328ee015cf89460c52bfd5a6b0a09b7472
Author: Su Yanjun 
AuthorDate: Fri, 16 Aug 2019 16:43:21 +0800
Committer:  Ingo Molnar 
CommitDate: Mon, 19 Aug 2019 11:50:24 +0200

perf/x86: Fix typo in comment

No functional change.

Signed-off-by: Su Yanjun 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1565945001-4413-1-git-send-email-suyj.f...@cn.fujitsu.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 81b005e4c7d9..325959d19d9a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1236,7 +1236,7 @@ void x86_pmu_enable_event(struct perf_event *event)
  * Add a single event to the PMU.
  *
  * The event is added to the group of enabled events
- * but only if it can be scehduled with existing events.
+ * but only if it can be scheduled with existing events.
  */
 static int x86_pmu_add(struct perf_event *event, int flags)
 {


[PATCH] perf/x86: Fix typo in core.c

2019-08-16 Thread Su Yanjun
No functional change related.

Signed-off-by: Su Yanjun 
---
 arch/x86/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 81b005e..325959d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1236,7 +1236,7 @@ void x86_pmu_enable_event(struct perf_event *event)
  * Add a single event to the PMU.
  *
  * The event is added to the group of enabled events
- * but only if it can be scehduled with existing events.
+ * but only if it can be scheduled with existing events.
  */
 static int x86_pmu_add(struct perf_event *event, int flags)
 {
-- 
2.7.4





[PATCH net v3] net: ipv6: Fix a bug in ndisc_send_ns when netdev only has a global address

2019-07-30 Thread Su Yanjun
When the egress interface does not have a link local address, it can
not communicate with other hosts.

In RFC4861, 7.2.2 says
"If the source address of the packet prompting the solicitation is the
same as one of the addresses assigned to the outgoing interface, that
address SHOULD be placed in the IP Source Address of the outgoing
solicitation.  Otherwise, any one of the addresses assigned to the
interface should be used."

In this patch we try get a global address if we get ll address failed.

Signed-off-by: Su Yanjun 
---
Changes since V2:
- Let banned_flags under the scope of its use.
---
 include/net/addrconf.h |  2 ++
 net/ipv6/addrconf.c| 34 ++
 net/ipv6/ndisc.c   | 10 +++---
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 269ec27..eae1167 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -107,6 +107,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct 
in6_addr *addr,
  u32 banned_flags);
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
+int ipv6_get_addr(struct net_device *dev, struct in6_addr *addr,
+   u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
  bool match_wildcard);
 bool inet_rcv_saddr_any(const struct sock *sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ae17a9..9e537bd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1873,6 +1873,40 @@ int ipv6_get_lladdr(struct net_device *dev, struct 
in6_addr *addr,
return err;
 }
 
+int __ipv6_get_addr(struct inet6_dev *idev, struct in6_addr *addr,
+   u32 banned_flags)
+{
+   struct inet6_ifaddr *ifp;
+   int err = -EADDRNOTAVAIL;
+
+   list_for_each_entry(ifp, >addr_list, if_list) {
+   if (ifp->scope == 0 &&
+   !(ifp->flags & banned_flags)) {
+   *addr = ifp->addr;
+   err = 0;
+   break;
+   }
+   }
+   return err;
+}
+
+int ipv6_get_addr(struct net_device *dev, struct in6_addr *addr,
+ u32 banned_flags)
+{
+   struct inet6_dev *idev;
+   int err = -EADDRNOTAVAIL;
+
+   rcu_read_lock();
+   idev = __in6_dev_get(dev);
+   if (idev) {
+   read_lock_bh(>lock);
+   err = __ipv6_get_addr(idev, addr, banned_flags);
+   read_unlock_bh(>lock);
+   }
+   rcu_read_unlock();
+   return err;
+}
+
 static int ipv6_count_addresses(const struct inet6_dev *idev)
 {
const struct inet6_ifaddr *ifp;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 659ecf4e..fa58c6e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -592,9 +592,13 @@ void ndisc_send_ns(struct net_device *dev, const struct 
in6_addr *solicit,
struct nd_msg *msg;
 
if (!saddr) {
-   if (ipv6_get_lladdr(dev, _buf,
-  (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
-   return;
+   u32 banned_flags = IFA_F_TENTATIVE | IFA_F_OPTIMISTIC;
+
+   if (ipv6_get_lladdr(dev, _buf, banned_flags)) {
+   /* try global address */
+   if (ipv6_get_addr(dev, _buf, banned_flags))
+   return;
+   }
saddr = _buf;
}
 
-- 
2.7.4





[PATCH net v2] net: ipv6: Fix a bug in ndisc_send_ns when netdev only has a global address

2019-07-29 Thread Su Yanjun
When the egress interface does not have a link local address, it can
not communicate with other hosts.

In RFC4861, 7.2.2 says
"If the source address of the packet prompting the solicitation is the
same as one of the addresses assigned to the outgoing interface, that
address SHOULD be placed in the IP Source Address of the outgoing
solicitation.  Otherwise, any one of the addresses assigned to the
interface should be used."

In this patch we try get a global address if we get ll address failed.

Signed-off-by: Su Yanjun 
---
Changes since V1:
- Change patch description and code optimization at 
  David Ahern's suggestion
---
 include/net/addrconf.h |  2 ++
 net/ipv6/addrconf.c| 34 ++
 net/ipv6/ndisc.c   |  9 ++---
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index becdad5..ce1561e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -107,6 +107,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct 
in6_addr *addr,
  u32 banned_flags);
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
+int ipv6_get_addr(struct net_device *dev, struct in6_addr *addr,
+   u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
  bool match_wildcard);
 bool inet_rcv_saddr_any(const struct sock *sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 521e320..9467457 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1870,6 +1870,40 @@ int ipv6_get_lladdr(struct net_device *dev, struct 
in6_addr *addr,
return err;
 }
 
+int __ipv6_get_addr(struct inet6_dev *idev, struct in6_addr *addr,
+   u32 banned_flags)
+{
+   struct inet6_ifaddr *ifp;
+   int err = -EADDRNOTAVAIL;
+
+   list_for_each_entry(ifp, >addr_list, if_list) {
+   if (ifp->scope == 0 &&
+   !(ifp->flags & banned_flags)) {
+   *addr = ifp->addr;
+   err = 0;
+   break;
+   }
+   }
+   return err;
+}
+
+int ipv6_get_addr(struct net_device *dev, struct in6_addr *addr,
+ u32 banned_flags)
+{
+   struct inet6_dev *idev;
+   int err = -EADDRNOTAVAIL;
+
+   rcu_read_lock();
+   idev = __in6_dev_get(dev);
+   if (idev) {
+   read_lock_bh(>lock);
+   err = __ipv6_get_addr(idev, addr, banned_flags);
+   read_unlock_bh(>lock);
+   }
+   rcu_read_unlock();
+   return err;
+}
+
 static int ipv6_count_addresses(const struct inet6_dev *idev)
 {
const struct inet6_ifaddr *ifp;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 083cc1c..156c027 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -603,11 +603,14 @@ void ndisc_send_ns(struct net_device *dev, const struct 
in6_addr *solicit,
int inc_opt = dev->addr_len;
int optlen = 0;
struct nd_msg *msg;
+   u32 banned_flags = IFA_F_TENTATIVE | IFA_F_OPTIMISTIC;
 
if (!saddr) {
-   if (ipv6_get_lladdr(dev, _buf,
-  (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
-   return;
+   if (ipv6_get_lladdr(dev, _buf, banned_flags)) {
+   /* try global address */
+   if (ipv6_get_addr(dev, _buf, banned_flags))
+   return;
+   }
saddr = _buf;
}
 
-- 
2.7.4





[PATCH net] net: ipv6: Fix a bug in ndisc_send_ns when netdev only has a global address

2019-07-28 Thread Su Yanjun
When we send mpls packets and the interface only has a
manual global ipv6 address, then the two hosts cant communicate.
I find that in ndisc_send_ns it only tries to get a ll address.
In my case, the executive path is as below.
ip6_output
 ->ip6_finish_output
  ->lwtunnel_xmit
   ->mpls_xmit
->neigh_resolve_output
 ->neigh_probe
  ->ndisc_solicit
   ->ndisc_send_ns

In RFC4861, 7.2.2 says
"If the source address of the packet prompting the solicitation is the
same as one of the addresses assigned to the outgoing interface, that
address SHOULD be placed in the IP Source Address of the outgoing
solicitation.  Otherwise, any one of the addresses assigned to the
interface should be used."

In this patch we try get a global address if we get ll address failed.

Signed-off-by: Su Yanjun 
---
 include/net/addrconf.h |  4 
 net/ipv6/addrconf.c| 34 ++
 net/ipv6/ndisc.c   |  8 ++--
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index becdad5..006db8e 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -107,6 +107,10 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct 
in6_addr *addr,
  u32 banned_flags);
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
+int __ipv6_get_addr(struct inet6_dev *idev, struct in6_addr *addr,
+ u32 banned_flags);
+int ipv6_get_addr(struct net_device *dev, struct in6_addr *addr,
+   u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
  bool match_wildcard);
 bool inet_rcv_saddr_any(const struct sock *sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 521e320..4c0a43f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1870,6 +1870,40 @@ int ipv6_get_lladdr(struct net_device *dev, struct 
in6_addr *addr,
return err;
 }
 
+int __ipv6_get_addr(struct inet6_dev *idev, struct in6_addr *addr,
+   u32 banned_flags)
+{
+   struct inet6_ifaddr *ifp;
+   int err = -EADDRNOTAVAIL;
+
+   list_for_each_entry_reverse(ifp, >addr_list, if_list) {
+   if (ifp->scope == 0 &&
+   !(ifp->flags & banned_flags)) {
+   *addr = ifp->addr;
+   err = 0;
+   break;
+   }
+   }
+   return err;
+}
+
+int ipv6_get_addr(struct net_device *dev, struct in6_addr *addr,
+ u32 banned_flags)
+{
+   struct inet6_dev *idev;
+   int err = -EADDRNOTAVAIL;
+
+   rcu_read_lock();
+   idev = __in6_dev_get(dev);
+   if (idev) {
+   read_lock_bh(>lock);
+   err = __ipv6_get_addr(idev, addr, banned_flags);
+   read_unlock_bh(>lock);
+   }
+   rcu_read_unlock();
+   return err;
+}
+
 static int ipv6_count_addresses(const struct inet6_dev *idev)
 {
const struct inet6_ifaddr *ifp;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 083cc1c..18ac2fb 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -606,8 +606,12 @@ void ndisc_send_ns(struct net_device *dev, const struct 
in6_addr *solicit,
 
if (!saddr) {
if (ipv6_get_lladdr(dev, _buf,
-  (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
-   return;
+  (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) {
+   /* try global address */
+   if (ipv6_get_addr(dev, _buf,
+ (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC)))
+   return;
+   }
saddr = _buf;
}
 
-- 
2.7.4





[PATCH] udp: Fix typo in net/ipv4/udp.c

2019-07-17 Thread Su Yanjun
Signed-off-by: Su Yanjun 
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c21862b..d88821c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2170,7 +2170,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, 
struct sk_buff *skb,
 
 /* Initialize UDP checksum. If exited with zero value (success),
  * CHECKSUM_UNNECESSARY means, that no more checks are required.
- * Otherwise, csum completion requires chacksumming packet body,
+ * Otherwise, csum completion requires checksumming packet body,
  * including udp header and folding it to skb->csum.
  */
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
-- 
2.7.4





[PATCH] udp: Fix typo in udpv4/p.c

2019-07-17 Thread Su Yanjun
Signed-off-by: Su Yanjun 
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c21862b..d88821c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2170,7 +2170,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, 
struct sk_buff *skb,
 
 /* Initialize UDP checksum. If exited with zero value (success),
  * CHECKSUM_UNNECESSARY means, that no more checks are required.
- * Otherwise, csum completion requires chacksumming packet body,
+ * Otherwise, csum completion requires checksumming packet body,
  * including udp header and folding it to skb->csum.
  */
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
-- 
2.7.4





Re: [Problem]testOpenUpgradeLock test failed in nfsv4.0 in 5.2.0-rc7

2019-07-07 Thread Su Yanjun

Ang ping?

在 2019/7/3 9:34, Su Yanjun 写道:

Hi Frank

We tested the pynfs of NFSv4.0 on the latest version of the kernel 
(5.2.0-rc7).
I encountered a problem while testing st_lock.testOpenUpgradeLock. The 
problem is now as follows:

**
LOCK24 st_lock.testOpenUpgradeLock : FAILURE
   OP_LOCK should return NFS4_OK, instead got
   NFS4ERR_BAD_SEQID
**
Is this normal?

The case is as follows:
Def testOpenUpgradeLock(t, env):
"""Try open, lock, open, downgrade, close

FLAGS: all lock
CODE: LOCK24
"""
c= env.c1
C.init_connection()
Os = open_sequence(c, t.code, lockowner="lockowner_LOCK24")
Os.open(OPEN4_SHARE_ACCESS_READ)
Os.lock(READ_LT)
Os.open(OPEN4_SHARE_ACCESS_WRITE)
Os.unlock()
Os.downgrade(OPEN4_SHARE_ACCESS_WRITE)
Os.lock(WRITE_LT)
Os.close()

After investigation, there was an error in unlock->lock. When 
unlocking, the lockowner of the file was not released, causing an 
error when locking again.
Will nfs4.0 support 1) open-> 2) lock-> 3) unlock-> 4) lock this 
function?









[Problem]testOpenUpgradeLock test failed in nfsv4.0 in 5.2.0-rc7

2019-07-02 Thread Su Yanjun

Hi Frank

We tested the pynfs of NFSv4.0 on the latest version of the kernel 
(5.2.0-rc7).
I encountered a problem while testing st_lock.testOpenUpgradeLock. The 
problem is now as follows:

**
LOCK24 st_lock.testOpenUpgradeLock : FAILURE
   OP_LOCK should return NFS4_OK, instead got
   NFS4ERR_BAD_SEQID
**
Is this normal?

The case is as follows:
Def testOpenUpgradeLock(t, env):
"""Try open, lock, open, downgrade, close

FLAGS: all lock
CODE: LOCK24
"""
c= env.c1
C.init_connection()
Os = open_sequence(c, t.code, lockowner="lockowner_LOCK24")
Os.open(OPEN4_SHARE_ACCESS_READ)
Os.lock(READ_LT)
Os.open(OPEN4_SHARE_ACCESS_WRITE)
Os.unlock()
Os.downgrade(OPEN4_SHARE_ACCESS_WRITE)
Os.lock(WRITE_LT)
Os.close()

After investigation, there was an error in unlock->lock. When unlocking, 
the lockowner of the file was not released, causing an error when 
locking again.

Will nfs4.0 support 1) open-> 2) lock-> 3) unlock-> 4) lock this function?





Re: [PATCH] sctp: Add rcu lock to protect dst entry in sctp_transport_route

2019-06-13 Thread Su Yanjun



在 2019/6/12 21:13, Neil Horman 写道:

On Tue, Jun 11, 2019 at 10:33:17AM +0800, Su Yanjun wrote:

在 2019/6/10 19:12, Neil Horman 写道:

On Mon, Jun 10, 2019 at 11:20:00AM +0800, Su Yanjun wrote:

syzbot found a crash in rt_cache_valid. Problem is that when more
threads release dst in sctp_transport_route, the route cache can
be freed.

As follows,
p1:
sctp_transport_route
dst_release
get_dst

p2:
sctp_transport_route
dst_release
get_dst
...

If enough threads calling dst_release will cause dst->refcnt==0
then rcu softirq will reclaim the dst entry,get_dst then use
the freed memory.

This patch adds rcu lock to protect the dst_entry here.

Fixes: 6e91b578bf3f("sctp: re-use sctp_transport_pmtu in

sctp_transport_route")

Signed-off-by: Su Yanjun 
Reported-by: syzbot+a9e23ea2aa21044c2...@syzkaller.appspotmail.com
---
   net/sctp/transport.c | 5 +
   1 file changed, 5 insertions(+)

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ad158d3..5ad7e20 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -308,8 +308,13 @@ void sctp_transport_route(struct sctp_transport

*transport,

struct sctp_association *asoc = transport->asoc;
struct sctp_af *af = transport->af_specific;
+   /* When dst entry is being released, route cache may be referred
+* again. Add rcu lock here to protect dst entry.
+*/
+   rcu_read_lock();
sctp_transport_dst_release(transport);
af->get_dst(transport, saddr, >fl, sctp_opt2sk(opt));
+   rcu_read_unlock();

What is the exact error that syzbot reported?  This doesn't seem like it
fixes

BUG: KASAN: slab-out-of-bounds in rt_cache_valid+0x158/0x190
net/ipv4/route.c:1556
Read of size 2 at addr 8880654f3ac7 by task syz-executor.0/26603

CPU: 0 PID: 26603 Comm: syz-executor.0 Not tainted 5.2.0-rc2+ #9
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x172/0x1f0 lib/dump_stack.c:113
  print_address_description.cold+0x7c/0x20d mm/kasan/report.c:188
  __kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
  kasan_report+0x12/0x20 mm/kasan/common.c:614
  __asan_report_load2_noabort+0x14/0x20 mm/kasan/generic_report.c:130
  rt_cache_valid+0x158/0x190 net/ipv4/route.c:1556
  __mkroute_output net/ipv4/route.c:2332 [inline]
  ip_route_output_key_hash_rcu+0x819/0x2d50 net/ipv4/route.c:2564
  ip_route_output_key_hash+0x1ef/0x360 net/ipv4/route.c:2393
  __ip_route_output_key include/net/route.h:125 [inline]
  ip_route_output_flow+0x28/0xc0 net/ipv4/route.c:2651
  ip_route_output_key include/net/route.h:135 [inline]
  sctp_v4_get_dst+0x467/0x1260 net/sctp/protocol.c:435
  sctp_transport_route+0x12d/0x360 net/sctp/transport.c:297
  sctp_assoc_add_peer+0x53e/0xfc0 net/sctp/associola.c:663
  sctp_process_param net/sctp/sm_make_chunk.c:2531 [inline]
  sctp_process_init+0x2491/0x2b10 net/sctp/sm_make_chunk.c:2344
  sctp_cmd_process_init net/sctp/sm_sideeffect.c:667 [inline]
  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1369 [inline]
  sctp_side_effects net/sctp/sm_sideeffect.c:1179 [inline]
  sctp_do_sm+0x3a30/0x50e0 net/sctp/sm_sideeffect.c:1150
  sctp_assoc_bh_rcv+0x343/0x660 net/sctp/associola.c:1059
  sctp_inq_push+0x1e4/0x280 net/sctp/inqueue.c:80
  sctp_backlog_rcv+0x196/0xbe0 net/sctp/input.c:339
  sk_backlog_rcv include/net/sock.h:945 [inline]
  __release_sock+0x129/0x390 net/core/sock.c:2412
  release_sock+0x59/0x1c0 net/core/sock.c:2928
  sctp_wait_for_connect+0x316/0x540 net/sctp/socket.c:9039
  __sctp_connect+0xab2/0xcd0 net/sctp/socket.c:1226
  __sctp_setsockopt_connectx+0x133/0x1a0 net/sctp/socket.c:1334
  sctp_setsockopt_connectx_old net/sctp/socket.c:1350 [inline]
  sctp_setsockopt net/sctp/socket.c:4644 [inline]
  sctp_setsockopt+0x22c0/0x6d10 net/sctp/socket.c:4608
  compat_sock_common_setsockopt+0x106/0x140 net/core/sock.c:3137
  __compat_sys_setsockopt+0x185/0x380 net/compat.c:383
  __do_compat_sys_setsockopt net/compat.c:396 [inline]
  __se_compat_sys_setsockopt net/compat.c:393 [inline]
  __ia32_compat_sys_setsockopt+0xbd/0x150 net/compat.c:393
  do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline]
  do_fast_syscall_32+0x27b/0xd7d arch/x86/entry/common.c:408
  entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139
RIP: 0023:0xf7ff5849
Code: 85 d2 74 02 89 0a 5b 5d c3 8b 04 24 c3 8b 14 24 c3 8b 3c 24 c3 90 90
90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90
90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90
RSP: 002b:f5df10cc EFLAGS: 0296 ORIG_RAX: 016e
RAX: ffda RBX: 0007 RCX: 0084
RDX: 006b RSI: 2055bfe4 RDI: 001c
RBP:  R08:  R09: 
R10:  R11:  R12: 
R13:  R14:  R15: 

[PATCH] sctp: Add rcu lock to protect dst entry in sctp_transport_route

2019-06-09 Thread Su Yanjun
syzbot found a crash in rt_cache_valid. Problem is that when more
threads release dst in sctp_transport_route, the route cache can
be freed.

As follows,
p1:
sctp_transport_route
  dst_release
  get_dst

p2:
sctp_transport_route
  dst_release
  get_dst
...

If enough threads calling dst_release will cause dst->refcnt==0
then rcu softirq will reclaim the dst entry,get_dst then use
the freed memory.

This patch adds rcu lock to protect the dst_entry here.

Fixes: 6e91b578bf3f("sctp: re-use sctp_transport_pmtu in sctp_transport_route")
Signed-off-by: Su Yanjun 
Reported-by: syzbot+a9e23ea2aa21044c2...@syzkaller.appspotmail.com
---
 net/sctp/transport.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ad158d3..5ad7e20 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -308,8 +308,13 @@ void sctp_transport_route(struct sctp_transport *transport,
struct sctp_association *asoc = transport->asoc;
struct sctp_af *af = transport->af_specific;
 
+   /* When dst entry is being released, route cache may be referred
+* again. Add rcu lock here to protect dst entry.
+*/
+   rcu_read_lock();
sctp_transport_dst_release(transport);
af->get_dst(transport, saddr, >fl, sctp_opt2sk(opt));
+   rcu_read_unlock();
 
if (saddr)
memcpy(>saddr, saddr, sizeof(union sctp_addr));
-- 
2.7.4





[PATCH v3] net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm

2019-03-06 Thread Su Yanjun
For rcu protected pointers, we'd better add '__rcu' for them.

Once added '__rcu' tag for rcu protected pointer, the sparse tool reports
warnings.

net/xfrm/xfrm_user.c:1198:39: sparse:expected struct sock *sk
net/xfrm/xfrm_user.c:1198:39: sparse:got struct sock [noderef]  *nlsk
[...]

So introduce a new wrapper function of nlmsg_unicast  to handle type
conversions.

This patch also fixes a direct access of a rcu protected socket.

Fixes: be33690d8fcf("[XFRM]: Fix aevent related crash")
Signed-off-by: Su Yanjun 
---
Changes from v2:
- add 'Fixes' tag and some description

 include/net/netns/xfrm.h |  2 +-
 net/xfrm/xfrm_user.c | 30 +++---
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 59f45b1..d2a36fb 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -57,7 +57,7 @@ struct netns_xfrm {
struct list_headinexact_bins;
 
 
-   struct sock *nlsk;
+   struct sock __rcu *nlsk;
struct sock *nlsk_stash;
 
u32 sysctl_aevent_etime;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d832783..e8f23e4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1071,6 +1071,22 @@ static inline int xfrm_nlmsg_multicast(struct net *net, 
struct sk_buff *skb,
return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
 }
 
+/* A similar wrapper like xfrm_nlmsg_multicast checking that nlsk is still
+ * available.
+ */
+static inline int xfrm_nlmsg_unicast(struct net *net, struct sk_buff *skb,
+u32 pid)
+{
+   struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+   if (!nlsk) {
+   kfree_skb(skb);
+   return -EPIPE;
+   }
+
+   return nlmsg_unicast(nlsk, skb, pid);
+}
+
 static inline unsigned int xfrm_spdinfo_msgsize(void)
 {
return NLMSG_ALIGN(4)
@@ -1195,7 +1211,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct 
nlmsghdr *nlh,
err = build_spdinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
 
-   return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+   return xfrm_nlmsg_unicast(net, r_skb, sportid);
 }
 
 static inline unsigned int xfrm_sadinfo_msgsize(void)
@@ -1254,7 +1270,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct 
nlmsghdr *nlh,
err = build_sadinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
 
-   return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+   return xfrm_nlmsg_unicast(net, r_skb, sportid);
 }
 
 static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1274,7 +1290,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct 
nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
-   err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, 
NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid);
}
xfrm_state_put(x);
 out_noput:
@@ -1337,7 +1353,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct 
nlmsghdr *nlh,
goto out;
}
 
-   err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid);
 
 out:
xfrm_state_put(x);
@@ -1903,8 +1919,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct 
nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
-   err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
-   NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, resp_skb,
+NETLINK_CB(skb).portid);
}
} else {
xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
@@ -2062,7 +2078,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct 
nlmsghdr *nlh,
err = build_aevent(r_skb, x, );
BUG_ON(err < 0);
 
-   err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(>lock);
xfrm_state_put(x);
return err;
-- 
2.7.4





Re: [PATCH] net: xfrm: Fix potential oops in xfrm_user_rcv_msg and array out of bounds

2019-03-04 Thread Su Yanjun



On 2019/3/5 15:31, Steffen Klassert wrote:

On Tue, Mar 05, 2019 at 03:08:49PM +0800, Su Yanjun  
wrote:

On 2019/3/5 14:49, Herbert Xu wrote:


On Sun, Mar 03, 2019 at 10:47:39PM -0500, Su Yanjun wrote:

When i review xfrm_user.c code, i found some potentical bug in it.

In xfrm_user_rcvmsg if type parameter from user space is set to
XFRM_MSG_MAX or  XFRM_MSG_NEWSADINFO or XFRM_MSG_NEWSPDINFO. It will cause
xfrm_user_rcv_msg  referring to null entry in xfrm_dispatch array.

Signed-off-by: Su Yanjun 
---
   net/xfrm/xfrm_user.c | 4 +++-
   1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a131f9f..d832783 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2630,11 +2630,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, 
struct nlmsghdr *nlh,
return -EOPNOTSUPP;
type = nlh->nlmsg_type;
-   if (type > XFRM_MSG_MAX)
+   if (type >= XFRM_MSG_MAX)
return -EINVAL;

Your patch is wrong.  Please check the definition of XFRM_MSG_MAX.

I see, thanks for your reply.

type -= XFRM_MSG_BASE;
link = _dispatch[type];
+   if (!link)
+   return -EOPNOTSUPP;

Here **link** may refer to null entry for special types such as
XFRM_MSG_MAX or  XFRM_MSG_NEWSADINFO or XFRM_MSG_NEWSPDINFO
Am i miss something?

'link' is always a valid pointer into that array.


Thanks

Su





[PATCH v2] net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm

2019-03-04 Thread Su Yanjun
For rcu protected pointers, we'd better add '__rcu' for them.

Once added '__rcu' tag for rcu protected pointer, the sparse tool reports
warnings.

net/xfrm/xfrm_user.c:1198:39: sparse:expected struct sock *sk
net/xfrm/xfrm_user.c:1198:39: sparse:got struct sock [noderef]  *nlsk
[...]

So introduce a new wrapper function of nlmsg_unicast  to handle type
conversions.

No functional change.

Signed-off-by: Su Yanjun 
---
Changes from v1:
- fix spelling mistakes and the sparse tool' warnings

 include/net/netns/xfrm.h |  2 +-
 net/xfrm/xfrm_user.c | 30 +++---
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 59f45b1..d2a36fb 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -57,7 +57,7 @@ struct netns_xfrm {
struct list_headinexact_bins;
 
 
-   struct sock *nlsk;
+   struct sock __rcu *nlsk;
struct sock *nlsk_stash;
 
u32 sysctl_aevent_etime;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d832783..e8f23e4 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1071,6 +1071,22 @@ static inline int xfrm_nlmsg_multicast(struct net *net, 
struct sk_buff *skb,
return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC);
 }
 
+/* A similar wrapper like xfrm_nlmsg_multicast checking that nlsk is still
+ * available.
+ */
+static inline int xfrm_nlmsg_unicast(struct net *net, struct sk_buff *skb,
+u32 pid)
+{
+   struct sock *nlsk = rcu_dereference(net->xfrm.nlsk);
+
+   if (!nlsk) {
+   kfree_skb(skb);
+   return -EPIPE;
+   }
+
+   return nlmsg_unicast(nlsk, skb, pid);
+}
+
 static inline unsigned int xfrm_spdinfo_msgsize(void)
 {
return NLMSG_ALIGN(4)
@@ -1195,7 +1211,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct 
nlmsghdr *nlh,
err = build_spdinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
 
-   return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+   return xfrm_nlmsg_unicast(net, r_skb, sportid);
 }
 
 static inline unsigned int xfrm_sadinfo_msgsize(void)
@@ -1254,7 +1270,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct 
nlmsghdr *nlh,
err = build_sadinfo(r_skb, net, sportid, seq, *flags);
BUG_ON(err < 0);
 
-   return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
+   return xfrm_nlmsg_unicast(net, r_skb, sportid);
 }
 
 static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1274,7 +1290,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct 
nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
-   err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, 
NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid);
}
xfrm_state_put(x);
 out_noput:
@@ -1337,7 +1353,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct 
nlmsghdr *nlh,
goto out;
}
 
-   err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, resp_skb, NETLINK_CB(skb).portid);
 
 out:
xfrm_state_put(x);
@@ -1903,8 +1919,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct 
nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
-   err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
-   NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, resp_skb,
+NETLINK_CB(skb).portid);
}
} else {
xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
@@ -2062,7 +2078,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct 
nlmsghdr *nlh,
err = build_aevent(r_skb, x, );
BUG_ON(err < 0);
 
-   err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
+   err = xfrm_nlmsg_unicast(net, r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(>lock);
xfrm_state_put(x);
return err;
-- 
2.7.4





[PATCH] net: xfrm: Add '_rcu' tag for rcu protected pointer in netns_xfrm

2019-03-03 Thread Su Yanjun
For rcu protected pointers, we'd bettter add '__rcu' for them.

No functional change.

Signed-off-by: Su Yanjun 
---
 include/net/netns/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 59f45b1..d2a36fb 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -57,7 +57,7 @@ struct netns_xfrm {
struct list_headinexact_bins;
 
 
-   struct sock *nlsk;
+   struct sock __rcu *nlsk;
struct sock *nlsk_stash;
 
u32 sysctl_aevent_etime;
-- 
2.7.4





[PATCH] net: xfrm: Fix potential oops in xfrm_user_rcv_msg and array out of bounds

2019-03-03 Thread Su Yanjun
When i review xfrm_user.c code, i found some potentical bug in it.

In xfrm_user_rcvmsg if type parameter from user space is set to
XFRM_MSG_MAX or  XFRM_MSG_NEWSADINFO or XFRM_MSG_NEWSPDINFO. It will cause
xfrm_user_rcv_msg  referring to null entry in xfrm_dispatch array.

Signed-off-by: Su Yanjun 
---
 net/xfrm/xfrm_user.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a131f9f..d832783 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2630,11 +2630,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, 
struct nlmsghdr *nlh,
return -EOPNOTSUPP;
 
type = nlh->nlmsg_type;
-   if (type > XFRM_MSG_MAX)
+   if (type >= XFRM_MSG_MAX)
return -EINVAL;
 
type -= XFRM_MSG_BASE;
link = _dispatch[type];
+   if (!link)
+   return -EOPNOTSUPP;
 
/* All operations require privileges, even GET */
if (!netlink_net_capable(skb, CAP_NET_ADMIN))
-- 
2.7.4





Re: [PATCH] netfilter: nf_ct_helper: Fix possible panic when nf_conntrack_helper_unregister is used in an unloadable module

2019-03-01 Thread Su Yanjun



On 2019/3/1 16:43, Sergei Shtylyov wrote:

Hello!

On 01.03.2019 8:56, Su Yanjun wrote:


From: Su Yanjun 

Because nf_conntrack_helper_unregister maybe used in an unloadable 
module,

it uses 'synchronize_rcu' which may cause kernel panic.

According to the artical:


   Article?


I got it.

RCU and Unloadable Modules
https://lwn.net/Articles/217484/

When we have a heavy rcu callback load, then some of the callbacks 
might be
deferred in order to allow other processing to proceed. 
sychnorize_rcu does
not wait rcu callback complete and module may be unloaded before 
callback

done.

This patch uses rcu_barrier instead of synchronize_rcu will prevent this
    ^ that/which 
missed?



Yes.

situation.

Signed-off-by: Su Yanjun 

[...]

MBR, Sergei



Thanks.

Su






[PATCH] netfilter: nf_ct_helper: Fix possible panic when nf_conntrack_helper_unregister is used in an unloadable module

2019-02-28 Thread Su Yanjun
From: Su Yanjun 

Because nf_conntrack_helper_unregister maybe used in an unloadable module,
it uses 'synchronize_rcu' which may cause kernel panic.

According to the artical:
RCU and Unloadable Modules
https://lwn.net/Articles/217484/

When we have a heavy rcu callback load, then some of the callbacks might be
deferred in order to allow other processing to proceed. sychnorize_rcu does
not wait rcu callback complete and module may be unloaded before callback
done.

This patch uses rcu_barrier instead of synchronize_rcu will prevent this
situation.

Signed-off-by: Su Yanjun 
---
 net/netfilter/nf_conntrack_helper.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_helper.c 
b/net/netfilter/nf_conntrack_helper.c
index 274baf1..0ee9378 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -397,8 +397,15 @@ void nf_conntrack_helper_unregister(struct 
nf_conntrack_helper *me)
 
/* Make sure every nothing is still using the helper unless its a
 * connection in the hash.
+*
+* 'synchronize_rcu' may have problem when rcu callback function
+* is used in unloadable modules. Use rcu_barrier instead, so that
+* it will wait until rcu callback completes before modules are
+* unloaded.
+* More detail about rcu_barrier please see:
+* https://lwn.net/Articles/217484/
 */
-   synchronize_rcu();
+   rcu_barrier();
 
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
@@ -406,7 +413,7 @@ void nf_conntrack_helper_unregister(struct 
nf_conntrack_helper *me)
/* Maybe someone has gotten the helper already when unhelp above.
 * So need to wait it.
 */
-   synchronize_rcu();
+   rcu_barrier();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
 
-- 
2.7.4




Re: [PATCH] xfs: correct statx's result_mask value

2019-01-07 Thread Su Yanjun




On 1/8/2019 1:07 PM, Darrick J. Wong wrote:

On Tue, Jan 08, 2019 at 12:58:43PM +0800, Su Yanjun  
wrote:


On 1/8/2019 2:04 AM, Eric Sandeen wrote:

On 1/7/19 11:52 AM, Darrick J. Wong wrote:

On Mon, Jan 07, 2019 at 04:53:10AM -0500, Su Yanjun wrote:

For statx syscall, xfs return the wrong result_mask.

Signed-off-by: Su Yanjun
---
   fs/xfs/xfs_iops.c | 3 +++
   1 file changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f48ffd7..3811457 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -521,6 +521,9 @@ xfs_vn_getattr(
stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
}
}
+   
+   /* Only return mask that we care */
+   stat->result_mask &= request_mask;

Why not just:

stat->result_mask = STATX_BASIC_STATS;

at the top of the function?

I don't see the need to mask off result_mask at all, since we could some
day elect to return more than what's in request_mask...

When we run xfstests with nfs, the generic/423 case runs failed. So i review
the nfs'
nfs_getattr code it does validate the request_mask.

Then i review the xfs' getattr code, it has no such check. Whatever
request_mask
  is set, the stat's result_mask always the 0x7ff.

Yes, statx can return more data than what userspace callers ask for:


Maybe it has Unclear semantics about statx's result_mask.

"A filesystem may also fill in fields that the caller didn't ask for if
it has values for them available and the information is available at no
extra cost.  If this happens, the  corresponding  bits will be set in
stx_mask."

--D


I get it, then the testcase generic/423 may need update in xfstests.
Thanks for your reply.


...waitaminute, are you seeing garbage in the result_mask that's
returned to userspace?  I also noticed the vfs stat functions declare
"struct kstat stat;" without explicitly zeroing the structure fields,
which means (I think) that we can leak stack information if the kernel
isn't built with the stackleak plugin?

No such problem.

A clear problem statement and reproducer steps would be hugely useful
here.

-Eric

Thanks,
Su










Re: [PATCH] xfs: correct statx's result_mask value

2019-01-07 Thread Su Yanjun




On 1/8/2019 2:04 AM, Eric Sandeen wrote:

On 1/7/19 11:52 AM, Darrick J. Wong wrote:

On Mon, Jan 07, 2019 at 04:53:10AM -0500, Su Yanjun wrote:

For statx syscall, xfs return the wrong result_mask.

Signed-off-by: Su Yanjun
---
  fs/xfs/xfs_iops.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f48ffd7..3811457 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -521,6 +521,9 @@ xfs_vn_getattr(
stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
}
}
+   
+   /* Only return mask that we care */
+   stat->result_mask &= request_mask;

Why not just:

stat->result_mask = STATX_BASIC_STATS;

at the top of the function?

I don't see the need to mask off result_mask at all, since we could some
day elect to return more than what's in request_mask...
When we run xfstests with nfs, the generic/423 case runs failed. So i 
review the nfs'

nfs_getattr code it does validate the request_mask.

Then i review the xfs' getattr code, it has no such check. Whatever 
request_mask

 is set, the stat's result_mask always the 0x7ff.

Maybe it has Unclear semantics about statx's result_mask.

...waitaminute, are you seeing garbage in the result_mask that's
returned to userspace?  I also noticed the vfs stat functions declare
"struct kstat stat;" without explicitly zeroing the structure fields,
which means (I think) that we can leak stack information if the kernel
isn't built with the stackleak plugin?

No such problem.

A clear problem statement and reproducer steps would be hugely useful
here.

-Eric

Thanks,
Su




[PATCH] xfs: correct statx's result_mask value

2019-01-07 Thread Su Yanjun
For statx syscall, xfs return the wrong result_mask.

Signed-off-by: Su Yanjun 
---
 fs/xfs/xfs_iops.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f48ffd7..3811457 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -521,6 +521,9 @@ xfs_vn_getattr(
stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
}
}
+   
+   /* Only return mask that we care */
+   stat->result_mask &= request_mask;
 
if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
stat->attributes |= STATX_ATTR_IMMUTABLE;
-- 
2.7.4





[PATCH v2] vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel

2019-01-06 Thread Su Yanjun
Recently we run a network test over ipcomp virtual tunnel.We find that
if a ipv4 packet needs fragment, then the peer can't receive
it.

We deep into the code and find that when packet need fragment the smaller
fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
error.

This patch adds compatible support for the ipip process in ipcomp virtual 
tunnel.

Signed-off-by: Su Yanjun 
---
Based on Linus' master branch.

Changes from v1:
- use a separate handler for ipip packet processing in ipcomp virtual tunnel

 net/ipv4/ip_vti.c | 50 ++
 1 file changed, 50 insertions(+)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index de31b30..d155a51 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -74,6 +74,33 @@ static int vti_input(struct sk_buff *skb, int nexthdr, 
__be32 spi,
return 0;
 }
 
+static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
+int encap_type)
+{
+   struct ip_tunnel *tunnel;
+   const struct iphdr *iph = ip_hdr(skb);
+   struct net *net = dev_net(skb->dev);
+   struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+
+   tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->saddr, iph->daddr, 0);
+   if (tunnel) {
+   if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+   goto drop;
+
+   XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+
+   skb->dev = tunnel->dev;
+
+   return xfrm_input(skb, nexthdr, spi, encap_type);
+   }
+
+   return -EINVAL;
+drop:
+   kfree_skb(skb);
+   return 0;
+}
+
 static int vti_rcv(struct sk_buff *skb)
 {
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
@@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb)
return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
 }
 
+static int vti_rcv_ipip(struct sk_buff *skb)
+{
+   XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+   XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+
+   return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 
0);
+}
+
 static int vti_rcv_cb(struct sk_buff *skb, int err)
 {
unsigned short family;
@@ -429,6 +464,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol 
__read_mostly = {
.priority   =   100,
 };
 
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+   .handler=   vti_rcv_ipip,
+   .err_handler=   vti4_err,
+   .priority   =   0,
+};
+
 static int __net_init vti_init_net(struct net *net)
 {
int err;
@@ -597,6 +638,13 @@ static int __init vti_init(void)
if (err < 0)
goto xfrm_proto_comp_failed;
 
+   msg = "ipip tunnel";
+   err = xfrm4_tunnel_register(_handler, AF_INET);
+   if (err < 0) {
+   pr_info("%s: cant't register tunnel\n",__func__);
+   goto xfrm_tunnel_failed;
+   }
+
msg = "netlink interface";
err = rtnl_link_register(_link_ops);
if (err < 0)
@@ -606,6 +654,8 @@ static int __init vti_init(void)
 
 rtnl_link_failed:
xfrm4_protocol_deregister(_ipcomp4_protocol, IPPROTO_COMP);
+xfrm_tunnel_failed:
+   xfrm4_tunnel_deregister(_handler, AF_INET);
 xfrm_proto_comp_failed:
xfrm4_protocol_deregister(_ah4_protocol, IPPROTO_AH);
 xfrm_proto_ah_failed:
-- 
2.7.4





Re: [PATCH] vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel

2019-01-04 Thread Su Yanjun

On 1/4/2019 3:43 PM, Steffen Klassert wrote:


On Thu, Jan 03, 2019 at 07:48:41AM -0500, Su Yanjun wrote:

Recently we run a network test over ipcomp virtual tunnel.We find that
if a ipv4 packet needs fragment, then the peer can't receive
it.

We deep into the code and find that when packet need fragment the smaller
fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
error.

Why not just leaving rp_filter disabled or in 'loose mode' if you use ipcomp?

In my option rp_filter should not affect the ip_vti functionality.
The root cause is the origin tunnel code doesn't update skb->dev.
vti only cares about ipcomp, esp, ah packets.

This patch adds compatible support for the ipip process in ipcomp virtual 
tunnel.

Signed-off-by: Su Yanjun 
---
  net/ipv4/ip_vti.c | 25 -
  1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index de31b30..63de2f6 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 
spi,
  
  		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
  
+		if (iph->protocol == IPPROTO_IPIP)

+   skb->dev = tunnel->dev;
+
return xfrm_input(skb, nexthdr, spi, encap_type);
}
  
@@ -76,10 +79,15 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
  
  static int vti_rcv(struct sk_buff *skb)

  {
+   __be32 spi = 0;
+   
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+   
+   if (ip_hdr(skb)->protocol == IPPROTO_IPIP)
+   spi = ip_hdr(skb)->saddr;
  
-	return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);

+   return vti_input(skb, ip_hdr(skb)->protocol, spi, 0);

You use the src address as spi, how is this supposed to work?

This code derives from xfrm4_tunnel and i just want the vti can handle 
ipip packet as xfrm4 tunnel does.



Thanks,
Su




[PATCH] vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel

2019-01-02 Thread Su Yanjun
Recently we run a network test over ipcomp virtual tunnel.We find that
if a ipv4 packet needs fragment, then the peer can't receive
it.

We deep into the code and find that when packet need fragment the smaller
fragment will be encapsulated by ipip not ipcomp. So when the ipip packet
goes into xfrm, it's skb->dev is not properly set. The ipv4 reassembly code
always set skb'dev to the last fragment's dev. After ipv4 defrag processing,
when the kernel rp_filter parameter is set, the skb will be drop by -EXDEV
error.

This patch adds compatible support for the ipip process in ipcomp virtual 
tunnel.

Signed-off-by: Su Yanjun 
---
 net/ipv4/ip_vti.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index de31b30..63de2f6 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -65,6 +65,9 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 
spi,
 
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
 
+   if (iph->protocol == IPPROTO_IPIP)
+   skb->dev = tunnel->dev;
+
return xfrm_input(skb, nexthdr, spi, encap_type);
}
 
@@ -76,10 +79,15 @@ static int vti_input(struct sk_buff *skb, int nexthdr, 
__be32 spi,
 
 static int vti_rcv(struct sk_buff *skb)
 {
+   __be32 spi = 0;
+   
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+   
+   if (ip_hdr(skb)->protocol == IPPROTO_IPIP)
+   spi = ip_hdr(skb)->saddr;
 
-   return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+   return vti_input(skb, ip_hdr(skb)->protocol, spi, 0);
 }
 
 static int vti_rcv_cb(struct sk_buff *skb, int err)
@@ -429,6 +437,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol 
__read_mostly = {
.priority   =   100,
 };
 
+static struct xfrm_tunnel ipip_handler __read_mostly = {
+   .handler=   vti_rcv,
+   .err_handler=   vti4_err,
+   .priority   =   0,
+};
+
 static int __net_init vti_init_net(struct net *net)
 {
int err;
@@ -597,6 +611,13 @@ static int __init vti_init(void)
if (err < 0)
goto xfrm_proto_comp_failed;
 
+   msg = "ipip tunnel";
+   err = xfrm4_tunnel_register(_handler, AF_INET);
+   if (err < 0) {
+   pr_info("%s: cant't register tunnel\n",__func__);
+   goto xfrm_tunnel_failed;
+   }
+
msg = "netlink interface";
err = rtnl_link_register(_link_ops);
if (err < 0)
@@ -606,6 +627,8 @@ static int __init vti_init(void)
 
 rtnl_link_failed:
xfrm4_protocol_deregister(_ipcomp4_protocol, IPPROTO_COMP);
+xfrm_tunnel_failed:
+   xfrm4_tunnel_deregister(_handler, AF_INET);
 xfrm_proto_comp_failed:
xfrm4_protocol_deregister(_ah4_protocol, IPPROTO_AH);
 xfrm_proto_ah_failed:
-- 
2.7.4





[PATCH] ipv6: fix typo in net/ipv6/reassembly.c

2018-12-29 Thread Su Yanjun
Signed-off-by: Su Yanjun 
---
 net/ipv6/reassembly.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a5bb59e..36a3d8d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -210,7 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct 
sk_buff *skb,
if (next && next->ip_defrag_offset < end)
goto discard_fq;
 
-   /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+   /* Note : skb->ip_defrag_offset and skb->sk share the same location */
dev = skb->dev;
if (dev)
fq->iif = dev->ifindex;
-- 
2.7.4