Re: [Cluster-devel] [PATCH 20/33] ipv4: add ip_sock_set_recverr

2020-05-13 Thread Joe Perches
On Wed, 2020-05-13 at 08:26 +0200, Christoph Hellwig wrote:
> Add a helper to directly set the IP_RECVERR sockopt from kernel space
> without going through a fake uaccess.

This seems used only with true as the second arg.
Is there a reason to have that argument at all?

> diff --git a/include/net/ip.h b/include/net/ip.h
[]
> @@ -767,5 +767,6 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
>  
>  void ip_sock_set_tos(struct sock *sk, int val);
>  void ip_sock_set_freebind(struct sock *sk, bool val);
> +void ip_sock_set_recverr(struct sock *sk, bool val);
>  
>  #endif   /* _IP_H */
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index 0c40887a817f8..9abecc3195520 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -589,6 +589,16 @@ void ip_sock_set_freebind(struct sock *sk, bool val)
>  }
>  EXPORT_SYMBOL(ip_sock_set_freebind);
>  
> +void ip_sock_set_recverr(struct sock *sk, bool val)
> +{
> + lock_sock(sk);
> + inet_sk(sk)->recverr = val;
> + if (!val)
> + skb_queue_purge(&sk->sk_error_queue);
> + release_sock(sk);
> +}
> +EXPORT_SYMBOL(ip_sock_set_recverr);
> +
>  /*
>   *   Socket option code for IP. This is the end of the line after any
>   *   TCP,UDP etc options on an IP socket.
> diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
> index 562ea36c96b0f..1b87b8a9ff725 100644
> --- a/net/rxrpc/local_object.c
> +++ b/net/rxrpc/local_object.c
> @@ -171,13 +171,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, 
> struct net *net)
>   /* Fall through */
>   case AF_INET:
>   /* we want to receive ICMP errors */
> - opt = 1;
> - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
> - (char *) &opt, sizeof(opt));
> - if (ret < 0) {
> - _debug("setsockopt failed");
> - goto error;
> - }
> + ip_sock_set_recverr(local->socket->sk, true);
>  
>   /* we want to set the don't fragment bit */
>   opt = IP_PMTUDISC_DO;



Re: [Cluster-devel] remove kernel_setsockopt and kernel_getsockopt

2020-05-13 Thread David Miller
From: Christoph Hellwig 
Date: Wed, 13 May 2020 08:26:15 +0200

> Hi Dave,
> 
> this series removes the kernel_setsockopt and kernel_getsockopt
> functions, and instead switches their users to small functions that
> implement setting (or in one case getting) a sockopt directly using
> a normal kernel function call with type safety and all the other
> benefits of not having a function call.
> 
> In some cases these functions seem pretty heavy handed as they do
> a lock_sock even for just setting a single variable, but this mirrors
> the real setsockopt implementation - counter to that a few kernel
> drivers just set the fields directly already.
> 
> Nevertheless the diffstat looks quite promising:
> 
>  42 files changed, 721 insertions(+), 799 deletions(-)

Overall I'm fine with these changes, but three things need to happen
before I can think about applying this:

1) Address David's feedback about the ip_mtu*() calls that can occur
   on ipv6 sockets too.

2) Handle the feedback about dlm now bringing in sctp even if sctp
   sockets are not even used because of the symbol dependency.

3) Add the rxrpc documentation requested by David.

Thank you.



Re: [Cluster-devel] remove kernel_setsockopt and kernel_getsockopt

2020-05-13 Thread Sagi Grimberg




Hi Dave,

this series removes the kernel_setsockopt and kernel_getsockopt
functions, and instead switches their users to small functions that
implement setting (or in one case getting) a sockopt directly using
a normal kernel function call with type safety and all the other
benefits of not having a function call.

In some cases these functions seem pretty heavy handed as they do
a lock_sock even for just setting a single variable, but this mirrors
the real setsockopt implementation - counter to that a few kernel
drivers just set the fields directly already.

Nevertheless the diffstat looks quite promising:

  42 files changed, 721 insertions(+), 799 deletions(-)


For the nvme-tcp bits,

Acked-by: Sagi Grimberg 



Re: [Cluster-devel] [PATCH 32/33] sctp: add sctp_sock_get_primary_addr

2020-05-13 Thread Marcelo Ricardo Leitner
On Wed, May 13, 2020 at 08:26:47AM +0200, Christoph Hellwig wrote:
> Add a helper to directly get the SCTP_PRIMARY_ADDR sockopt from kernel
> space without going through a fake uaccess.

Same comment as on the other dlm/sctp patch.

> 
> Signed-off-by: Christoph Hellwig 
> ---
>  fs/dlm/lowcomms.c   | 11 +++-
>  include/net/sctp/sctp.h |  1 +
>  net/sctp/socket.c   | 57 +
>  3 files changed, 39 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
> index 6fa45365666a8..46d2d71b62c57 100644
> --- a/fs/dlm/lowcomms.c
> +++ b/fs/dlm/lowcomms.c
> @@ -855,10 +855,9 @@ static int tcp_accept_from_sock(struct connection *con)
>  static int sctp_accept_from_sock(struct connection *con)
>  {
>   /* Check that the new node is in the lockspace */
> - struct sctp_prim prim;
> + struct sctp_prim prim = { };
>   int nodeid;
> - int prim_len, ret;
> - int addr_len;
> + int addr_len, ret;
>   struct connection *newcon;
>   struct connection *addcon;
>   struct socket *newsock;
> @@ -876,11 +875,7 @@ static int sctp_accept_from_sock(struct connection *con)
>   if (ret < 0)
>   goto accept_err;
>  
> - memset(&prim, 0, sizeof(struct sctp_prim));
> - prim_len = sizeof(struct sctp_prim);
> -
> - ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
> - (char *)&prim, &prim_len);
> + ret = sctp_sock_get_primary_addr(con->sock->sk, &prim);
>   if (ret < 0) {
>   log_print("getsockopt/sctp_primary_addr failed: %d", ret);
>   goto accept_err;
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index b505fa082f254..c98b1d14db853 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -618,5 +618,6 @@ static inline bool sctp_newsk_ready(const struct sock *sk)
>  int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
>   int addrs_size, int op);
>  void sctp_sock_set_nodelay(struct sock *sk, bool val);
> +int sctp_sock_get_primary_addr(struct sock *sk, struct sctp_prim *prim);
>  
>  #endif /* __net_sctp_h__ */
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 64c395f7a86d5..39bf8090dbe1e 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -6411,6 +6411,35 @@ static int sctp_getsockopt_local_addrs(struct sock 
> *sk, int len,
>   return err;
>  }
>  
> +static int __sctp_sock_get_primary_addr(struct sock *sk, struct sctp_prim 
> *prim)
> +{
> + struct sctp_association *asoc;
> +
> + asoc = sctp_id2assoc(sk, prim->ssp_assoc_id);
> + if (!asoc)
> + return -EINVAL;
> + if (!asoc->peer.primary_path)
> + return -ENOTCONN;
> +
> + memcpy(&prim->ssp_addr, &asoc->peer.primary_path->ipaddr,
> + asoc->peer.primary_path->af_specific->sockaddr_len);
> +
> + sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk),
> + (union sctp_addr *)&prim->ssp_addr);
> + return 0;
> +}
> +
> +int sctp_sock_get_primary_addr(struct sock *sk, struct sctp_prim *prim)
> +{
> + int ret;
> +
> + lock_sock(sk);
> + ret = __sctp_sock_get_primary_addr(sk, prim);
> + release_sock(sk);
> + return ret;
> +}
> +EXPORT_SYMBOL(sctp_sock_get_primary_addr);
> +
>  /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
>   *
>   * Requests that the local SCTP stack use the enclosed peer address as
> @@ -6421,35 +6450,19 @@ static int sctp_getsockopt_primary_addr(struct sock 
> *sk, int len,
>   char __user *optval, int __user *optlen)
>  {
>   struct sctp_prim prim;
> - struct sctp_association *asoc;
> - struct sctp_sock *sp = sctp_sk(sk);
> + int ret;
>  
>   if (len < sizeof(struct sctp_prim))
>   return -EINVAL;
> -
> - len = sizeof(struct sctp_prim);
> -
> - if (copy_from_user(&prim, optval, len))
> + if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
>   return -EFAULT;
>  
> - asoc = sctp_id2assoc(sk, prim.ssp_assoc_id);
> - if (!asoc)
> - return -EINVAL;
> -
> - if (!asoc->peer.primary_path)
> - return -ENOTCONN;
> -
> - memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr,
> - asoc->peer.primary_path->af_specific->sockaddr_len);
> -
> - sctp_get_pf_specific(sk->sk_family)->addr_to_user(sp,
> - (union sctp_addr *)&prim.ssp_addr);
> + ret = __sctp_sock_get_primary_addr(sk, &prim);
> + if (ret)
> + return ret;
>  
> - if (put_user(len, optlen))
> + if (put_user(len, optlen) || copy_to_user(optval, &prim, len))
>   return -EFAULT;
> - if (copy_to_user(optval, &prim, len))
> - return -EFAULT;
> -
>   return 0;
>  }
>  
> -- 
> 2.26.2
> 



Re: [Cluster-devel] [PATCH 27/33] sctp: export sctp_setsockopt_bindx

2020-05-13 Thread Marcelo Ricardo Leitner
On Wed, May 13, 2020 at 08:26:42AM +0200, Christoph Hellwig wrote:
> And call it directly from dlm instead of going through kernel_setsockopt.

The advantage of using kernel_setsockopt here is that sctp module will
only be loaded if dlm actually creates a SCTP socket.  With this
change, sctp will be loaded on setups that may not be actually using
it. It's a quite big module and might expose the system.

I'm okay with the SCTP changes, but I'll defer to DLM folks to whether
that's too bad or what for DLM.

> 
> Signed-off-by: Christoph Hellwig 
> ---
>  fs/dlm/lowcomms.c   | 13 -
>  include/net/sctp/sctp.h |  3 +++
>  net/sctp/socket.c   |  5 +++--
>  3 files changed, 14 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
> index b722a09a7ca05..e4939d770df53 100644
> --- a/fs/dlm/lowcomms.c
> +++ b/fs/dlm/lowcomms.c
> @@ -1005,14 +1005,17 @@ static int sctp_bind_addrs(struct connection *con, 
> uint16_t port)
>   memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
>   make_sockaddr(&localaddr, port, &addr_len);
>  
> - if (!i)
> + if (!i) {
>   result = kernel_bind(con->sock,
> -  (struct sockaddr *)&localaddr,
>addr_len);
> - else
> - result = kernel_setsockopt(con->sock, SOL_SCTP,
> -SCTP_SOCKOPT_BINDX_ADD,
> -(char *)&localaddr, 
> addr_len);
> + } else {
> + lock_sock(con->sock->sk);
> + result = sctp_setsockopt_bindx(con->sock->sk,
> + (struct sockaddr *)&localaddr, addr_len,
> + SCTP_BINDX_ADD_ADDR);
> + release_sock(con->sock->sk);
> + }
>  
>   if (result < 0) {
>   log_print("Can't bind to %d addr number %d, %d.\n",
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index 3ab5c6bbb90bd..f702b14d768ba 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -615,4 +615,7 @@ static inline bool sctp_newsk_ready(const struct sock *sk)
>   return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
>  }
>  
> +int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
> + int addrs_size, int op);
> +
>  #endif /* __net_sctp_h__ */
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 1c96b52c4aa28..30c981d9f6158 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -979,8 +979,8 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct 
> sctp_sockaddr_entry *addrw)
>   *
>   * Returns 0 if ok, <0 errno code on error.
>   */
> -static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
> -  int addrs_size, int op)
> +int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
> + int addrs_size, int op)
>  {
>   int err;
>   int addrcnt = 0;
> @@ -1032,6 +1032,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, 
> struct sockaddr *kaddrs,
>   return -EINVAL;
>   }
>  }
> +EXPORT_SYMBOL(sctp_setsockopt_bindx);
>  
>  static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
>const union sctp_addr *daddr,
> -- 
> 2.26.2
> 



Re: [Cluster-devel] remove kernel_setsockopt and kernel_getsockopt

2020-05-13 Thread Joe Perches
On Wed, 2020-05-13 at 08:26 +0200, Christoph Hellwig wrote:
> this series removes the kernel_setsockopt and kernel_getsockopt
> functions, and instead switches their users to small functions that
> implement setting (or in one case getting) a sockopt directly using
> a normal kernel function call with type safety and all the other
> benefits of not having a function call.
> 
> In some cases these functions seem pretty heavy handed as they do
> a lock_sock even for just setting a single variable, but this mirrors
> the real setsockopt implementation - counter to that a few kernel
> drivers just set the fields directly already.
> 
> Nevertheless the diffstat looks quite promising:
> 
>  42 files changed, 721 insertions(+), 799 deletions(-)

trivia:

It might be useful to show overall object size change.

More EXPORT_SYMBOL uses increase object size a little.

And not sure it matters much except it reduces overall object
size, but these patches remove (unnecessary) logging on error
and that could be mentioned in the cover letter too.

e.g.:

-   ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
-   (char *)&sol, sizeof(sol));
-   if (ret) {
-   dev_err(nctrl->device,
-   "failed to set SO_LINGER sock opt %d\n", ret);
-   goto err_sock;
-   }
+   sock_set_linger(queue->sock->sk, true, 0);





Re: [Cluster-devel] [PATCH 06/33] net: add sock_set_timestamps

2020-05-13 Thread David Howells
Christoph Hellwig  wrote:

> Add a helper to directly set the SO_TIMESTAMP* sockopts from kernel space
> without going through a fake uaccess.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: David Howells 



Re: [Cluster-devel] [PATCH 23/33] ipv6: add ip6_sock_set_recverr

2020-05-13 Thread David Howells
Christoph Hellwig  wrote:

> Add a helper to directly set the IPV6_RECVERR sockopt from kernel space
> without going through a fake uaccess.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: David Howells 



Re: [Cluster-devel] [PATCH 20/33] ipv4: add ip_sock_set_recverr

2020-05-13 Thread David Howells
Christoph Hellwig  wrote:

> Add a helper to directly set the IP_RECVERR sockopt from kernel space
> without going through a fake uaccess.

It looks like if this is an AF_INET6 socket, it will just pass the message
straight through to AF_INET4, so:

Reviewed-by: David Howells 



Re: [Cluster-devel] [PATCH 21/33] ipv4: add ip_sock_set_mtu_discover

2020-05-13 Thread David Howells
Christoph Hellwig  wrote:

> + ip_sock_set_mtu_discover(conn->params.local->socket->sk,
> + IP_PMTUDISC_DONT);

Um... The socket in question could be an AF_INET6 socket, not an AF_INET4
socket - I presume it will work in that case.  If so:

Reviewed-by: David Howells  [rxrpc bits]



Re: [Cluster-devel] [PATCH 29/33] rxrpc_sock_set_min_security_level

2020-05-13 Thread David Howells
Christoph Hellwig  wrote:

> +int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
> +

Looks good - but you do need to add this to Documentation/networking/rxrpc.txt
also, thanks.

David



Re: [Cluster-devel] [PATCH 11/33] tcp: tcp_sock_set_nodelay

2020-05-13 Thread Jason Gunthorpe
On Wed, May 13, 2020 at 08:26:26AM +0200, Christoph Hellwig wrote:
> Add a helper to directly set the TCP_NODELAY sockopt from kernel space
> without going through a fake uaccess.  Cleanup the callers to avoid
> pointless wrappers now that this is a simple function call.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/block/drbd/drbd_int.h |  7 
>  drivers/block/drbd/drbd_main.c|  2 +-
>  drivers/block/drbd/drbd_receiver.c|  4 +--
>  drivers/infiniband/sw/siw/siw_cm.c| 24 +++---
>  drivers/nvme/host/tcp.c   |  9 +-
>  drivers/nvme/target/tcp.c | 12 ++-
>  drivers/target/iscsi/iscsi_target_login.c | 15 ++---
>  fs/cifs/connect.c | 10 ++
>  fs/dlm/lowcomms.c |  8 ++---
>  fs/ocfs2/cluster/tcp.c| 20 ++--
>  include/linux/tcp.h   |  1 +
>  net/ceph/messenger.c  | 11 ++-
>  net/ipv4/tcp.c| 39 +++
>  net/rds/tcp.c | 11 +--
>  net/rds/tcp.h |  1 -
>  net/rds/tcp_listen.c  |  2 +-
>  16 files changed, 49 insertions(+), 127 deletions(-)

No problem with the siw change

Acked-by: Jason Gunthorpe 

Jason



[Cluster-devel] [PATCH 05/33] net: add sock_bindtoindex

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_BINDTOIFINDEX sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/sock.h|  1 +
 net/core/sock.c   | 21 +++--
 net/ipv4/udp_tunnel.c |  4 +---
 net/ipv6/ip6_udp_tunnel.c |  4 +---
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 809596ffd32d2..b63ea15362065 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2691,5 +2691,6 @@ void sock_set_reuseaddr(struct sock *sk, unsigned char 
reuse);
 void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_sndtimeo(struct sock *sk, unsigned int secs);
+int sock_bindtoindex(struct sock *sk, int ifindex);
 
 #endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 76527681e50b9..4b7439308caec 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
 {
int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
@@ -594,6 +594,18 @@ static int sock_setbindtodevice_locked(struct sock *sk, 
int ifindex)
return ret;
 }
 
+int sock_bindtoindex(struct sock *sk, int ifindex)
+{
+   int ret;
+
+   lock_sock(sk);
+   ret = sock_bindtoindex_locked(sk, ifindex);
+   release_sock(sk);
+
+   return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
int optlen)
 {
@@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char 
__user *optval,
goto out;
}
 
-   lock_sock(sk);
-   ret = sock_setbindtodevice_locked(sk, index);
-   release_sock(sk);
-
+   return sock_bindtoindex(sk, index);
 out:
 #endif
 
@@ -1221,7 +1230,7 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
break;
 
case SO_BINDTOIFINDEX:
-   ret = sock_setbindtodevice_locked(sk, val);
+   ret = sock_bindtoindex_locked(sk, val);
break;
 
default:
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 150e6f0fdbf59..2158e8bddf41c 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg 
*cfg,
goto error;
 
if (cfg->bind_ifindex) {
-   err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
-   (void *)&cfg->bind_ifindex,
-   sizeof(cfg->bind_ifindex));
+   err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 58956a6b66a21..6523609516d25 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -33,9 +33,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg 
*cfg,
goto error;
}
if (cfg->bind_ifindex) {
-   err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
-   (void *)&cfg->bind_ifindex,
-   sizeof(cfg->bind_ifindex));
+   err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
-- 
2.26.2



[Cluster-devel] [PATCH 06/33] net: add sock_set_timestamps

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_TIMESTAMP* sockopts from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/sock.h   |  1 +
 net/core/sock.c  | 47 +---
 net/rxrpc/local_object.c |  8 +--
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index b63ea15362065..cf8a30e0168de 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2692,5 +2692,6 @@ void sock_set_linger(struct sock *sk, bool onoff, 
unsigned int linger);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_sndtimeo(struct sock *sk, unsigned int secs);
 int sock_bindtoindex(struct sock *sk, int ifindex);
+void sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns);
 
 #endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 4b7439308caec..1589f242ecc7e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -772,6 +772,28 @@ void sock_set_sndtimeo(struct sock *sk, unsigned int secs)
 }
 EXPORT_SYMBOL(sock_set_sndtimeo);
 
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+   if (val)  {
+   sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+   sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+   sock_set_flag(sk, SOCK_RCVTSTAMP);
+   sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+   } else {
+   sock_reset_flag(sk, SOCK_RCVTSTAMP);
+   sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+   sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+   }
+}
+
+void sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+   lock_sock(sk);
+   __sock_set_timestamps(sk, val, new, ns);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_timestamps);
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
@@ -953,28 +975,17 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
break;
 
case SO_TIMESTAMP_OLD:
+   __sock_set_timestamps(sk, valbool, false, false);
+   break;
case SO_TIMESTAMP_NEW:
+   __sock_set_timestamps(sk, valbool, true, false);
+   break;
case SO_TIMESTAMPNS_OLD:
+   __sock_set_timestamps(sk, valbool, false, true);
+   break;
case SO_TIMESTAMPNS_NEW:
-   if (valbool)  {
-   if (optname == SO_TIMESTAMP_NEW || optname == 
SO_TIMESTAMPNS_NEW)
-   sock_set_flag(sk, SOCK_TSTAMP_NEW);
-   else
-   sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
-   if (optname == SO_TIMESTAMP_OLD || optname == 
SO_TIMESTAMP_NEW)
-   sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-   else
-   sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-   sock_set_flag(sk, SOCK_RCVTSTAMP);
-   sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-   } else {
-   sock_reset_flag(sk, SOCK_RCVTSTAMP);
-   sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-   sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-   }
+   __sock_set_timestamps(sk, valbool, true, true);
break;
-
case SO_TIMESTAMPING_NEW:
sock_set_flag(sk, SOCK_TSTAMP_NEW);
/* fall through */
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 01135e54d95d2..562ea36c96b0f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -189,13 +189,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, 
struct net *net)
}
 
/* We want receive timestamps. */
-   opt = 1;
-   ret = kernel_setsockopt(local->socket, SOL_SOCKET, 
SO_TIMESTAMPNS_OLD,
-   (char *)&opt, sizeof(opt));
-   if (ret < 0) {
-   _debug("setsockopt failed");
-   goto error;
-   }
+   sock_set_timestamps(local->socket->sk, true, false, true);
break;
 
default:
-- 
2.26.2



[Cluster-devel] [PATCH 13/33] tcp: add tcp_sock_set_syncnt

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_SYNCNT sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 drivers/nvme/host/tcp.c |  9 +
 include/linux/tcp.h |  1 +
 net/ipv4/tcp.c  | 12 
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a8070f93fd0a0..8417eeb83fcd2 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1336,14 +1336,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
}
 
/* Single syn retry */
-   opt = 1;
-   ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
-   (char *)&opt, sizeof(opt));
-   if (ret) {
-   dev_err(nctrl->device,
-   "failed to set TCP_SYNCNT sock opt %d\n", ret);
-   goto err_sock;
-   }
+   tcp_sock_set_syncnt(queue->sock->sk, 1);
 
/* Set TCP no delay */
tcp_sock_set_nodelay(queue->sock->sk, true);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e7ab6da5111b5..77b832acf3398 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -497,5 +497,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, 
int pcount,
 void tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk, bool on);
 void tcp_sock_set_quickack(struct sock *sk, int val);
+int tcp_sock_set_syncnt(struct sock *sk, int val);
 
 #endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c681f43f0bb85..773b5cd366ab7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2870,6 +2870,18 @@ void tcp_sock_set_quickack(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_quickack);
 
+int tcp_sock_set_syncnt(struct sock *sk, int val)
+{
+   if (val < 1 || val > MAX_TCP_SYNCNT)
+   return -EINVAL;
+
+   lock_sock(sk);
+   inet_csk(sk)->icsk_syn_retries = val;
+   release_sock(sk);
+   return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_syncnt);
+
 /*
  * Socket option code for TCP.
  */
-- 
2.26.2



[Cluster-devel] [PATCH 16/33] tcp: add tcp_sock_set_keepintvl

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_KEEPINTVL sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp.c| 12 
 net/rds/tcp_listen.c  |  4 +---
 net/sunrpc/xprtsock.c |  3 +--
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4d3a3e959e45b..dad18ca361c01 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -500,5 +500,6 @@ void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
 void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
 int tcp_sock_set_keepidle(struct sock *sk, int val);
+int tcp_sock_set_keepintvl(struct sock *sk, int val);
 
 #endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 22eb9159c7d05..b714f2b2fa54e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2923,6 +2923,18 @@ int tcp_sock_set_keepidle(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_keepidle);
 
+int tcp_sock_set_keepintvl(struct sock *sk, int val)
+{
+   if (val < 1 || val > MAX_TCP_KEEPINTVL)
+   return -EINVAL;
+
+   lock_sock(sk);
+   tcp_sk(sk)->keepalive_intvl = val * HZ;
+   release_sock(sk);
+   return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepintvl);
+
 /*
  * Socket option code for TCP.
  */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 8c76969d8c878..a5db2f8bb7339 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -53,12 +53,10 @@ int rds_tcp_keepalive(struct socket *sock)
goto bail;
 
tcp_sock_set_keepidle(sock->sk, keepidle);
-
/* KEEPINTVL is the interval between successive probes. We follow
 * the model in xs_tcp_finish_connecting() and re-use keepidle.
 */
-   ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
-   (char *)&keepidle, sizeof(keepidle));
+   tcp_sock_set_keepintvl(sock->sk, keepidle);
 bail:
return ret;
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ea79446789c69..e20de4a52edb7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2108,8 +2108,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt 
*xprt,
/* TCP Keepalive options */
sock_set_keepalive(sock->sk, 1);
tcp_sock_set_keepidle(sock->sk, keepidle);
-   kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-   (char *)&keepidle, sizeof(keepidle));
+   tcp_sock_set_keepintvl(sock->sk, keepidle);
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
 
-- 
2.26.2



[Cluster-devel] [PATCH 04/33] net: add sock_set_sndtimeo

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_SNDTIMEO_NEW sockopt from kernel
space without going through a fake uaccess.  The interface is
simplified to only pass the seconds value, as that is the only
thing needed at the moment.

Signed-off-by: Christoph Hellwig 
---
 fs/dlm/lowcomms.c  |  8 ++--
 include/net/sock.h |  1 +
 net/core/sock.c| 11 +++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 48e7ba796c6fb..0c0a6413fdfcc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1035,7 +1035,6 @@ static void sctp_connect_to_sock(struct connection *con)
int result;
int addr_len;
struct socket *sock;
-   struct __kernel_sock_timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -1087,13 +1086,10 @@ static void sctp_connect_to_sock(struct connection *con)
 * since O_NONBLOCK argument in connect() function does not work here,
 * then, we should restore the default value of this attribute.
 */
-   kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
- sizeof(tv));
+   sock_set_sndtimeo(sock->sk, 5);
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
   0);
-   memset(&tv, 0, sizeof(tv));
-   kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
- sizeof(tv));
+   sock_set_sndtimeo(sock->sk, 0);
 
if (result == -EINPROGRESS)
result = 0;
diff --git a/include/net/sock.h b/include/net/sock.h
index cce11782dc295..809596ffd32d2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2690,5 +2690,6 @@ void sock_def_readable(struct sock *sk);
 void sock_set_reuseaddr(struct sock *sk, unsigned char reuse);
 void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger);
 void sock_set_priority(struct sock *sk, u32 priority);
+void sock_set_sndtimeo(struct sock *sk, unsigned int secs);
 
 #endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index e9f1e2247b004..76527681e50b9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -752,6 +752,17 @@ void sock_set_priority(struct sock *sk, u32 priority)
 }
 EXPORT_SYMBOL(sock_set_priority);
 
+void sock_set_sndtimeo(struct sock *sk, unsigned int secs)
+{
+   lock_sock(sk);
+   if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+   sk->sk_sndtimeo = secs * HZ;
+   else
+   sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
-- 
2.26.2



[Cluster-devel] [PATCH 01/33] net: add sock_set_reuseaddr

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_REUSEADDR sockopt from kernel space
without going through a fake uaccess.

For this the iscsi target now has to formally depend on inet to avoid
a mostly theoretical compile failure.  For actual operation it already
did depend on having ipv4 or ipv6 support.

Signed-off-by: Christoph Hellwig 
---
 drivers/infiniband/sw/siw/siw_cm.c| 18 +-
 drivers/nvme/target/tcp.c |  8 +---
 drivers/target/iscsi/Kconfig  |  2 +-
 drivers/target/iscsi/iscsi_target_login.c |  9 +
 fs/dlm/lowcomms.c |  6 +-
 include/net/sock.h|  1 +
 net/core/sock.c   |  8 
 7 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_cm.c 
b/drivers/infiniband/sw/siw/siw_cm.c
index 559e5fd3bad8b..6d7c8c933736c 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1312,17 +1312,14 @@ static void siw_cm_llp_state_change(struct sock *sk)
 static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
  struct sockaddr *raddr)
 {
-   int rv, flags = 0, s_val = 1;
+   int rv, flags = 0;
size_t size = laddr->sa_family == AF_INET ?
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
 
/*
 * Make address available again asap.
 */
-   rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
-  sizeof(s_val));
-   if (rv < 0)
-   return rv;
+   sock_set_reuseaddr(s->sk, SK_CAN_REUSE);
 
rv = s->ops->bind(s, laddr, size);
if (rv < 0)
@@ -1781,7 +1778,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
struct siw_cep *cep = NULL;
struct siw_device *sdev = to_siw_dev(id->device);
int addr_family = id->local_addr.ss_family;
-   int rv = 0, s_val;
+   int rv = 0;
 
if (addr_family != AF_INET && addr_family != AF_INET6)
return -EAFNOSUPPORT;
@@ -1793,13 +1790,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
/*
 * Allow binding local port when still in TIME_WAIT from last close.
 */
-   s_val = 1;
-   rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
-  sizeof(s_val));
-   if (rv) {
-   siw_dbg(id->device, "setsockopt error: %d\n", rv);
-   goto error;
-   }
+   sock_set_reuseaddr(s->sk, SK_CAN_REUSE);
+
if (addr_family == AF_INET) {
struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
 
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f0da04e960f40..791aa32beeb98 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1632,6 +1632,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
port->sock->sk->sk_user_data = port;
port->data_ready = port->sock->sk->sk_data_ready;
port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
+   sock_set_reuseaddr(port->sock->sk, SK_CAN_REUSE);
 
opt = 1;
ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
@@ -1641,13 +1642,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
 
-   ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
-   (char *)&opt, sizeof(opt));
-   if (ret) {
-   pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
-   goto err_sock;
-   }
-
if (so_priority > 0) {
ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
(char *)&so_priority, sizeof(so_priority));
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 1f93ea3813536..922484ea4e304 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config ISCSI_TARGET
tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
-   depends on NET
+   depends on INET
select CRYPTO
select CRYPTO_CRC32C
select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target_login.c 
b/drivers/target/iscsi/iscsi_target_login.c
index 731ee67fe914b..7da59ece3eb99 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -909,14 +909,7 @@ int iscsit_setup_np(
}
}
 
-   /* FIXME: Someone please explain why this is endian-safe */
-   ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-   (char *)&opt, sizeof(opt));
-   if (ret < 0) {
-   pr_err("kernel_setsockopt() for SO_REUSEADDR"
-   " failed\n");
-   goto fail;
-   }
+   sock_set_reuseaddr(sock->sk, 

[Cluster-devel] [PATCH 03/33] net: add sock_set_priority

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_PRIORITY sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 drivers/nvme/host/tcp.c   | 12 ++--
 drivers/nvme/target/tcp.c | 18 --
 include/net/sock.h|  1 +
 net/core/sock.c   |  8 
 4 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5cacb61c73229..cd6a8fc14a139 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1362,16 +1362,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 */
sock_set_linger(queue->sock->sk, true, 0);
 
-   if (so_priority > 0) {
-   ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
-   (char *)&so_priority, sizeof(so_priority));
-   if (ret) {
-   dev_err(ctrl->ctrl.device,
-   "failed to set SO_PRIORITY sock opt, ret %d\n",
-   ret);
-   goto err_sock;
-   }
-   }
+   if (so_priority > 0)
+   sock_set_priority(queue->sock->sk, so_priority);
 
/* Set socket type of service */
if (nctrl->opts->tos >= 0) {
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 87aba417189d2..778c1ce3137b7 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1448,12 +1448,8 @@ static int nvmet_tcp_set_queue_sock(struct 
nvmet_tcp_queue *queue)
 */
sock_set_linger(sock->sk, true, 0);
 
-   if (so_priority > 0) {
-   ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
-   (char *)&so_priority, sizeof(so_priority));
-   if (ret)
-   return ret;
-   }
+   if (so_priority > 0)
+   sock_set_priority(sock->sk, so_priority);
 
/* Set socket type of service */
if (inet->rcv_tos > 0) {
@@ -1638,14 +1634,8 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock;
}
 
-   if (so_priority > 0) {
-   ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
-   (char *)&so_priority, sizeof(so_priority));
-   if (ret) {
-   pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
-   goto err_sock;
-   }
-   }
+   if (so_priority > 0)
+   sock_set_priority(port->sock->sk, so_priority);
 
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr));
diff --git a/include/net/sock.h b/include/net/sock.h
index 60890fb47fbc0..cce11782dc295 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2689,5 +2689,6 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, 
int dif)
 void sock_def_readable(struct sock *sk);
 void sock_set_reuseaddr(struct sock *sk, unsigned char reuse);
 void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger);
+void sock_set_priority(struct sock *sk, u32 priority);
 
 #endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index cbc5104ca3515..e9f1e2247b004 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -744,6 +744,14 @@ void sock_set_linger(struct sock *sk, bool onoff, unsigned 
int linger)
 }
 EXPORT_SYMBOL(sock_set_linger);
 
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+   lock_sock(sk);
+   sk->sk_priority = priority;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
-- 
2.26.2



[Cluster-devel] [PATCH 02/33] net: add sock_set_linger

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_LINGER sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 drivers/nvme/host/tcp.c   |  9 +
 drivers/nvme/target/tcp.c |  6 +-
 include/net/sock.h|  1 +
 net/core/sock.c   | 36 +---
 net/rds/tcp_listen.c  |  8 +---
 net/sunrpc/svcsock.c  | 12 ++--
 6 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f7..5cacb61c73229 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,7 +1313,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 {
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
-   struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret, opt, rcv_pdu_size;
 
queue->ctrl = ctrl;
@@ -1361,13 +1360,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 * close. This is done to prevent stale data from being sent should
 * the network connection be restored before TCP times out.
 */
-   ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
-   (char *)&sol, sizeof(sol));
-   if (ret) {
-   dev_err(nctrl->device,
-   "failed to set SO_LINGER sock opt %d\n", ret);
-   goto err_sock;
-   }
+   sock_set_linger(queue->sock->sk, true, 0);
 
if (so_priority > 0) {
ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 791aa32beeb98..87aba417189d2 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1429,7 +1429,6 @@ static int nvmet_tcp_set_queue_sock(struct 
nvmet_tcp_queue *queue)
 {
struct socket *sock = queue->sock;
struct inet_sock *inet = inet_sk(sock->sk);
-   struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret;
 
ret = kernel_getsockname(sock,
@@ -1447,10 +1446,7 @@ static int nvmet_tcp_set_queue_sock(struct 
nvmet_tcp_queue *queue)
 * close. This is done to prevent stale data from being sent should
 * the network connection be restored before TCP times out.
 */
-   ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
-   (char *)&sol, sizeof(sol));
-   if (ret)
-   return ret;
+   sock_set_linger(sock->sk, true, 0);
 
if (so_priority > 0) {
ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
diff --git a/include/net/sock.h b/include/net/sock.h
index e801a147ad746..60890fb47fbc0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,5 +2688,6 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, 
int dif)
 
 void sock_def_readable(struct sock *sk);
 void sock_set_reuseaddr(struct sock *sk, unsigned char reuse);
+void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger);
 
 #endif /* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index ff4faa3e68ac4..cbc5104ca3515 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -720,6 +720,30 @@ void sock_set_reuseaddr(struct sock *sk, unsigned char 
reuse)
 }
 EXPORT_SYMBOL(sock_set_reuseaddr);
 
+static void __sock_set_linger(struct sock *sk, bool onoff, unsigned int linger)
+{
+   if (!onoff) {
+   sock_reset_flag(sk, SOCK_LINGER);
+   return;
+   }
+
+#if (BITS_PER_LONG == 32)
+   if (linger >= MAX_SCHEDULE_TIMEOUT / HZ)
+   sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+   else
+#endif
+   sk->sk_lingertime = linger * HZ;
+   sock_set_flag(sk, SOCK_LINGER);
+}
+
+void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger)
+{
+   lock_sock(sk);
+   __sock_set_linger(sk, onoff, linger);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_linger);
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
@@ -886,17 +910,7 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
ret = -EFAULT;
break;
}
-   if (!ling.l_onoff)
-   sock_reset_flag(sk, SOCK_LINGER);
-   else {
-#if (BITS_PER_LONG == 32)
-   if ((unsigned int)ling.l_linger >= 
MAX_SCHEDULE_TIMEOUT/HZ)
-   sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
-   else
-#endif
-   sk->sk_lingertime = (unsigned int)ling.l_linger 
* HZ;
-   sock_set_flag(sk, SOCK_LINGER);
-   }
+   __sock_set_linger(sk, ling.l_onoff, ling.l_linger);
break;
 
case SO_BSDCOMPAT:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 

[Cluster-devel] [PATCH 10/33] tcp: add tcp_sock_set_cork

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_CORK sockopt from kernel space
without going through a fake uaccess.  Cleanup the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/drbd/drbd_int.h  | 14 
 drivers/block/drbd/drbd_receiver.c |  4 +--
 drivers/block/drbd/drbd_worker.c   |  6 ++--
 fs/cifs/transport.c|  8 ++---
 include/linux/tcp.h|  2 ++
 net/ipv4/tcp.c | 51 +++---
 net/rds/tcp_send.c |  9 ++
 7 files changed, 43 insertions(+), 51 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index aae99a2d7bd40..3550adc93c68b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,20 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device 
*device, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head 
*to_be_freed);
 extern int drbd_connected(struct drbd_peer_device *);
 
-static inline void drbd_tcp_cork(struct socket *sock)
-{
-   int val = 1;
-   (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
-   (char*)&val, sizeof(val));
-}
-
-static inline void drbd_tcp_uncork(struct socket *sock)
-{
-   int val = 0;
-   (void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
-   (char*)&val, sizeof(val));
-}
-
 static inline void drbd_tcp_nodelay(struct socket *sock)
 {
int val = 1;
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index c15e7083b13a6..55ea907ad33cb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -6162,7 +6162,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
rcu_read_unlock();
 
if (tcp_cork)
-   drbd_tcp_cork(connection->meta.socket);
+   tcp_sock_set_cork(connection->meta.socket->sk, true);
 
err = drbd_finish_peer_reqs(device);
kref_put(&device->kref, drbd_destroy_device);
@@ -6175,7 +6175,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
}
 
if (tcp_cork)
-   drbd_tcp_uncork(connection->meta.socket);
+   tcp_sock_set_cork(connection->meta.socket->sk, false);
 
return;
 }
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0dc019da1f8d0..2b89c9f2ca707 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -2098,7 +2098,7 @@ static void wait_for_work(struct drbd_connection 
*connection, struct list_head *
if (uncork) {
mutex_lock(&connection->data.mutex);
if (connection->data.socket)
-   drbd_tcp_uncork(connection->data.socket);
+   tcp_sock_set_cork(connection->data.socket->sk, false);
mutex_unlock(&connection->data.mutex);
}
 
@@ -2153,9 +2153,9 @@ static void wait_for_work(struct drbd_connection 
*connection, struct list_head *
mutex_lock(&connection->data.mutex);
if (connection->data.socket) {
if (cork)
-   drbd_tcp_cork(connection->data.socket);
+   tcp_sock_set_cork(connection->data.socket->sk, true);
else if (!uncork)
-   drbd_tcp_uncork(connection->data.socket);
+   tcp_sock_set_cork(connection->data.socket->sk, false);
}
mutex_unlock(&connection->data.mutex);
 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c97570eb2c180..99760063e0006 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -325,7 +325,6 @@ __smb_send_rqst(struct TCP_Server_Info *server, int 
num_rqst,
size_t total_len = 0, sent, size;
struct socket *ssocket = server->ssocket;
struct msghdr smb_msg;
-   int val = 1;
__be32 rfc1002_marker;
 
if (cifs_rdma_enabled(server)) {
@@ -345,8 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int 
num_rqst,
}
 
/* cork the socket */
-   kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
-   (char *)&val, sizeof(val));
+   tcp_sock_set_cork(ssocket->sk, true);
 
for (j = 0; j < num_rqst; j++)
send_length += smb_rqst_len(server, &rqst[j]);
@@ -435,9 +433,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int 
num_rqst,
}
 
/* uncork it */
-   val = 0;
-   kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
-   (char *)&val, sizeof(val));
+   tcp_sock_set_cork(ssocket->sk, false);
 
if ((total_len > 0) && (total_len != send_length)) {
cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating 
session\n",
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e60db06ec28d7..7ef0f975a7658 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -494,4 +494,6 @@ static inline u16 

[Cluster-devel] [PATCH 19/33] ipv4: add ip_sock_set_freebind

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the IP_FREEBIND sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 drivers/target/iscsi/iscsi_target_login.c | 13 +++--
 include/net/ip.h  |  1 +
 net/ipv4/ip_sockglue.c|  8 
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_login.c 
b/drivers/target/iscsi/iscsi_target_login.c
index 165fa573bcb29..9f69e16cfef5f 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include <linux/tcp.h> /* TCP_NODELAY */
+#include <net/ip.h>
 #include <net/ipv6.h> /* ipv6_addr_v4mapped() */
 #include 
 #include 
@@ -855,7 +856,7 @@ int iscsit_setup_np(
struct sockaddr_storage *sockaddr)
 {
struct socket *sock = NULL;
-   int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len;
+   int backlog = ISCSIT_TCP_BACKLOG, ret, len;
 
switch (np->np_network_transport) {
case ISCSI_TCP:
@@ -900,15 +901,7 @@ int iscsit_setup_np(
if (np->np_network_transport == ISCSI_TCP)
tcp_sock_set_nodelay(sock->sk, true);
sock_set_reuseaddr(sock->sk, SK_CAN_REUSE);
-
-   opt = 1;
-   ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
-   (char *)&opt, sizeof(opt));
-   if (ret < 0) {
-   pr_err("kernel_setsockopt() for IP_FREEBIND"
-   " failed\n");
-   goto fail;
-   }
+   ip_sock_set_freebind(sock->sk, true);
 
ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
if (ret < 0) {
diff --git a/include/net/ip.h b/include/net/ip.h
index 2fc52e26fa88b..1e2feca8630d0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -766,5 +766,6 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 }
 
 void ip_sock_set_tos(struct sock *sk, int val);
+void ip_sock_set_freebind(struct sock *sk, bool val);
 
 #endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1733ac78c21aa..0c40887a817f8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -581,6 +581,14 @@ void ip_sock_set_tos(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(ip_sock_set_tos);
 
+void ip_sock_set_freebind(struct sock *sk, bool val)
+{
+   lock_sock(sk);
+   inet_sk(sk)->freebind = val;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_freebind);
+
 /*
  * Socket option code for IP. This is the end of the line after any
  * TCP,UDP etc options on an IP socket.
-- 
2.26.2



[Cluster-devel] [PATCH 15/33] tcp: add tcp_sock_set_keepidle

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_KEEPIDLE sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp.c| 49 ++-
 net/rds/tcp_listen.c  |  5 +
 net/sunrpc/xprtsock.c |  3 +--
 4 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 69c988f84a184..4d3a3e959e45b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -499,5 +499,6 @@ void tcp_sock_set_nodelay(struct sock *sk, bool on);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
 void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
+int tcp_sock_set_keepidle(struct sock *sk, int val);
 
 #endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9a8d062b17a48..22eb9159c7d05 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2890,6 +2890,39 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
 }
 EXPORT_SYMBOL(tcp_sock_set_user_timeout);
 
+static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+   struct tcp_sock *tp = tcp_sk(sk);
+
+   if (val < 1 || val > MAX_TCP_KEEPIDLE)
+   return -EINVAL;
+
+   tp->keepalive_time = val * HZ;
+   if (sock_flag(sk, SOCK_KEEPOPEN) &&
+   !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+   u32 elapsed = keepalive_time_elapsed(tp);
+
+   if (tp->keepalive_time > elapsed)
+   elapsed = tp->keepalive_time - elapsed;
+   else
+   elapsed = 0;
+   inet_csk_reset_keepalive_timer(sk, elapsed);
+   }
+
+   return 0;
+}
+
+int tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+   int err;
+
+   lock_sock(sk);
+   err = __tcp_sock_set_keepidle(sk, val);
+   release_sock(sk);
+   return err;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepidle);
+
 /*
  * Socket option code for TCP.
  */
@@ -3059,21 +3092,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
 
case TCP_KEEPIDLE:
-   if (val < 1 || val > MAX_TCP_KEEPIDLE)
-   err = -EINVAL;
-   else {
-   tp->keepalive_time = val * HZ;
-   if (sock_flag(sk, SOCK_KEEPOPEN) &&
-   !((1 << sk->sk_state) &
- (TCPF_CLOSE | TCPF_LISTEN))) {
-   u32 elapsed = keepalive_time_elapsed(tp);
-   if (tp->keepalive_time > elapsed)
-   elapsed = tp->keepalive_time - elapsed;
-   else
-   elapsed = 0;
-   inet_csk_reset_keepalive_timer(sk, elapsed);
-   }
-   }
+   err = __tcp_sock_set_keepidle(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index e76ec64b43fe7..8c76969d8c878 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -52,10 +52,7 @@ int rds_tcp_keepalive(struct socket *sock)
if (ret < 0)
goto bail;
 
-   ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
-   (char *)&keepidle, sizeof(keepidle));
-   if (ret < 0)
-   goto bail;
+   tcp_sock_set_keepidle(sock->sk, keepidle);
 
/* KEEPINTVL is the interval between successive probes. We follow
 * the model in xs_tcp_finish_connecting() and re-use keepidle.
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 30d4c4fcd3e38..ea79446789c69 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2107,8 +2107,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt 
*xprt,
 
/* TCP Keepalive options */
sock_set_keepalive(sock->sk, 1);
-   kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
-   (char *)&keepidle, sizeof(keepidle));
+   tcp_sock_set_keepidle(sock->sk, keepidle);
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
(char *)&keepidle, sizeof(keepidle));
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
-- 
2.26.2



[Cluster-devel] [PATCH 07/33] net: add sock_set_keepalive

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_KEEPALIVE sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 fs/dlm/lowcomms.c |  6 +-
 include/net/sock.h|  1 +
 net/core/sock.c   | 10 ++
 net/rds/tcp_listen.c  |  6 +-
 net/sunrpc/xprtsock.c |  4 +---
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0c0a6413fdfcc..16d616c180613 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1259,11 +1259,7 @@ static struct socket *tcp_create_listen_sock(struct 
connection *con,
con->sock = NULL;
goto create_out;
}
-   result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-(char *)&one, sizeof(one));
-   if (result < 0) {
-   log_print("Set keepalive failed: %d", result);
-   }
+   sock_set_keepalive(sock->sk, true);
 
result = sock->ops->listen(sock, 5);
if (result < 0) {
diff --git a/include/net/sock.h b/include/net/sock.h
index cf8a30e0168de..4cedde585424f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2691,6 +2691,7 @@ void sock_set_reuseaddr(struct sock *sk, unsigned char 
reuse);
 void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_sndtimeo(struct sock *sk, unsigned int secs);
+void sock_set_keepalive(struct sock *sk, bool keepalive);
 int sock_bindtoindex(struct sock *sk, int ifindex);
 void sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 1589f242ecc7e..dfd2b839f88bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -794,6 +794,16 @@ void sock_set_timestamps(struct sock *sk, bool val, bool 
new, bool ns)
 }
 EXPORT_SYMBOL(sock_set_timestamps);
 
+void sock_set_keepalive(struct sock *sk, bool keepalive)
+{
+   lock_sock(sk);
+   if (sk->sk_prot->keepalive)
+   sk->sk_prot->keepalive(sk, keepalive);
+   sock_valbool_flag(sk, SOCK_KEEPOPEN, keepalive);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 96f7538e5fa8d..a55b39cd45a6c 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -43,13 +43,9 @@ int rds_tcp_keepalive(struct socket *sock)
/* values below based on xs_udp_default_timeout */
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
int keepcnt = 5; /* number of unack'ed probes before declaring dead */
-   int keepalive = 1;
int ret = 0;
 
-   ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-   (char *)&keepalive, sizeof(keepalive));
-   if (ret < 0)
-   goto bail;
+   sock_set_keepalive(sock->sk, true);
 
ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
(char *)&keepcnt, sizeof(keepcnt));
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ece..bb61d3758be2b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2110,7 +2110,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt 
*xprt,
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, 
xprt);
unsigned int keepidle;
unsigned int keepcnt;
-   unsigned int opt_on = 1;
unsigned int timeo;
 
spin_lock(&xprt->transport_lock);
@@ -2122,8 +2121,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt 
*xprt,
spin_unlock(&xprt->transport_lock);
 
/* TCP Keepalive options */
-   kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-   (char *)&opt_on, sizeof(opt_on));
+   sock_set_keepalive(sock->sk, 1);
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
(char *)&keepidle, sizeof(keepidle));
kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-- 
2.26.2



[Cluster-devel] [PATCH 12/33] tcp: add tcp_sock_set_quickack

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_QUICKACK sockopt from kernel space
without going through a fake uaccess.  Cleanup the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/drbd/drbd_int.h  |  7 --
 drivers/block/drbd/drbd_receiver.c |  5 ++--
 include/linux/tcp.h|  1 +
 net/ipv4/tcp.c | 39 --
 4 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e24bba87c8e02..14345a87c7cc5 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device 
*device, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head 
*to_be_freed);
 extern int drbd_connected(struct drbd_peer_device *);
 
-static inline void drbd_tcp_quickack(struct socket *sock)
-{
-   int val = 2;
-   (void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
-   (char*)&val, sizeof(val));
-}
-
 /* sets the number of 512 byte sectors of our virtual device */
 void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
 
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index da5a9ee896a43..cdd317ae97021 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1223,7 +1223,7 @@ static int drbd_recv_header_maybe_unplug(struct 
drbd_connection *connection, str
 * quickly as possible, and let remote TCP know what we have
 * received so far. */
if (err == -EAGAIN) {
-   drbd_tcp_quickack(connection->data.socket);
+   tcp_sock_set_quickack(connection->data.socket->sk, 2);
drbd_unplug_all_devices(connection);
}
if (err > 0) {
@@ -4959,8 +4959,7 @@ static int receive_UnplugRemote(struct drbd_connection 
*connection, struct packe
 {
/* Make sure we've acked all the TCP data associated
 * with the data requests being unplugged */
-   drbd_tcp_quickack(connection->data.socket);
-
+   tcp_sock_set_quickack(connection->data.socket->sk, 2);
return 0;
 }
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 533610b6ae420..e7ab6da5111b5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -496,5 +496,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, 
int pcount,
 
 void tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk, bool on);
+void tcp_sock_set_quickack(struct sock *sk, int val);
 
 #endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 300ce622607d8..c681f43f0bb85 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2845,6 +2845,31 @@ void tcp_sock_set_nodelay(struct sock *sk, bool on)
 }
 EXPORT_SYMBOL(tcp_sock_set_nodelay);
 
+static void __tcp_sock_set_quickack(struct sock *sk, int val)
+{
+   if (!val) {
+   inet_csk_enter_pingpong_mode(sk);
+   return;
+   }
+
+   inet_csk_exit_pingpong_mode(sk);
+   if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+   inet_csk_ack_scheduled(sk)) {
+   inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
+   tcp_cleanup_rbuf(sk, 1);
+   if (!(val & 1))
+   inet_csk_enter_pingpong_mode(sk);
+   }
+}
+
+void tcp_sock_set_quickack(struct sock *sk, int val)
+{
+   lock_sock(sk);
+   __tcp_sock_set_quickack(sk, val);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_quickack);
+
 /*
  * Socket option code for TCP.
  */
@@ -3085,19 +3110,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
 
case TCP_QUICKACK:
-   if (!val) {
-   inet_csk_enter_pingpong_mode(sk);
-   } else {
-   inet_csk_exit_pingpong_mode(sk);
-   if ((1 << sk->sk_state) &
-   (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
-   inet_csk_ack_scheduled(sk)) {
-   icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
-   tcp_cleanup_rbuf(sk, 1);
-   if (!(val & 1))
-   inet_csk_enter_pingpong_mode(sk);
-   }
-   }
+   __tcp_sock_set_quickack(sk, val);
break;
 
 #ifdef CONFIG_TCP_MD5SIG
-- 
2.26.2



[Cluster-devel] [PATCH 14/33] tcp: add tcp_sock_set_user_timeout

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_USER_TIMEOUT sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 fs/ocfs2/cluster/tcp.c | 22 ++
 include/linux/tcp.h|  1 +
 net/ipv4/tcp.c |  8 
 net/sunrpc/xprtsock.c  |  3 +--
 4 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 7936e22e39f34..5776df10d11f9 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct 
*work)
sc_put(sc);
 }
 
-static int o2net_set_usertimeout(struct socket *sock)
-{
-   int user_timeout = O2NET_TCP_USER_TIMEOUT;
-
-   return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
-   (void *)&user_timeout, sizeof(user_timeout));
-}
-
 static void o2net_initialize_handshake(void)
 {
o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1629,12 +1621,7 @@ static void o2net_start_connect(struct work_struct *work)
}
 
tcp_sock_set_nodelay(sc->sc_sock->sk, true);
-
-   ret = o2net_set_usertimeout(sock);
-   if (ret) {
-   mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
-   goto out;
-   }
+   tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT);
 
o2net_register_callbacks(sc->sc_sock->sk, sc);
 
@@ -1821,12 +1808,7 @@ static int o2net_accept_one(struct socket *sock, int 
*more)
new_sock->sk->sk_allocation = GFP_ATOMIC;
 
tcp_sock_set_nodelay(new_sock->sk, true);
-
-   ret = o2net_set_usertimeout(new_sock);
-   if (ret) {
-   mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
-   goto out;
-   }
+   tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT);
 
ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
if (ret < 0)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 77b832acf3398..69c988f84a184 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,5 +498,6 @@ void tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk, bool on);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
 
 #endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 773b5cd366ab7..9a8d062b17a48 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2882,6 +2882,14 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_syncnt);
 
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+{
+   lock_sock(sk);
+   inet_csk(sk)->icsk_user_timeout = val;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+
 /*
  * Socket option code for TCP.
  */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3dc2d52371a0e..30d4c4fcd3e38 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2115,8 +2115,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt 
*xprt,
(char *)&keepcnt, sizeof(keepcnt));
 
/* TCP user timeout (see RFC5482) */
-   kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
-   (char *)&timeo, sizeof(timeo));
+   tcp_sock_set_user_timeout(sock->sk, timeo);
 }
 
 static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
-- 
2.26.2



[Cluster-devel] [PATCH 24/33] ipv6: add ip6_sock_set_addr_preferences

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the IPV6_ADDR_PREFERENCES sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/ipv6.h   |   1 +
 net/ipv6/ipv6_sockglue.c | 127 +--
 net/sunrpc/xprtsock.c|   8 ++-
 3 files changed, 75 insertions(+), 61 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 69bc1651aaef8..04b2bc1935054 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1177,5 +1177,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
 
 int ip6_sock_set_v6only(struct sock *sk, bool val);
 void ip6_sock_set_recverr(struct sock *sk, bool val);
+int ip6_sock_set_addr_preferences(struct sock *sk, bool val);
 
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3c67626b6f5a9..c23d42e809d7e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -157,6 +157,74 @@ void ip6_sock_set_recverr(struct sock *sk, bool val)
 }
 EXPORT_SYMBOL(ip6_sock_set_recverr);
 
+static int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+   unsigned int pref = 0;
+   unsigned int prefmask = ~0;
+
+   /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+   switch (val & (IPV6_PREFER_SRC_PUBLIC |
+  IPV6_PREFER_SRC_TMP |
+  IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+   case IPV6_PREFER_SRC_PUBLIC:
+   pref |= IPV6_PREFER_SRC_PUBLIC;
+   prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+   break;
+   case IPV6_PREFER_SRC_TMP:
+   pref |= IPV6_PREFER_SRC_TMP;
+   prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+   break;
+   case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+   prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+ IPV6_PREFER_SRC_TMP);
+   break;
+   case 0:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   /* check HOME/COA conflicts */
+   switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) {
+   case IPV6_PREFER_SRC_HOME:
+   prefmask &= ~IPV6_PREFER_SRC_COA;
+   break;
+   case IPV6_PREFER_SRC_COA:
+   pref |= IPV6_PREFER_SRC_COA;
+   break;
+   case 0:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   /* check CGA/NONCGA conflicts */
+   switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+   case IPV6_PREFER_SRC_CGA:
+   case IPV6_PREFER_SRC_NONCGA:
+   case 0:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+   return 0;
+}
+
+int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
+{
+   int ret;
+
+   lock_sock(sk);
+   ret = __ip6_sock_set_addr_preferences(sk, val);
+   release_sock(sk);
+   return ret;
+}
+EXPORT_SYMBOL(ip6_sock_set_addr_preferences);
+
 static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
 {
@@ -859,67 +927,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, 
int optname,
break;
 
case IPV6_ADDR_PREFERENCES:
-   {
-   unsigned int pref = 0;
-   unsigned int prefmask = ~0;
-
if (optlen < sizeof(int))
goto e_inval;
-
-   retv = -EINVAL;
-
-   /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
-   switch (val & (IPV6_PREFER_SRC_PUBLIC|
-  IPV6_PREFER_SRC_TMP|
-  IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
-   case IPV6_PREFER_SRC_PUBLIC:
-   pref |= IPV6_PREFER_SRC_PUBLIC;
-   break;
-   case IPV6_PREFER_SRC_TMP:
-   pref |= IPV6_PREFER_SRC_TMP;
-   break;
-   case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
-   break;
-   case 0:
-   goto pref_skip_pubtmp;
-   default:
-   goto e_inval;
-   }
-
-   prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP);
-pref_skip_pubtmp:
-
-   /* check HOME/COA conflicts */
-   switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
-   case IPV6_PREFER_SRC_HOME:
-   break;
-   case IPV6_PREFER_SRC_COA:
-   pref |= IPV6_PREFER_SRC_COA;
-   case 0:
-   goto pref_skip_coa;
-   default:
-   goto e_inval;
-   }
-
-   

[Cluster-devel] [PATCH 26/33] sctp: lift copying in addrs into sctp_setsockopt

2020-05-13 Thread Christoph Hellwig
Prepare for additional kernel-space callers of sctp_setsockopt_bindx.

Signed-off-by: Christoph Hellwig 
---
 net/sctp/socket.c | 71 ++-
 1 file changed, 27 insertions(+), 44 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 827a9903ee288..1c96b52c4aa28 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,18 +972,16 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct 
sctp_sockaddr_entry *addrw)
  * it.
  *
  * skThe sk of the socket
- * addrs The pointer to the addresses in user land
+ * addrs The pointer to the addresses
  * addrssize Size of the addrs buffer
  * opOperation to perform (add or remove, see the flags of
  *   sctp_bindx)
  *
  * Returns 0 if ok, <0 errno code on error.
  */
-static int sctp_setsockopt_bindx(struct sock *sk,
-struct sockaddr __user *addrs,
+static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
 int addrs_size, int op)
 {
-   struct sockaddr *kaddrs;
int err;
int addrcnt = 0;
int walk_size = 0;
@@ -991,23 +989,13 @@ static int sctp_setsockopt_bindx(struct sock *sk,
void *addr_buf;
struct sctp_af *af;
 
-   pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n",
-__func__, sk, addrs, addrs_size, op);
-
-   if (unlikely(addrs_size <= 0))
-   return -EINVAL;
+   pr_debug("%s: sk:%p kaddrs:%p addrs_size:%d opt:%d\n",
+__func__, sk, kaddrs, addrs_size, op);
 
-   kaddrs = memdup_user(addrs, addrs_size);
-   if (IS_ERR(kaddrs))
-   return PTR_ERR(kaddrs);
-
-   /* Walk through the addrs buffer and count the number of addresses. */
addr_buf = kaddrs;
while (walk_size < addrs_size) {
-   if (walk_size + sizeof(sa_family_t) > addrs_size) {
-   kfree(kaddrs);
+   if (walk_size + sizeof(sa_family_t) > addrs_size)
return -EINVAL;
-   }
 
sa_addr = addr_buf;
af = sctp_get_af_specific(sa_addr->sa_family);
@@ -1015,10 +1003,8 @@ static int sctp_setsockopt_bindx(struct sock *sk,
/* If the address family is not supported or if this address
 * causes the address buffer to overflow return EINVAL.
 */
-   if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
-   kfree(kaddrs);
+   if (!af || (walk_size + af->sockaddr_len) > addrs_size)
return -EINVAL;
-   }
addrcnt++;
addr_buf += af->sockaddr_len;
walk_size += af->sockaddr_len;
@@ -1032,29 +1018,19 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 (struct sockaddr *)kaddrs,
 addrs_size);
if (err)
-   goto out;
+   return err;
err = sctp_bindx_add(sk, kaddrs, addrcnt);
if (err)
-   goto out;
-   err = sctp_send_asconf_add_ip(sk, kaddrs, addrcnt);
-   break;
-
+   return err;
+   return sctp_send_asconf_add_ip(sk, kaddrs, addrcnt);
case SCTP_BINDX_REM_ADDR:
err = sctp_bindx_rem(sk, kaddrs, addrcnt);
if (err)
-   goto out;
-   err = sctp_send_asconf_del_ip(sk, kaddrs, addrcnt);
-   break;
-
+   return err;
+   return sctp_send_asconf_del_ip(sk, kaddrs, addrcnt);
default:
-   err = -EINVAL;
-   break;
+   return -EINVAL;
}
-
-out:
-   kfree(kaddrs);
-
-   return err;
 }
 
 static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
@@ -4670,6 +4646,7 @@ static int sctp_setsockopt_pf_expose(struct sock *sk,
 static int sctp_setsockopt(struct sock *sk, int level, int optname,
   char __user *optval, unsigned int optlen)
 {
+   struct sockaddr *kaddrs;
int retval = 0;
 
pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname);
@@ -4682,30 +4659,37 @@ static int sctp_setsockopt(struct sock *sk, int level, 
int optname,
 */
if (level != SOL_SCTP) {
struct sctp_af *af = sctp_sk(sk)->pf->af;
-   retval = af->setsockopt(sk, level, optname, optval, optlen);
-   goto out_nounlock;
+   return af->setsockopt(sk, level, optname, optval, optlen);
}
 
+   if (unlikely(optlen <= 0))
+   return -EINVAL;
+
+   kaddrs = memdup_user(optval, optlen);
+   if (IS_ERR(kaddrs))
+   return PTR_ERR(kaddrs);
+
+   /* Walk through the addrs buffer and count the number of 

[Cluster-devel] [PATCH 33/33] net: remove kernel_getsockopt

2020-05-13 Thread Christoph Hellwig
No users left.

Signed-off-by: Christoph Hellwig 
---
 include/linux/net.h |  2 --
 net/socket.c| 34 --
 2 files changed, 36 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index ece7513326293..e10f378194a59 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -303,8 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr 
*addr, int addrlen,
   int flags);
 int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
-int kernel_getsockopt(struct socket *sock, int level, int optname, char 
*optval,
- int *optlen);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
diff --git a/net/socket.c b/net/socket.c
index f37c3ef508691..49000f0d87f71 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3715,40 +3715,6 @@ int kernel_getpeername(struct socket *sock, struct 
sockaddr *addr)
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
-/**
- * kernel_getsockopt - get a socket option (kernel space)
- * @sock: socket
- * @level: API level (SOL_SOCKET, ...)
- * @optname: option tag
- * @optval: option value
- * @optlen: option length
- *
- * Assigns the option length to @optlen.
- * Returns 0 or an error.
- */
-
-int kernel_getsockopt(struct socket *sock, int level, int optname,
-   char *optval, int *optlen)
-{
-   mm_segment_t oldfs = get_fs();
-   char __user *uoptval;
-   int __user *uoptlen;
-   int err;
-
-   uoptval = (char __user __force *) optval;
-   uoptlen = (int __user __force *) optlen;
-
-   set_fs(KERNEL_DS);
-   if (level == SOL_SOCKET)
-   err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
-   else
-   err = sock->ops->getsockopt(sock, level, optname, uoptval,
-   uoptlen);
-   set_fs(oldfs);
-   return err;
-}
-EXPORT_SYMBOL(kernel_getsockopt);
-
 /**
  * kernel_sendpage - send a &page through a socket (kernel space)
  * @sock: socket
-- 
2.26.2



[Cluster-devel] [PATCH 09/33] net: add sock_set_reuseport

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_REUSEPORT sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/sock.h|  1 +
 net/core/sock.c   |  8 
 net/sunrpc/xprtsock.c | 17 +
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index e1ed40ff01312..6b8e06947b243 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,6 +2688,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, 
int dif)
 
 void sock_def_readable(struct sock *sk);
 void sock_set_reuseaddr(struct sock *sk, unsigned char reuse);
+void sock_set_reuseport(struct sock *sk, bool reuseport);
 void sock_set_linger(struct sock *sk, bool onoff, unsigned int linger);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_sndtimeo(struct sock *sk, unsigned int secs);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6af01b757cf24..7f0baf1ccde17 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -729,6 +729,14 @@ void sock_set_reuseaddr(struct sock *sk, unsigned char 
reuse)
 }
 EXPORT_SYMBOL(sock_set_reuseaddr);
 
+void sock_set_reuseport(struct sock *sk, bool reuseport)
+{
+   lock_sock(sk);
+   sk->sk_reuseport = reuseport;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
 static void __sock_set_linger(struct sock *sk, bool onoff, unsigned int linger)
 {
if (!onoff) {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bb61d3758be2b..3dc2d52371a0e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1594,21 +1594,6 @@ static int xs_get_random_port(void)
return rand + min;
 }
 
-/**
- * xs_set_reuseaddr_port - set the socket's port and address reuse options
- * @sock: socket
- *
- * Note that this function has to be called on all sockets that share the
- * same port, and it must be called before binding.
- */
-static void xs_sock_set_reuseport(struct socket *sock)
-{
-   int opt = 1;
-
-   kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
-   (char *)&opt, sizeof(opt));
-}
-
 static unsigned short xs_sock_getport(struct socket *sock)
 {
struct sockaddr_storage buf;
@@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt 
*xprt,
xs_reclassify_socket(family, sock);
 
if (reuseport)
-   xs_sock_set_reuseport(sock);
+   sock_set_reuseport(sock->sk, true);
 
err = xs_bind(transport, sock);
if (err) {
-- 
2.26.2



[Cluster-devel] [PATCH 30/33] tipc: call tsk_set_importance from tipc_topsrv_create_listener

2020-05-13 Thread Christoph Hellwig
Avoid using kernel_setsockopt for the TIPC_IMPORTANCE option when we can
just use the internal helper.  The only change needed is to pass a struct
sock instead of tipc_sock, which is private to socket.c.

Signed-off-by: Christoph Hellwig 
---
 net/tipc/socket.c | 18 +-
 net/tipc/socket.h |  2 ++
 net/tipc/topsrv.c |  6 +++---
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 87466607097f1..f2e10fbfb03df 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -191,17 +191,17 @@ static int tsk_importance(struct tipc_sock *tsk)
return msg_importance(&tsk->phdr);
 }
 
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+static struct tipc_sock *tipc_sk(const struct sock *sk)
 {
-   if (imp > TIPC_CRITICAL_IMPORTANCE)
-   return -EINVAL;
-   msg_set_importance(&tsk->phdr, (u32)imp);
-   return 0;
+   return container_of(sk, struct tipc_sock, sk);
 }
 
-static struct tipc_sock *tipc_sk(const struct sock *sk)
+int tsk_set_importance(struct sock *sk, int imp)
 {
-   return container_of(sk, struct tipc_sock, sk);
+   if (imp > TIPC_CRITICAL_IMPORTANCE)
+   return -EINVAL;
+   msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+   return 0;
 }
 
 static bool tsk_conn_cong(struct tipc_sock *tsk)
@@ -2661,7 +2661,7 @@ static int tipc_accept(struct socket *sock, struct socket 
*new_sock, int flags,
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
 
-   tsk_set_importance(new_tsock, msg_importance(msg));
+   tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
new_tsock->conn_type = msg_nametype(msg);
new_tsock->conn_instance = msg_nameinst(msg);
@@ -3079,7 +3079,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, 
int opt,
 
switch (opt) {
case TIPC_IMPORTANCE:
-   res = tsk_set_importance(tsk, value);
+   res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b9679acee4..b11575afc66fe 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk);
 bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
 bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
 
+int tsk_set_importance(struct sock *sk, int imp);
+
 #endif
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 73dbed0c4b6b8..a0d50649f71c2 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -494,7 +494,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
 
 static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
 {
-   int imp = TIPC_CRITICAL_IMPORTANCE;
struct socket *lsock = NULL;
struct sockaddr_tipc saddr;
struct sock *sk;
@@ -511,8 +510,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv 
*srv)
sk->sk_user_data = srv;
write_unlock_bh(&sk->sk_callback_lock);
 
-   rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
-  (char *)&imp, sizeof(imp));
+   lock_sock(sk);
+   rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+   release_sock(sk);
if (rc < 0)
goto err;
 
-- 
2.26.2



[Cluster-devel] [PATCH 25/33] ipv6: add ip6_sock_set_recvpktinfo

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the IPV6_RECVPKTINFO sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/ipv6.h   |  1 +
 net/ipv6/ipv6_sockglue.c |  8 
 net/sunrpc/svcsock.c | 11 +++
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 04b2bc1935054..170872bc4e960 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1178,5 +1178,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
 int ip6_sock_set_v6only(struct sock *sk, bool val);
 void ip6_sock_set_recverr(struct sock *sk, bool val);
 int ip6_sock_set_addr_preferences(struct sock *sk, bool val);
+void ip6_sock_set_recvpktinfo(struct sock *sk, bool val);
 
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c23d42e809d7e..d60adb018d71c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -225,6 +225,14 @@ int ip6_sock_set_addr_preferences(struct sock *sk, bool 
val)
 }
 EXPORT_SYMBOL(ip6_sock_set_addr_preferences);
 
+void ip6_sock_set_recvpktinfo(struct sock *sk, bool val)
+{
+   lock_sock(sk);
+   inet6_sk(sk)->rxopt.bits.rxinfo = val;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(ip6_sock_set_recvpktinfo);
+
 static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
 {
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7fa7fedec3c5a..7cf8389b6f46f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -595,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = {
 
 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
-   int err, level, optname, one = 1;
-
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
  &svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -617,17 +615,14 @@ static void svc_udp_init(struct svc_sock *svsk, struct 
svc_serv *serv)
switch (svsk->sk_sk->sk_family) {
case AF_INET:
ip_sock_set_pktinfo(svsk->sk_sock->sk, true);
-   return;
+   break;
case AF_INET6:
-   level = SOL_IPV6;
-   optname = IPV6_RECVPKTINFO;
+   if (IS_REACHABLE(CONFIG_IPV6))
+   ip6_sock_set_recvpktinfo(svsk->sk_sock->sk, true);
break;
default:
BUG();
}
-   err = kernel_setsockopt(svsk->sk_sock, level, optname,
-   (char *)&one, sizeof(one));
-   dprintk("svc: kernel_setsockopt returned %d\n", err);
 }
 
 /*
-- 
2.26.2



[Cluster-devel] [PATCH 18/33] ipv4: add ip_sock_set_tos

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the IP_TOS sockopt from kernel space without
going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 drivers/nvme/host/tcp.c   | 14 +++---
 drivers/nvme/target/tcp.c | 10 ++
 include/net/ip.h  |  2 ++
 net/ipv4/ip_sockglue.c| 30 +-
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8417eeb83fcd2..6c069e982989e 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,7 +1313,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 {
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
-   int ret, opt, rcv_pdu_size;
+   int ret, rcv_pdu_size;
 
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
@@ -1352,16 +1352,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
sock_set_priority(queue->sock->sk, so_priority);
 
/* Set socket type of service */
-   if (nctrl->opts->tos >= 0) {
-   opt = nctrl->opts->tos;
-   ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS,
-   (char *)&opt, sizeof(opt));
-   if (ret) {
-   dev_err(nctrl->device,
-   "failed to set IP_TOS sock opt %d\n", ret);
-   goto err_sock;
-   }
-   }
+   if (nctrl->opts->tos >= 0)
+   ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
 
queue->sock->sk->sk_allocation = GFP_ATOMIC;
nvme_tcp_set_queue_io_cpu(queue);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index b2bfa791c5cb2..4296fe3c745bf 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1452,14 +1452,8 @@ static int nvmet_tcp_set_queue_sock(struct 
nvmet_tcp_queue *queue)
sock_set_priority(sock->sk, so_priority);
 
/* Set socket type of service */
-   if (inet->rcv_tos > 0) {
-   int tos = inet->rcv_tos;
-
-   ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
-   (char *)&tos, sizeof(tos));
-   if (ret)
-   return ret;
-   }
+   if (inet->rcv_tos > 0)
+   ip_sock_set_tos(sock->sk, inet->rcv_tos);
 
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = queue;
diff --git a/include/net/ip.h b/include/net/ip.h
index 5b317c9f4470a..2fc52e26fa88b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -765,4 +765,6 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
return likely(mtu >= IPV4_MIN_MTU);
 }
 
+void ip_sock_set_tos(struct sock *sk, int val);
+
 #endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8206047d70b6b..1733ac78c21aa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -560,6 +560,26 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int 
len, int *addr_len)
return err;
 }
 
+static void __ip_sock_set_tos(struct sock *sk, int val)
+{
+   if (sk->sk_type == SOCK_STREAM) {
+   val &= ~INET_ECN_MASK;
+   val |= inet_sk(sk)->tos & INET_ECN_MASK;
+   }
+   if (inet_sk(sk)->tos != val) {
+   inet_sk(sk)->tos = val;
+   sk->sk_priority = rt_tos2priority(val);
+   sk_dst_reset(sk);
+   }
+}
+
+void ip_sock_set_tos(struct sock *sk, int val)
+{
+   lock_sock(sk);
+   __ip_sock_set_tos(sk, val);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_tos);
 
 /*
  * Socket option code for IP. This is the end of the line after any
@@ -743,15 +763,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
break;
case IP_TOS:/* This sets both TOS and Precedence */
-   if (sk->sk_type == SOCK_STREAM) {
-   val &= ~INET_ECN_MASK;
-   val |= inet->tos & INET_ECN_MASK;
-   }
-   if (inet->tos != val) {
-   inet->tos = val;
-   sk->sk_priority = rt_tos2priority(val);
-   sk_dst_reset(sk);
-   }
+   __ip_sock_set_tos(sk, val);
break;
case IP_TTL:
if (optlen < 1)
-- 
2.26.2



[Cluster-devel] [PATCH 08/33] net: add sock_set_rcvbuf

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SO_RCVBUFFORCE sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 fs/dlm/lowcomms.c  |  7 +-
 include/net/sock.h |  1 +
 net/core/sock.c| 59 +-
 3 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 16d616c180613..223c185ecd0c7 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1297,7 +1297,6 @@ static int sctp_listen_for_all(void)
struct socket *sock = NULL;
int result = -EINVAL;
struct connection *con = nodeid2con(0, GFP_NOFS);
-   int bufsize = NEEDED_RMEM;
int one = 1;
 
if (!con)
@@ -1312,11 +1311,7 @@ static int sctp_listen_for_all(void)
goto out;
}
 
-   result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
-(char *)&bufsize, sizeof(bufsize));
-   if (result)
-   log_print("Error increasing buffer space on socket %d", result);
-
+   sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
   sizeof(one));
if (result < 0)
diff --git a/include/net/sock.h b/include/net/sock.h
index 4cedde585424f..e1ed40ff01312 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2692,6 +2692,7 @@ void sock_set_linger(struct sock *sk, bool onoff, 
unsigned int linger);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_sndtimeo(struct sock *sk, unsigned int secs);
 void sock_set_keepalive(struct sock *sk, bool keepalive);
+void sock_set_rcvbuf(struct sock *sk, int val);
 int sock_bindtoindex(struct sock *sk, int ifindex);
 void sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index dfd2b839f88bb..6af01b757cf24 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -804,6 +804,35 @@ void sock_set_keepalive(struct sock *sk, bool keepalive)
 }
 EXPORT_SYMBOL(sock_set_keepalive);
 
+void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+   /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+* as a negative value.
+*/
+   val = min_t(int, val, INT_MAX / 2);
+   sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+   /* We double it on the way in to account for "struct sk_buff" etc.
+* overhead.   Applications assume that the SO_RCVBUF setting they make
+* will allow that much actual data to be received on that socket.
+*
+* Applications are unaware that "struct sk_buff" and other overheads
+* allocate from the receive buffer during socket buffer allocation.
+*
+* And after considering the possible alternatives, returning the value
+* we actually used in getsockopt is the most desirable behavior.
+*/
+   WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+   lock_sock(sk);
+   __sock_set_rcvbuf(sk, val);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
 /*
  * This is meant for all protocols to use and covers goings on
  * at the socket level. Everything here is generic.
@@ -900,30 +929,7 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 * are treated in BSD as hints
 */
-   val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-   /* Ensure val * 2 fits into an int, to prevent max_t()
-* from treating it as a negative value.
-*/
-   val = min_t(int, val, INT_MAX / 2);
-   sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-   /*
-* We double it on the way in to account for
-* "struct sk_buff" etc. overhead.   Applications
-* assume that the SO_RCVBUF setting they make will
-* allow that much actual data to be received on that
-* socket.
-*
-* Applications are unaware that "struct sk_buff" and
-* other overheads allocate from the receive buffer
-* during socket buffer allocation.
-*
-* And after considering the possible alternatives,
-* returning the value we actually used in getsockopt
-* is the most desirable behavior.
-*/
-   WRITE_ONCE(sk->sk_rcvbuf,
-  max_t(int, val * 2, SOCK_MIN_RCVBUF));
+   __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
break;
 
case SO_RCVBUFFORCE:
@@ -935,9 +941,8 @@ int sock_setsockopt(struct socket *sock, int level, int 
optname,
/* No negative values (to prevent 

[Cluster-devel] [PATCH 11/33] tcp: tcp_sock_set_nodelay

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the TCP_NODELAY sockopt from kernel space
without going through a fake uaccess.  Cleanup the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig 
---
 drivers/block/drbd/drbd_int.h |  7 
 drivers/block/drbd/drbd_main.c|  2 +-
 drivers/block/drbd/drbd_receiver.c|  4 +--
 drivers/infiniband/sw/siw/siw_cm.c| 24 +++---
 drivers/nvme/host/tcp.c   |  9 +-
 drivers/nvme/target/tcp.c | 12 ++-
 drivers/target/iscsi/iscsi_target_login.c | 15 ++---
 fs/cifs/connect.c | 10 ++
 fs/dlm/lowcomms.c |  8 ++---
 fs/ocfs2/cluster/tcp.c| 20 ++--
 include/linux/tcp.h   |  1 +
 net/ceph/messenger.c  | 11 ++-
 net/ipv4/tcp.c| 39 +++
 net/rds/tcp.c | 11 +--
 net/rds/tcp.h |  1 -
 net/rds/tcp_listen.c  |  2 +-
 16 files changed, 49 insertions(+), 127 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 3550adc93c68b..e24bba87c8e02 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device 
*device, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head 
*to_be_freed);
 extern int drbd_connected(struct drbd_peer_device *);
 
-static inline void drbd_tcp_nodelay(struct socket *sock)
-{
-   int val = 1;
-   (void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-   (char*)&val, sizeof(val));
-}
-
 static inline void drbd_tcp_quickack(struct socket *sock)
 {
int val = 2;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c094c3c2c5d4d..4c876c7d7067f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection 
*connection, int vnr,
/* DRBD protocol "pings" are latency critical.
 * This is supposed to trigger tcp_push_pending_frames() */
if (!err && (cmd == P_PING || cmd == P_PING_ACK))
-   drbd_tcp_nodelay(sock->socket);
+   tcp_sock_set_nodelay(sock->socket->sk, true);
 
return err;
 }
diff --git a/drivers/block/drbd/drbd_receiver.c 
b/drivers/block/drbd/drbd_receiver.c
index 55ea907ad33cb..da5a9ee896a43 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1051,8 +1051,8 @@ static int conn_connect(struct drbd_connection 
*connection)
 
/* we don't want delays.
 * we use TCP_CORK where appropriate, though */
-   drbd_tcp_nodelay(sock.socket);
-   drbd_tcp_nodelay(msock.socket);
+   tcp_sock_set_nodelay(sock.socket->sk, true);
+   tcp_sock_set_nodelay(msock.socket->sk, true);
 
connection->data.socket = sock.socket;
connection->meta.socket = msock.socket;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c 
b/drivers/infiniband/sw/siw/siw_cm.c
index 6d7c8c933736c..7781bcddf7e23 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_get(new_cep);
new_s->sk->sk_user_data = new_cep;
 
-   if (siw_tcp_nagle == false) {
-   int val = 1;
-
-   rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
-  (char *)&val, sizeof(val));
-   if (rv) {
-   siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
-   goto error;
-   }
-   }
+   if (siw_tcp_nagle == false)
+   tcp_sock_set_nodelay(new_s->sk, true);
new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
 
rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
@@ -1386,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct 
iw_cm_conn_param *params)
siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
goto error;
}
-   if (siw_tcp_nagle == false) {
-   int val = 1;
-
-   rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
-  sizeof(val));
-   if (rv) {
-   siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
-   goto error;
-   }
-   }
+   if (siw_tcp_nagle == false)
+   tcp_sock_set_nodelay(s->sk, true);
cep = siw_cep_alloc(sdev);
if (!cep) {
rv = -ENOMEM;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index cd6a8fc14a139..a8070f93fd0a0 100644
--- a/drivers/nvme/host/tcp.c
+++ 

[Cluster-devel] [PATCH 32/33] sctp: add sctp_sock_get_primary_addr

2020-05-13 Thread Christoph Hellwig
Add a helper to directly get the SCTP_PRIMARY_ADDR sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 fs/dlm/lowcomms.c   | 11 +++-
 include/net/sctp/sctp.h |  1 +
 net/sctp/socket.c   | 57 +
 3 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 6fa45365666a8..46d2d71b62c57 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -855,10 +855,9 @@ static int tcp_accept_from_sock(struct connection *con)
 static int sctp_accept_from_sock(struct connection *con)
 {
/* Check that the new node is in the lockspace */
-   struct sctp_prim prim;
+   struct sctp_prim prim = { };
int nodeid;
-   int prim_len, ret;
-   int addr_len;
+   int addr_len, ret;
struct connection *newcon;
struct connection *addcon;
struct socket *newsock;
@@ -876,11 +875,7 @@ static int sctp_accept_from_sock(struct connection *con)
if (ret < 0)
goto accept_err;
 
-   memset(&prim, 0, sizeof(struct sctp_prim));
-   prim_len = sizeof(struct sctp_prim);
-
-   ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
-   (char *)&prim, &prim_len);
+   ret = sctp_sock_get_primary_addr(con->sock->sk, &prim);
if (ret < 0) {
log_print("getsockopt/sctp_primary_addr failed: %d", ret);
goto accept_err;
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index b505fa082f254..c98b1d14db853 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -618,5 +618,6 @@ static inline bool sctp_newsk_ready(const struct sock *sk)
 int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
int addrs_size, int op);
 void sctp_sock_set_nodelay(struct sock *sk, bool val);
+int sctp_sock_get_primary_addr(struct sock *sk, struct sctp_prim *prim);
 
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 64c395f7a86d5..39bf8090dbe1e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6411,6 +6411,35 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, 
int len,
return err;
 }
 
+static int __sctp_sock_get_primary_addr(struct sock *sk, struct sctp_prim 
*prim)
+{
+   struct sctp_association *asoc;
+
+   asoc = sctp_id2assoc(sk, prim->ssp_assoc_id);
+   if (!asoc)
+   return -EINVAL;
+   if (!asoc->peer.primary_path)
+   return -ENOTCONN;
+
+   memcpy(&prim->ssp_addr, &asoc->peer.primary_path->ipaddr,
+   asoc->peer.primary_path->af_specific->sockaddr_len);
+
+   sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk),
+   (union sctp_addr *)&prim->ssp_addr);
+   return 0;
+}
+
+int sctp_sock_get_primary_addr(struct sock *sk, struct sctp_prim *prim)
+{
+   int ret;
+
+   lock_sock(sk);
+   ret = __sctp_sock_get_primary_addr(sk, prim);
+   release_sock(sk);
+   return ret;
+}
+EXPORT_SYMBOL(sctp_sock_get_primary_addr);
+
 /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
  *
  * Requests that the local SCTP stack use the enclosed peer address as
@@ -6421,35 +6450,19 @@ static int sctp_getsockopt_primary_addr(struct sock 
*sk, int len,
char __user *optval, int __user *optlen)
 {
struct sctp_prim prim;
-   struct sctp_association *asoc;
-   struct sctp_sock *sp = sctp_sk(sk);
+   int ret;
 
if (len < sizeof(struct sctp_prim))
return -EINVAL;
-
-   len = sizeof(struct sctp_prim);
-
-   if (copy_from_user(&prim, optval, len))
+   if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
return -EFAULT;
 
-   asoc = sctp_id2assoc(sk, prim.ssp_assoc_id);
-   if (!asoc)
-   return -EINVAL;
-
-   if (!asoc->peer.primary_path)
-   return -ENOTCONN;
-
-   memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr,
-   asoc->peer.primary_path->af_specific->sockaddr_len);
-
-   sctp_get_pf_specific(sk->sk_family)->addr_to_user(sp,
-   (union sctp_addr *)&prim.ssp_addr);
+   ret = __sctp_sock_get_primary_addr(sk, &prim);
+   if (ret)
+   return ret;
 
-   if (put_user(len, optlen))
+   if (put_user(len, optlen) || copy_to_user(optval, &prim, len))
return -EFAULT;
-   if (copy_to_user(optval, &prim, len))
-   return -EFAULT;
-
return 0;
 }
 
-- 
2.26.2



[Cluster-devel] [PATCH 28/33] sctp: add sctp_sock_set_nodelay

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the SCTP_NODELAY sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 fs/dlm/lowcomms.c   | 10 ++
 include/net/sctp/sctp.h |  1 +
 net/sctp/socket.c   |  8 
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index e4939d770df53..6fa45365666a8 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1034,7 +1034,6 @@ static int sctp_bind_addrs(struct connection *con, 
uint16_t port)
 static void sctp_connect_to_sock(struct connection *con)
 {
struct sockaddr_storage daddr;
-   int one = 1;
int result;
int addr_len;
struct socket *sock;
@@ -1081,8 +1080,7 @@ static void sctp_connect_to_sock(struct connection *con)
log_print("connecting to %d", con->nodeid);
 
/* Turn off Nagle's algorithm */
-   kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
- sizeof(one));
+   sctp_sock_set_nodelay(sock->sk, true);
 
/*
 * Make sock->ops->connect() function return in specified time,
@@ -1296,7 +1294,6 @@ static int sctp_listen_for_all(void)
struct socket *sock = NULL;
int result = -EINVAL;
struct connection *con = nodeid2con(0, GFP_NOFS);
-   int one = 1;
 
if (!con)
return -ENOMEM;
@@ -1311,10 +1308,7 @@ static int sctp_listen_for_all(void)
}
 
sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
-   result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
-  sizeof(one));
-   if (result < 0)
-   log_print("Could not set SCTP NODELAY error %d\n", result);
+   sctp_sock_set_nodelay(sock->sk, true);
 
write_lock_bh(&sock->sk->sk_callback_lock);
/* Init con struct */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index f702b14d768ba..b505fa082f254 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -617,5 +617,6 @@ static inline bool sctp_newsk_ready(const struct sock *sk)
 
 int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
int addrs_size, int op);
+void sctp_sock_set_nodelay(struct sock *sk, bool val);
 
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 30c981d9f6158..64c395f7a86d5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3066,6 +3066,14 @@ static int sctp_setsockopt_nodelay(struct sock *sk, char 
__user *optval,
return 0;
 }
 
+void sctp_sock_set_nodelay(struct sock *sk, bool val)
+{
+   lock_sock(sk);
+   sctp_sk(sk)->nodelay = val;
+   release_sock(sk);
+}
+EXPORT_SYMBOL(sctp_sock_set_nodelay);
+
 /*
  *
  * 7.1.1 SCTP_RTOINFO
-- 
2.26.2



[Cluster-devel] [PATCH 27/33] sctp: export sctp_setsockopt_bindx

2020-05-13 Thread Christoph Hellwig
And call it directly from dlm instead of going through kernel_setsockopt.

Signed-off-by: Christoph Hellwig 
---
 fs/dlm/lowcomms.c   | 13 -
 include/net/sctp/sctp.h |  3 +++
 net/sctp/socket.c   |  5 +++--
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index b722a09a7ca05..e4939d770df53 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1005,14 +1005,17 @@ static int sctp_bind_addrs(struct connection *con, 
uint16_t port)
memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
make_sockaddr(&localaddr, port, &addr_len);
 
-   if (!i)
+   if (!i) {
result = kernel_bind(con->sock,
 (struct sockaddr *)&localaddr,
 addr_len);
-   else
-   result = kernel_setsockopt(con->sock, SOL_SCTP,
-  SCTP_SOCKOPT_BINDX_ADD,
-  (char *)&localaddr, 
addr_len);
+   } else {
+   lock_sock(con->sock->sk);
+   result = sctp_setsockopt_bindx(con->sock->sk,
+   (struct sockaddr *)&localaddr, addr_len,
+   SCTP_BINDX_ADD_ADDR);
+   release_sock(con->sock->sk);
+   }
 
if (result < 0) {
log_print("Can't bind to %d addr number %d, %d.\n",
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3ab5c6bbb90bd..f702b14d768ba 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -615,4 +615,7 @@ static inline bool sctp_newsk_ready(const struct sock *sk)
return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
 }
 
+int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
+   int addrs_size, int op);
+
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1c96b52c4aa28..30c981d9f6158 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -979,8 +979,8 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct 
sctp_sockaddr_entry *addrw)
  *
  * Returns 0 if ok, <0 errno code on error.
  */
-static int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
-int addrs_size, int op)
+int sctp_setsockopt_bindx(struct sock *sk, struct sockaddr *kaddrs,
+   int addrs_size, int op)
 {
int err;
int addrcnt = 0;
@@ -1032,6 +1032,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, struct 
sockaddr *kaddrs,
return -EINVAL;
}
 }
+EXPORT_SYMBOL(sctp_setsockopt_bindx);
 
 static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
 const union sctp_addr *daddr,
-- 
2.26.2



[Cluster-devel] [PATCH 23/33] ipv6: add ip6_sock_set_recverr

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the IPV6_RECVERR sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/ipv6.h   |  1 +
 net/ipv6/ipv6_sockglue.c | 10 ++
 net/rxrpc/local_object.c | 10 ++
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e24b59201a00d..69bc1651aaef8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1176,5 +1176,6 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
  const struct in6_addr *addr);
 
 int ip6_sock_set_v6only(struct sock *sk, bool val);
+void ip6_sock_set_recverr(struct sock *sk, bool val);
 
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f26224bb3e098..3c67626b6f5a9 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -147,6 +147,16 @@ int ip6_sock_set_v6only(struct sock *sk, bool val)
 }
 EXPORT_SYMBOL(ip6_sock_set_v6only);
 
+void ip6_sock_set_recverr(struct sock *sk, bool val)
+{
+   lock_sock(sk);
+   inet6_sk(sk)->recverr = val;
+   if (!val)
+   skb_queue_purge(&sk->sk_error_queue);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(ip6_sock_set_recverr);
+
 static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
 {
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 20236ddecd2ef..5e356a63aa791 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct 
rxrpc_net *rxnet,
 static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 {
struct sock *usk;
-   int ret, opt;
+   int ret;
 
_enter("%p{%d,%d}",
   local, local->srx.transport_type, local->srx.transport.family);
@@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, 
struct net *net)
switch (local->srx.transport.family) {
case AF_INET6:
/* we want to receive ICMPv6 errors */
-   opt = 1;
-   ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
-   (char *) &opt, sizeof(opt));
-   if (ret < 0) {
-   _debug("setsockopt failed");
-   goto error;
-   }
+   ip6_sock_set_recverr(local->socket->sk, true);
 
/* Fall through and set IPv4 options too otherwise we don't get
 * errors from IPv4 packets sent through the IPv6 socket.
-- 
2.26.2



[Cluster-devel] [PATCH 29/33] rxrpc_sock_set_min_security_level

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the RXRPC_MIN_SECURITY_LEVEL sockopt from
kernel space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 fs/afs/rxrpc.c |  6 ++
 include/net/af_rxrpc.h |  2 ++
 net/rxrpc/af_rxrpc.c   | 13 +
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 1ecc67da6c1a4..7dfcbd58da85c 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -37,7 +37,6 @@ int afs_open_socket(struct afs_net *net)
 {
struct sockaddr_rxrpc srx;
struct socket *socket;
-   unsigned int min_level;
int ret;
 
_enter("");
@@ -57,9 +56,8 @@ int afs_open_socket(struct afs_net *net)
srx.transport.sin6.sin6_family  = AF_INET6;
srx.transport.sin6.sin6_port= htons(AFS_CM_PORT);
 
-   min_level = RXRPC_SECURITY_ENCRYPT;
-   ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
-   (void *)&min_level, sizeof(min_level));
+   ret = rxrpc_sock_set_min_security_level(socket->sk,
+   RXRPC_SECURITY_ENCRYPT);
if (ret < 0)
goto error_2;
 
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 04e97bab6f28b..8d7b469453bda 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -72,4 +72,6 @@ bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
 void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
   unsigned long);
 
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+
 #endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 15ee92d795815..394189b81849f 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -571,6 +571,19 @@ static int rxrpc_sendmsg(struct socket *sock, struct 
msghdr *m, size_t len)
return ret;
 }
 
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val)
+{
+   if (sk->sk_state != RXRPC_UNBOUND)
+   return -EISCONN;
+   if (val > RXRPC_SECURITY_MAX)
+   return -EINVAL;
+   lock_sock(sk);
+   rxrpc_sk(sk)->min_sec_level = val;
+   release_sock(sk);
+   return 0;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
+
 /*
  * set RxRPC socket options
  */
-- 
2.26.2



[Cluster-devel] [PATCH 20/33] ipv4: add ip_sock_set_recverr

2020-05-13 Thread Christoph Hellwig
Add a helper to directly set the IP_RECVERR sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig 
---
 include/net/ip.h |  1 +
 net/ipv4/ip_sockglue.c   | 10 ++
 net/rxrpc/local_object.c |  8 +---
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 1e2feca8630d0..7ab8140b54429 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,5 +767,6 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 
 void ip_sock_set_tos(struct sock *sk, int val);
 void ip_sock_set_freebind(struct sock *sk, bool val);
+void ip_sock_set_recverr(struct sock *sk, bool val);
 
 #endif /* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 0c40887a817f8..9abecc3195520 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -589,6 +589,16 @@ void ip_sock_set_freebind(struct sock *sk, bool val)
 }
 EXPORT_SYMBOL(ip_sock_set_freebind);
 
+void ip_sock_set_recverr(struct sock *sk, bool val)
+{
+   lock_sock(sk);
+   inet_sk(sk)->recverr = val;
+   if (!val)
+   skb_queue_purge(&sk->sk_error_queue);
+   release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_recverr);
+
 /*
  * Socket option code for IP. This is the end of the line after any
  * TCP,UDP etc options on an IP socket.
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 562ea36c96b0f..1b87b8a9ff725 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -171,13 +171,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, 
struct net *net)
/* Fall through */
case AF_INET:
/* we want to receive ICMP errors */
-   opt = 1;
-   ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
-   (char *) &opt, sizeof(opt));
-   if (ret < 0) {
-   _debug("setsockopt failed");
-   goto error;
-   }
+   ip_sock_set_recverr(local->socket->sk, true);
 
/* we want to set the don't fragment bit */
opt = IP_PMTUDISC_DO;
-- 
2.26.2



[Cluster-devel] [PATCH 31/33] net: remove kernel_setsockopt

2020-05-13 Thread Christoph Hellwig
No users left.

Signed-off-by: Christoph Hellwig 
---
 include/linux/net.h |  2 --
 net/socket.c| 31 ---
 2 files changed, 33 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 6451425e828f5..ece7513326293 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -305,8 +305,6 @@ int kernel_getsockname(struct socket *sock, struct sockaddr 
*addr);
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
 int kernel_getsockopt(struct socket *sock, int level, int optname, char 
*optval,
  int *optlen);
-int kernel_setsockopt(struct socket *sock, int level, int optname, char 
*optval,
- unsigned int optlen);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
diff --git a/net/socket.c b/net/socket.c
index 1c9a7260a41de..f37c3ef508691 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3749,37 +3749,6 @@ int kernel_getsockopt(struct socket *sock, int level, 
int optname,
 }
 EXPORT_SYMBOL(kernel_getsockopt);
 
-/**
- * kernel_setsockopt - set a socket option (kernel space)
- * @sock: socket
- * @level: API level (SOL_SOCKET, ...)
- * @optname: option tag
- * @optval: option value
- * @optlen: option length
- *
- * Returns 0 or an error.
- */
-
-int kernel_setsockopt(struct socket *sock, int level, int optname,
-   char *optval, unsigned int optlen)
-{
-   mm_segment_t oldfs = get_fs();
-   char __user *uoptval;
-   int err;
-
-   uoptval = (char __user __force *) optval;
-
-   set_fs(KERNEL_DS);
-   if (level == SOL_SOCKET)
-   err = sock_setsockopt(sock, level, optname, uoptval, optlen);
-   else
-   err = sock->ops->setsockopt(sock, level, optname, uoptval,
-   optlen);
-   set_fs(oldfs);
-   return err;
-}
-EXPORT_SYMBOL(kernel_setsockopt);
-
 /**
  * kernel_sendpage - send a &page through a socket (kernel space)
  * @sock: socket
-- 
2.26.2