Hi,

David Ahern wrote:
> Currently, all ipv6 addresses are flushed when the interface is configured
> down, including global, static addresses:
> 
>     $ ip -6 addr add dev eth1 2000:11:1:1::1/64
>     $ ip addr show dev eth1
>     3: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group 
> default qlen 1000
>         link/ether 02:04:11:22:33:01 brd ff:ff:ff:ff:ff:ff
>         inet6 2000:11:1:1::1/64 scope global tentative
>            valid_lft forever preferred_lft forever
>     $ ip link set dev eth1 up
>     $ ip link set dev eth1 down
>     $ ip addr show dev eth1
>     3: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN group 
> default qlen 1000
>         link/ether 02:04:11:22:33:01 brd ff:ff:ff:ff:ff:ff
> 
> Add a new sysctl to make this behavior optional. The new setting defaults to
> flush all addresses to maintain backwards compatibility. When set, global
> addresses with no expire times are not flushed on an admin down:
> 
>     $ echo 1 > /proc/sys/net/ipv6/conf/eth1/keep_addr_on_down
>     $ ip -6 addr add dev eth1 2000:11:1:1::1/64
>     $ ip addr show dev eth1
>     3: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN group 
> default qlen 1000
>         link/ether 02:04:11:22:33:01 brd ff:ff:ff:ff:ff:ff
>         inet6 2000:11:1:1::1/64 scope global tentative
>            valid_lft forever preferred_lft forever
>     $ ip link set dev eth1 up
>     $ ip link set dev eth1 down
>     $ ip addr show dev eth1
>     3: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast state DOWN group 
> default qlen 1000
>         link/ether 02:04:11:22:33:01 brd ff:ff:ff:ff:ff:ff
>         inet6 2000:11:1:1::1/64 scope global
>            valid_lft forever preferred_lft forever
>         inet6 fe80::4:11ff:fe22:3301/64 scope link
>            valid_lft forever preferred_lft forever
> 
> Signed-off-by: David Ahern <d...@cumulusnetworks.com>
> ---
> Dave: per the discussion at netconf tossing this out again. While the
>       failure semantics are not ideal it only occurs on GFP_ATOMIC
>       memory failures.
:
> diff --git a/Documentation/networking/ip-sysctl.txt 
> b/Documentation/networking/ip-sysctl.txt
> index 24ce97f42d35..7ddbbb67f0db 100644
> --- a/Documentation/networking/ip-sysctl.txt
> +++ b/Documentation/networking/ip-sysctl.txt
> @@ -1563,6 +1563,12 @@ temp_prefered_lft - INTEGER
>       Preferred lifetime (in seconds) for temporary addresses.
>       Default: 86400 (1 day)
>  
> +keep_addr_on_down - BOOLEAN
> +     Keep all IPv6 addresses on an interface down event. If set static
> +     global addresses with no expiration time are not flushed.
> +
> +     Default: disabled
> +

How about this:
   1: enabled
   0: system default
  -1: disabled
so that an interface can override system-wide config?

>  max_desync_factor - INTEGER
>       Maximum value for DESYNC_FACTOR, which is a random value
>       that ensures that clients don't synchronize with each
> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> index 4b2267e1b7c3..7edc14fb66b6 100644
> --- a/include/linux/ipv6.h
> +++ b/include/linux/ipv6.h
> @@ -62,6 +62,7 @@ struct ipv6_devconf {
>               struct in6_addr secret;
>       } stable_secret;
>       __s32           use_oif_addrs_only;
> +     __s32           keep_addr_on_down;
>       void            *sysctl;
>  };
>  
> diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
> index 1c8b6820b694..01ba6a286a4b 100644
> --- a/include/net/if_inet6.h
> +++ b/include/net/if_inet6.h
> @@ -72,6 +72,7 @@ struct inet6_ifaddr {
>       int                     regen_count;
>  
>       bool                    tokenized;
> +     bool                    user_managed;

Can't we use IFA_F_PERMANENT?

> diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
> index ec117b65d5a5..395876060f50 100644
> --- a/include/uapi/linux/ipv6.h
> +++ b/include/uapi/linux/ipv6.h
> @@ -176,6 +176,7 @@ enum {
>       DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
>       DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
>       DEVCONF_DROP_UNSOLICITED_NA,
> +     DEVCONF_KEEP_ADDR_ON_DOWN,
>       DEVCONF_MAX
>  };
>  
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index ac0ba9e4e06b..0bcb0f538e54 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -216,6 +216,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
>       },
>       .use_oif_addrs_only     = 0,
>       .ignore_routes_with_linkdown = 0,
> +     .keep_addr_on_down      = 0,
>  };
>  
>  static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
> @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt 
> __read_mostly = {
>       },
>       .use_oif_addrs_only     = 0,
>       .ignore_routes_with_linkdown = 0,
> +     .keep_addr_on_down      = 0,
>  };
>  
>  /* Check if a valid qdisc is available */
> @@ -962,6 +964,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct 
> in6_addr *addr,
>       ifa->prefered_lft = prefered_lft;
>       ifa->cstamp = ifa->tstamp = jiffies;
>       ifa->tokenized = false;
> +     ifa->user_managed = false;
>  
>       ifa->rt = rt;
>  
> @@ -2701,6 +2704,9 @@ static int inet6_addr_add(struct net *net, int ifindex,
>                           valid_lft, prefered_lft);
>  
>       if (!IS_ERR(ifp)) {
> +             if (!expires)
> +                     ifp->user_managed = true;
> +
>               if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
>                       addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
>                                             expires, flags);
> @@ -3168,6 +3174,55 @@ static void addrconf_gre_config(struct net_device *dev)
>  }
>  #endif
>  
> +static int fixup_user_managed_addr(struct inet6_dev *idev,
> +                                struct inet6_ifaddr *ifp)
> +{
> +     if (!ifp->rt) {
> +             struct rt6_info *rt;
> +
> +             rt = addrconf_dst_alloc(idev, &ifp->addr, false);
> +             if (unlikely(IS_ERR(rt)))
> +                     return PTR_ERR(rt);
> +
> +             ifp->rt = rt;
> +     }
> +
> +     if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
> +             addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
> +                                   idev->dev, 0, 0);
> +     }
> +
> +     addrconf_dad_start(ifp);
> +
> +     return 0;
> +}
> +
> +static void addrconf_user_managed_addr(struct net_device *dev)
> +{
> +     struct inet6_ifaddr *ifp, *tmp;
> +     struct inet6_dev *idev;
> +
> +     idev = __in6_dev_get(dev);
> +     if (!idev)
> +             return;
> +
> +     write_lock_bh(&idev->lock);
> +
> +     list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
> +             if (ifp->user_managed &&
> +                 fixup_user_managed_addr(idev, ifp) < 0) {
> +                     write_unlock_bh(&idev->lock);
> +                     ipv6_del_addr(ifp);
> +                     write_lock_bh(&idev->lock);
> +
> +                     net_info_ratelimited("%s: Failed to add prefix route 
> for address %pI6c; dropping\n",
> +                                          idev->dev->name, &ifp->addr);
> +             }
> +     }
> +
> +     write_unlock_bh(&idev->lock);
> +}
> +
>  static int addrconf_notify(struct notifier_block *this, unsigned long event,
>                          void *ptr)
>  {
> @@ -3253,6 +3308,8 @@ static int addrconf_notify(struct notifier_block *this, 
> unsigned long event,
>                       run_pending = 1;
>               }
>  
> +             addrconf_user_managed_addr(dev);
> +
>               switch (dev->type) {
>  #if IS_ENABLED(CONFIG_IPV6_SIT)
>               case ARPHRD_SIT:
> @@ -3356,7 +3413,9 @@ static int addrconf_ifdown(struct net_device *dev, int 
> how)
>  {
>       struct net *net = dev_net(dev);
>       struct inet6_dev *idev;
> -     struct inet6_ifaddr *ifa;
> +     struct inet6_ifaddr *ifa, *tmp;
> +     struct list_head del_list;
> +     int keep_addr;
>       int state, i;
>  
>       ASSERT_RTNL();
> @@ -3383,6 +3442,10 @@ static int addrconf_ifdown(struct net_device *dev, int 
> how)
>  
>       }
>  
> +     keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
> +     if (!keep_addr)
> +             keep_addr = idev->cnf.keep_addr_on_down;
> +
>       /* Step 2: clear hash table */
>       for (i = 0; i < IN6_ADDR_HSIZE; i++) {
>               struct hlist_head *h = &inet6_addr_lst[i];
> @@ -3391,9 +3454,12 @@ static int addrconf_ifdown(struct net_device *dev, int 
> how)
>  restart:
>               hlist_for_each_entry_rcu(ifa, h, addr_lst) {
>                       if (ifa->idev == idev) {
> -                             hlist_del_init_rcu(&ifa->addr_lst);
>                               addrconf_del_dad_work(ifa);
> -                             goto restart;
> +                             if (how || !keep_addr || !ifa->user_managed) {

keep_addr <= 0

> +                                     hlist_del_init_rcu(&ifa->addr_lst);
> +                                     goto restart;
> +                             }
> +
>                       }
>               }
>               spin_unlock_bh(&addrconf_hash_lock);
> @@ -3427,31 +3493,52 @@ static int addrconf_ifdown(struct net_device *dev, 
> int how)
>               write_lock_bh(&idev->lock);
>       }
>  
> -     while (!list_empty(&idev->addr_list)) {
> -             ifa = list_first_entry(&idev->addr_list,
> -                                    struct inet6_ifaddr, if_list);
> -             addrconf_del_dad_work(ifa);
> +     INIT_LIST_HEAD(&del_list);
> +     list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
> +             bool keep_ifa = false;
>  
> -             list_del(&ifa->if_list);
> +             if (!how && keep_addr && ifa->user_managed)

keep_addr > 0

etc...

> +                     keep_ifa = true;
>  
> -             write_unlock_bh(&idev->lock);
> +             addrconf_del_dad_work(ifa);
>  
> +             write_unlock_bh(&idev->lock);
>               spin_lock_bh(&ifa->lock);
> -             state = ifa->state;
> -             ifa->state = INET6_IFADDR_STATE_DEAD;
> +
> +             if (unlikely(keep_ifa)) {
> +                     /* set state to skip the notifier below */
> +                     state = INET6_IFADDR_STATE_DEAD;
> +                     ifa->state = 0;
> +                     if (!(ifa->flags & IFA_F_NODAD))
> +                             ifa->flags |= IFA_F_TENTATIVE;
> +             } else {
> +                     state = ifa->state;
> +                     ifa->state = INET6_IFADDR_STATE_DEAD;
> +
> +                     list_del(&ifa->if_list);
> +                     list_add(&ifa->if_list, &del_list);
> +             }
> +
>               spin_unlock_bh(&ifa->lock);
>  
>               if (state != INET6_IFADDR_STATE_DEAD) {
>                       __ipv6_ifa_notify(RTM_DELADDR, ifa);
>                       inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
>               }
> -             in6_ifa_put(ifa);
>  
>               write_lock_bh(&idev->lock);
>       }
>  
>       write_unlock_bh(&idev->lock);
>  
> +     while (!list_empty(&del_list)) {
> +             ifa = list_first_entry(&del_list,
> +                                    struct inet6_ifaddr, if_list);
> +             list_del(&ifa->if_list);
> +
> +             in6_ifa_put(ifa);
> +     }
> +
>       /* Step 5: Discard anycast and multicast list */
>       if (how) {
>               ipv6_ac_destroy_dev(idev);
> @@ -4713,6 +4800,7 @@ static inline void ipv6_store_devconf(struct 
> ipv6_devconf *cnf,
>       array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
>       array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = 
> cnf->drop_unicast_in_l2_multicast;
>       array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
> +     array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
>  }
>  
>  static inline size_t inet6_ifla6_size(void)
> @@ -5194,10 +5282,12 @@ static void __ipv6_ifa_notify(int event, struct 
> inet6_ifaddr *ifp)
>                       if (rt)
>                               ip6_del_rt(rt);
>               }
> -             dst_hold(&ifp->rt->dst);
> -
> -             ip6_del_rt(ifp->rt);
> +             if (ifp->rt) {
> +                     dst_hold(&ifp->rt->dst);
>  
> +                     ip6_del_rt(ifp->rt);
> +                     ifp->rt = NULL;
> +             }
>               rt_genid_bump_ipv6(net);
>               break;
>       }
> @@ -5801,6 +5891,14 @@ static struct addrconf_sysctl_table
>                       .proc_handler   = proc_dointvec,
>               },
>               {
> +                     .procname       = "keep_addr_on_down",
> +                     .data           = &ipv6_devconf.keep_addr_on_down,
> +                     .maxlen         = sizeof(int),
> +                     .mode           = 0644,
> +                     .proc_handler   = proc_dointvec,
> +
> +             },
> +             {
>                       /* sentinel */
>               }
>       },
> 

-- 
Hideaki Yoshifuji <hideaki.yoshif...@miraclelinux.com>
Technical Division, MIRACLE LINUX CORPORATION

Reply via email to