Jason Wang <jasow...@redhat.com> writes:

> On Wed, Feb 4, 2015 at 1:00 AM, Vitaly Kuznetsov <vkuzn...@redhat.com>
> wrote:
>> free_channel() function frees the channel unconditionally so we need
>> to make
>> sure nobody has any link to it. This is not trivial and there are
>> several
>> examples of races we have:
>>
>> 1) In vmbus_onoffer_rescind() we check for channel existence with
>>    relid2channel() and then use it. This can go wrong if we're in
>> the middle
>>    of channel removal (free_channel() was already called).
>>
>> 2) In process_chn_event() we check for channel existence with
>>    pcpu_relid2channel() and then use it. This can also go wrong.
>>
>> 3) vmbus_free_channels() just frees all channels, in case we're in
>> the middle
>>    of vmbus_process_rescind_offer() crash is possible.
>>
>> The issue can be solved by holding vmbus_connection.channel_lock
>> everywhere,
>> however, it looks like a way to deadlocks and performance
>> degradation. Get/put
>> workflow fits here the best.
>>
>> Implement vmbus_get_channel()/vmbus_put_channel() pair instead of
>> free_channel().
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com>
>> ---
>>  drivers/hv/channel_mgmt.c | 45
>> ++++++++++++++++++++++++++++++++++++++-------
>>  drivers/hv/connection.c   |  7 +++++--
>>  drivers/hv/hyperv_vmbus.h |  4 ++++
>>  include/linux/hyperv.h    | 13 +++++++++++++
>>  4 files changed, 60 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
>> index 36bacc7..eb9ce94 100644
>> --- a/drivers/hv/channel_mgmt.c
>> +++ b/drivers/hv/channel_mgmt.c
>> @@ -147,6 +147,8 @@ static struct vmbus_channel *alloc_channel(void)
>>              return NULL;
>>      channel->id = atomic_inc_return(&chan_num);
>> +    atomic_set(&channel->count, 1);
>> +
>>      spin_lock_init(&channel->inbound_lock);
>>      spin_lock_init(&channel->lock);
>>  @@ -178,19 +180,47 @@ static void release_channel(struct
>> work_struct *work)
>>  }
>>   /*
>> - * free_channel - Release the resources used by the vmbus channel
>> object
>> + * vmbus_put_channel - Decrease the channel usage counter and
>> release the
>> + * resources when this counter reaches zero.
>>   */
>> -static void free_channel(struct vmbus_channel *channel)
>> +void vmbus_put_channel(struct vmbus_channel *channel)
>>  {
>> +    unsigned long flags;
>>      /*
>>       * We have to release the channel's workqueue/thread in the vmbus's
>>       * workqueue/thread context
>>       * ie we can't destroy ourselves.
>>       */
>> -    INIT_WORK(&channel->work, release_channel);
>> -    queue_work(vmbus_connection.work_queue, &channel->work);
>> +    spin_lock_irqsave(&channel->lock, flags);
>> +    if (atomic_dec_and_test(&channel->count)) {
>> +            channel->dying = true;
>> +            INIT_WORK(&channel->work, release_channel);
>> +            spin_unlock_irqrestore(&channel->lock, flags);
>> +            queue_work(vmbus_connection.work_queue, &channel->work);
>> +    } else
>> +            spin_unlock_irqrestore(&channel->lock, flags);
>> +}
>> +EXPORT_SYMBOL_GPL(vmbus_put_channel);
>> +
>> +/* vmbus_get_channel - Get additional reference to the channel */
>> +struct vmbus_channel *vmbus_get_channel(struct vmbus_channel
>> *channel)
>> +{
>> +    unsigned long flags;
>> +    struct vmbus_channel *ret = NULL;
>> +
>> +    if (!channel)
>> +            return NULL;
>> +
>> +    spin_lock_irqsave(&channel->lock, flags);
>> +    if (!channel->dying) {
>> +            atomic_inc(&channel->count);
>> +            ret = channel;
>> +    }
>> +    spin_unlock_irqrestore(&channel->lock, flags);
>
> Looks like we can use atomic_inc_return_safe() here to avoid extra
> dying. And then there's also no need for the spinlock.
>
> if (atomic_inc_return_safe(&channel->count) > 0)
>       return channel;
> else
>       return NULL;

Good idea, thanks! I'll try.

>>
>> +    return ret;
>>  }
>> +EXPORT_SYMBOL_GPL(vmbus_get_channel);
>>   static void percpu_channel_enq(void *arg)
>>  {
>> @@ -253,7 +283,7 @@ static void vmbus_process_rescind_offer(struct
>> work_struct *work)
>>              list_del(&channel->sc_list);
>>              spin_unlock_irqrestore(&primary_channel->lock, flags);
>>      }
>> -    free_channel(channel);
>> +    vmbus_put_channel(channel);
>>  }
>>   void vmbus_free_channels(void)
>> @@ -262,7 +292,7 @@ void vmbus_free_channels(void)
>>      list_for_each_entry(channel, &vmbus_connection.chn_list,
>> listentry) {
>>              vmbus_device_unregister(channel->device_obj);
>> -            free_channel(channel);
>> +            vmbus_put_channel(channel);
>>      }
>>  }
>>  @@ -391,7 +421,7 @@ done_init_rescind:
>>      spin_unlock_irqrestore(&newchannel->lock, flags);
>>      return;
>>  err_free_chan:
>> -    free_channel(newchannel);
>> +    vmbus_put_channel(newchannel);
>>  }
>>   enum {
>> @@ -549,6 +579,7 @@ static void vmbus_onoffer_rescind(struct
>> vmbus_channel_message_header *hdr)
>>              queue_work(channel->controlwq, &channel->work);
>>      spin_unlock_irqrestore(&channel->lock, flags);
>> +    vmbus_put_channel(channel);
>>  }
>>   /*
>> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
>> index c4acd1c..d1ce134 100644
>> --- a/drivers/hv/connection.c
>> +++ b/drivers/hv/connection.c
>> @@ -247,7 +247,8 @@ void vmbus_disconnect(void)
>>   * Map the given relid to the corresponding channel based on the
>>   * per-cpu list of channels that have been affinitized to this CPU.
>>   * This will be used in the channel callback path as we can do this
>> - * mapping in a lock-free fashion.
>> + * mapping in a lock-free fashion. Takes additional reference to the
>> + * channel, all users are supposed to do vmbus_put_channel().
>>   */
>>  static struct vmbus_channel *pcpu_relid2channel(u32 relid)
>>  {
>> @@ -263,7 +264,7 @@ static struct vmbus_channel
>> *pcpu_relid2channel(u32 relid)
>>              }
>>      }
>>  -   return found_channel;
>> +    return vmbus_get_channel(found_channel);
>>  }
>>   /*
>> @@ -297,6 +298,7 @@ struct vmbus_channel *relid2channel(u32 relid)
>>                      }
>>              }
>>      }
>> +    found_channel = vmbus_get_channel(found_channel);
>>      spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
>>      return found_channel;
>> @@ -360,6 +362,7 @@ static void process_chn_event(u32 relid)
>>              pr_err("no channel callback for relid - %u\n", relid);
>>      }
>>  +   vmbus_put_channel(channel);
>>  }
>>   /*
>> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
>> index b055e53..40d70f0 100644
>> --- a/drivers/hv/hyperv_vmbus.h
>> +++ b/drivers/hv/hyperv_vmbus.h
>> @@ -687,6 +687,10 @@ void vmbus_device_unregister(struct hv_device
>> *device_obj);
>>  /* VmbusChildDeviceDestroy( */
>>  /* struct hv_device *); */
>>  +/*
>> + * Get the channel by its relid. Takes additional reference to the
>> channel so
>> + * all users are supposed to do vmbus_put_channel() when they're
>> done.
>> + */
>>  struct vmbus_channel *relid2channel(u32 relid);
>>   void vmbus_free_channels(void);
>> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
>> index e73cfeb..c576d2d 100644
>> --- a/include/linux/hyperv.h
>> +++ b/include/linux/hyperv.h
>> @@ -649,6 +649,9 @@ struct vmbus_channel {
>>      /* Unique channel id */
>>      int id;
>>  +   /* Active reference count */
>> +    atomic_t count;
>> +
>>      struct list_head listentry;
>>      struct hv_device *device_obj;
>> @@ -666,6 +669,7 @@ struct vmbus_channel {
>>      u8 monitor_bit;
>>      bool rescind; /* got rescind msg */
>> +    bool dying; /* channel is dying */
>>      u32 ringbuffer_gpadlhandle;
>>  @@ -907,6 +911,15 @@ extern int vmbus_recvpacket_raw(struct
>> vmbus_channel *channel,
>>   extern void vmbus_ontimer(unsigned long data);
>>  +/*
>> + * Decrease reference count for the channel. Frees the channel when
>> its usage
>> + * count reaches zero.
>> + */
>> +extern void vmbus_put_channel(struct vmbus_channel *channel);
>> +
>> +/* Get additional reference to the channel */
>> +extern struct vmbus_channel *vmbus_get_channel(struct vmbus_channel
>> *channel);
>> +
>>  /* Base driver object */
>>  struct hv_driver {
>>      const char *name;
>> -- 
>> 1.9.3
>>

-- 
  Vitaly
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to