> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Thursday, May 18, 2017 9:25 AM
> To: KY Srinivasan ; gre...@linuxfoundation.org
> Cc: de...@linuxdriverproject.org; Stephen Hemminger
>
> Subject: [PATCH 2/5] vmbus: implement lock-less ring buffer
>
> Use a reservation similar to ftrace to make vmbus ring buffer writes
> lock free.
>
> The algorithm uses cmpxchg to atomically reserve an area in the ring
> buffer. Then the data is copied into the ring, and the updates to the
> head of the ring are ordered.
>
> Other similar implementions are FreeBSD buf_ring, and DPDK rte_ring.
>
> Signed-off-by: Stephen Hemminger
> ---
> drivers/hv/ring_buffer.c | 192 +--
>
> include/linux/hyperv.h | 1 -
> 2 files changed, 84 insertions(+), 109 deletions(-)
>
> diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
> index f29981764653..7ca1f750e037 100644
> --- a/drivers/hv/ring_buffer.c
> +++ b/drivers/hv/ring_buffer.c
> @@ -77,68 +77,6 @@ static void hv_signal_on_write(u32 old_write, struct
> vmbus_channel *channel)
> vmbus_setevent(channel);
> }
>
> -/* Get the next write location for the specified ring buffer. */
> -static inline u32
> -hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
> -{
> - u32 next = ring_info->ring_buffer->write_index;
> -
> - return next;
> -}
> -
> -/* Set the next write location for the specified ring buffer. */
> -static inline void
> -hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
> - u32 next_write_location)
> -{
> - ring_info->ring_buffer->write_index = next_write_location;
> -}
> -
> -/* Set the next read location for the specified ring buffer. */
> -static inline void
> -hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
> - u32 next_read_location)
> -{
> - ring_info->ring_buffer->read_index = next_read_location;
> - ring_info->priv_read_index = next_read_location;
> -}
> -
> -/* Get the size of the ring buffer. */
> -static inline u32
> -hv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info)
> -{
> - return ring_info->ring_datasize;
> -}
> -
> -/* Get the read and write indices as u64 of the specified ring buffer. */
> -static inline u64
> -hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
> -{
> - return (u64)ring_info->ring_buffer->write_index << 32;
> -}
> -
> -/*
> - * Helper routine to copy from source to ring buffer.
> - * Assume there is enough room. Handles wrap-around in dest case only!!
> - */
> -static u32 hv_copyto_ringbuffer(
> - struct hv_ring_buffer_info *ring_info,
> - u32 start_write_offset,
> - const void *src,
> - u32 srclen)
> -{
> - void *ring_buffer = hv_get_ring_buffer(ring_info);
> - u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
> -
> - memcpy(ring_buffer + start_write_offset, src, srclen);
> -
> - start_write_offset += srclen;
> - if (start_write_offset >= ring_buffer_size)
> - start_write_offset -= ring_buffer_size;
> -
> - return start_write_offset;
> -}
> -
> /* Get various debug metrics for the specified ring buffer. */
> void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info
> *ring_info,
>struct hv_ring_buffer_debug_info
> *debug_info)
> @@ -206,8 +144,6 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info
> *ring_info,
> ring_info->ring_datasize = ring_info->ring_size -
> sizeof(struct hv_ring_buffer);
>
> - spin_lock_init(&ring_info->ring_lock);
> -
> return 0;
> }
>
> @@ -217,72 +153,112 @@ void hv_ringbuffer_cleanup(struct
> hv_ring_buffer_info *ring_info)
> vunmap(ring_info->ring_buffer);
> }
>
> -/* Write to the ring buffer. */
> +/*
> + * Multiple producer lock-free ring buffer write
> + *
> + * There are two write locations: when no CPU is writing to ring both
> + * are equal.
> + * ring_buffer_info->priv_write_index - next writer's tail offset
> + * ring_buffer->write_index - reader's tail offset
> + *
> + * The write goes through three stages:
> + * 1. Reserve space in ring buffer for the new data.
> + * Writer atomically moves priv_write_index.
> + * 2. Copy the new data into the ring.
> + * 3. Update the tail of the ring (visible to host) that indicates
> + * next read location. Writer updates write_index
> + *
> + * This function can be safely called from softirq.
> + */
> int hv_ringbuffer_write(struct vmbus_channel *channel,
> const struct kvec *kv_list, u32 kv_count)
> {
> - int i;
> - u32 bytes_avail_towrite;
> - u32 totalbytes_towrite = sizeof(u64);
> - u32 next_write_location;
> - u32 old_write;
> - u64 prev_indices;
> + struct hv_ring_buffer_info *outring = &channel->outbo