On 03/28/2018 07:21 AM, Daniel Borkmann wrote:
> On 03/27/2018 07:23 PM, John Fastabend wrote:
>> Add support for the BPF_F_INGRESS flag in skb redirect helper. To
>> do this convert skb into a scatterlist and push into ingress queue.
>> This is the same logic that is used in the sk_msg redirect helper
>> so it should feel familiar.
>>
>> Signed-off-by: John Fastabend <[email protected]>
>> ---
>> include/linux/filter.h | 1 +
>> kernel/bpf/sockmap.c | 94
>> +++++++++++++++++++++++++++++++++++++++---------
>> net/core/filter.c | 2 +
>> 3 files changed, 78 insertions(+), 19 deletions(-)
> [...]
>> if (!sg->length && md->sg_start == md->sg_end) {
>> list_del(&md->list);
>> + if (md->skb)
>> + consume_skb(md->skb);
>> kfree(md);
>> }
>> }
>> @@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock
>> *psock, struct sk_buff *skb)
>> __SK_DROP;
>> }
>>
>> +static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
>> +{
>> + struct sock *sk = psock->sock;
>> + int copied = 0, num_sg;
>> + struct sk_msg_buff *r;
>> +
>> + r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
>> + if (unlikely(!r))
>> + return -EAGAIN;
>> +
>> + if (!sk_rmem_schedule(sk, skb, skb->len)) {
>> + kfree(r);
>> + return -EAGAIN;
>> + }
>> + sk_mem_charge(sk, skb->len);
>
> Usually mem accounting is based on truesize. This is not done here since
> you need the exact length of the skb for the sg list later on, right?
Correct.
>
>> + sg_init_table(r->sg_data, MAX_SKB_FRAGS);
>> + num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
>> + if (unlikely(num_sg < 0)) {
>> + kfree(r);
>
> Don't we need to undo the mem charge here in case of error?
>
Actually, I'll just move the sk_mem_charge() down below this error
then we don't need to unwind it.
>> + return num_sg;
>> + }
>> + copied = skb->len;
>> + r->sg_start = 0;
>> + r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
>> + r->skb = skb;
>> + list_add_tail(&r->list, &psock->ingress);
>> + sk->sk_data_ready(sk);
>> + return copied;
>> +}