On Wed, 2012-09-05 at 17:13 +0200, Paolo Bonzini wrote:
> transport_generic_get_mem's strategy of allocating pages one-by-one
> for the data scatterlist may fail for even not-so-big data transfers
> if the underlying device poses a small limit on the number of segments.
> 
> This patch fixes this problem by trying to allocate pages in relatively
> large groups (at most 512 pages right now, equivalent to 2 MB per
> allocation), but still without any slack.  The remainder is then
> satisfied with subsequent smaller allocations.  For example, if the
> data to be transferred is 132 KB, the new algorithm will attempt
> a single 128 KB allocation, followed by a 4 KB allocation.
> In case a contiguous block cannot be found, it will fall back to
> a progressively smaller order.
> 
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
> ---
>  drivers/target/target_core_transport.c |   58 
> +++++++++++++++++++++++++++-----
>  1 files changed, 49 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/target/target_core_transport.c 
> b/drivers/target/target_core_transport.c
> index 09028af..6e90e2c 100644
> --- a/drivers/target/target_core_transport.c
> +++ b/drivers/target/target_core_transport.c
> @@ -2058,7 +2058,7 @@ static inline void transport_free_sgl(struct 
> scatterlist *sgl, int nents)
>       int count;
>  
>       for_each_sg(sgl, sg, nents, count)
> -             __free_page(sg_page(sg));
> +             __free_pages(sg_page(sg), get_order(sg->length));
>  
>       kfree(sgl);
>  }
> @@ -2229,41 +2229,81 @@ void transport_kunmap_data_sg(struct se_cmd *cmd)
>  }
>  EXPORT_SYMBOL(transport_kunmap_data_sg);
>  
> +#define TRANSPORT_MAX_ORDER  9
> +
> +static inline int
> +get_order_no_slack(u32 length)
> +{
> +     /*
> +      * get_order gives the smallest order that length fits in.
> +      * We want instead the largest order that does not give
> +      * any slack.
> +      *
> +      * Example (with PAGE_SIZE = 4096, PAGE_SHIFT = 12):
> +      *      get_order_no_slack(0x1000) = fls(0) = 0, get_order(0x1000) = 0
> +      *      get_order_no_slack(0x1001) = fls(0) = 0, get_order(0x1001) = 1
> +      *      get_order_no_slack(0x1fff) = fls(0) = 0, get_order(0x1fff) = 1
> +      *      get_order_no_slack(0x2000) = fls(1) = 1, get_order(0x2000) = 1
> +      *      get_order_no_slack(0x3000) = fls(1) = 1, get_order(0x3000) = 2
> +      *      get_order_no_slack(0x4000) = fls(2) = 2, get_order(0x4000) = 2
> +      *      get_order_no_slack(0x8000) = fls(4) = 3, get_order(0x8000) = 3
> +      */
> +     length >>= PAGE_SHIFT + 1;
> +     return fls(length);
> +}
> +
>  static int
>  transport_generic_get_mem(struct se_cmd *cmd)
>  {
>       u32 length = cmd->data_length;
> +     u32 page_len;
>       unsigned int nents;
>       struct page *page;
>       gfp_t zero_flag;
>       int i = 0;
> +     int order, max_order;
>  
>       nents = DIV_ROUND_UP(length, PAGE_SIZE);
>       cmd->t_data_sg = kmalloc(sizeof(struct scatterlist) * nents, 
> GFP_KERNEL);
>       if (!cmd->t_data_sg)
>               return -ENOMEM;
>  
> -     cmd->t_data_nents = nents;
>       sg_init_table(cmd->t_data_sg, nents);
>  
>       zero_flag = cmd->se_cmd_flags & SCF_SCSI_DATA_CDB ? 0 : __GFP_ZERO;
>  
> +     max_order = TRANSPORT_MAX_ORDER;

Please go ahead and turn max_order into an DEF_DEV_ATTRIB within
target_core_configfs.c, so we're able to change this value on the fly
using an per device configfs attr.

>       while (length) {
> -             u32 page_len = min_t(u32, length, PAGE_SIZE);
> -             page = alloc_page(GFP_KERNEL | zero_flag);
> -             if (!page)
> -                     goto out;
> +             order = get_order_no_slack(length);
> +             for (;;) {
> +                     order = min(order, max_order);
> +                     page_len = min_t(u32, length, PAGE_SIZE << order);
> +                     page = alloc_pages(GFP_KERNEL | zero_flag, order);
> +                     if (page)
> +                             break;
> +
> +                     if (!order)
> +                             goto out;
> +
> +                     /*
> +                      * Allocation failed, only try with a smaller order
> +                      * from this point.
> +                      */
> +                     max_order = order - 1;
> +             }

Seems reasonable to back-off in reverse max_order in the face of large
order allocation failures..

>  
>               sg_set_page(&cmd->t_data_sg[i], page, page_len, 0);
>               length -= page_len;
>               i++;
>       }
> +     cmd->t_data_nents = i;
> +     sg_mark_end(&cmd->t_data_sg[i - 1]);
>       return 0;
>  
>  out:
> -     while (i > 0) {
> -             i--;
> -             __free_page(sg_page(&cmd->t_data_sg[i]));
> +     while (i-- > 0) {
> +             struct scatterlist *sg = &cmd->t_data_sg[i];
> +             __free_pages(sg_page(sg), get_order(sg->length));
>       }
>       kfree(cmd->t_data_sg);
>       cmd->t_data_sg = NULL;

So what I'd like to see for this patch in the short term is a new
target_core_fabrics_op bit that is (by default) assuming single order
SGL page allocations, that can optionally be enabled for iscsi-target +
other fabrics that support it as we bring other multi-page SGLs fabric
drivers online.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to