On 09/02/2015 12:33 PM, Ilya Dryomov wrote:
> Only ->alloc_msg() should check data_len of the incoming message
> against the preallocated ceph_msg, doing it in the messenger is not
> right.  The contract is that either ->alloc_msg() returns a ceph_msg
> which will fit all of the portions of the incoming message, or it
> returns NULL and possibly sets skip, signaling whether NULL is due to
> an -ENOMEM.  ->alloc_msg() should be the only place where we make the
> skip/no-skip decision.
> 
> I stumbled upon this while looking at con/osd ref counting.  Right now,
> if we get a non-extent message with a larger data portion than we are
> prepared for, ->alloc_msg() returns a ceph_msg, and then, when we skip
> it in the messenger, we don't put the con/osd ref acquired in
> ceph_con_in_msg_alloc() (which is normally put in process_message()),
> so this also fixes a memory leak.

Sorry for the delay reviewing this.

This looks good.  In a follow-on patch you could update the
comments at the top of ceph_con_in_msg_alloc() to suggest
that *skip may be set or not when this function returns,
without saying "if we set" (which says to me that this
function sets it directly).  Minor, but I think it could
be stated a little more clearly.  That comment block also
talks about a connection's "alloc_msg op if available"
but we assert that pointer is non-null, so that doesn't
really sound right either.

> An existing BUG_ON in ceph_msg_data_cursor_init() ensures we don't
> corrupt random memory should a buggy ->alloc_msg() return an unfit
> ceph_msg.

So this will catch the problem, just a little later.

Reviewed-by: Alex Elder <el...@linaro.org>

> While at it, I changed the "unknown tid" dout() to a pr_warn() to make
> sure all skips are seen and unified format strings.
> 
> Signed-off-by: Ilya Dryomov <idryo...@gmail.com>
> ---
>  net/ceph/messenger.c  |  7 -------
>  net/ceph/osd_client.c | 51 
> ++++++++++++++++++---------------------------------
>  2 files changed, 18 insertions(+), 40 deletions(-)
> 
> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> index 36757d46ac40..525f454f7531 100644
> --- a/net/ceph/messenger.c
> +++ b/net/ceph/messenger.c
> @@ -2337,13 +2337,6 @@ static int read_partial_message(struct ceph_connection 
> *con)
>                       return ret;
>  
>               BUG_ON(!con->in_msg ^ skip);
> -             if (con->in_msg && data_len > con->in_msg->data_length) {
> -                     pr_warn("%s skipping long message (%u > %zd)\n",
> -                             __func__, data_len, con->in_msg->data_length);
> -                     ceph_msg_put(con->in_msg);
> -                     con->in_msg = NULL;
> -                     skip = 1;
> -             }
>               if (skip) {
>                       /* skip this message */
>                       dout("alloc_msg said skip message\n");
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 50033677c0fa..80b94e37c94a 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -2817,8 +2817,9 @@ out:
>  }
>  
>  /*
> - * lookup and return message for incoming reply.  set up reply message
> - * pages.
> + * Lookup and return message for incoming reply.  Don't try to do
> + * anything about a larger than preallocated data portion of the
> + * message at the moment - for now, just skip the message.
>   */
>  static struct ceph_msg *get_reply(struct ceph_connection *con,
>                                 struct ceph_msg_header *hdr,
> @@ -2836,10 +2837,10 @@ static struct ceph_msg *get_reply(struct 
> ceph_connection *con,
>       mutex_lock(&osdc->request_mutex);
>       req = __lookup_request(osdc, tid);
>       if (!req) {
> -             *skip = 1;
> +             pr_warn("%s osd%d tid %llu unknown, skipping\n",
> +                     __func__, osd->o_osd, tid);
>               m = NULL;
> -             dout("get_reply unknown tid %llu from osd%d\n", tid,
> -                  osd->o_osd);
> +             *skip = 1;
>               goto out;
>       }
>  
> @@ -2849,10 +2850,9 @@ static struct ceph_msg *get_reply(struct 
> ceph_connection *con,
>       ceph_msg_revoke_incoming(req->r_reply);
>  
>       if (front_len > req->r_reply->front_alloc_len) {
> -             pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n",
> -                     front_len, req->r_reply->front_alloc_len,
> -                     (unsigned int)con->peer_name.type,
> -                     le64_to_cpu(con->peer_name.num));
> +             pr_warn("%s osd%d tid %llu front %d > preallocated %d\n",
> +                     __func__, osd->o_osd, req->r_tid, front_len,
> +                     req->r_reply->front_alloc_len);
>               m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS,
>                                false);
>               if (!m)
> @@ -2860,37 +2860,22 @@ static struct ceph_msg *get_reply(struct 
> ceph_connection *con,
>               ceph_msg_put(req->r_reply);
>               req->r_reply = m;
>       }
> -     m = ceph_msg_get(req->r_reply);
> -
> -     if (data_len > 0) {
> -             struct ceph_osd_data *osd_data;
>  

This was doing a special case test.  I'm not sure why it
was not done more generally before...

> -             /*
> -              * XXX This is assuming there is only one op containing
> -              * XXX page data.  Probably OK for reads, but this
> -              * XXX ought to be done more generally.
> -              */
> -             osd_data = osd_req_op_extent_osd_data(req, 0);
> -             if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
> -                     if (osd_data->pages &&
> -                             unlikely(osd_data->length < data_len)) {
> -
> -                             pr_warn("tid %lld reply has %d bytes we had 
> only %llu bytes ready\n",
> -                                     tid, data_len, osd_data->length);
> -                             *skip = 1;
> -                             ceph_msg_put(m);
> -                             m = NULL;
> -                             goto out;
> -                     }
> -             }
> +     if (data_len > req->r_reply->data_length) {
> +             pr_warn("%s osd%d tid %llu data %d > preallocated %zu, 
> skipping\n",
> +                     __func__, osd->o_osd, req->r_tid, data_len,
> +                     req->r_reply->data_length);
> +             m = NULL;
> +             *skip = 1;
> +             goto out;
>       }
> -     *skip = 0;
> +
> +     m = ceph_msg_get(req->r_reply);
>       dout("get_reply tid %lld %p\n", tid, m);
>  
>  out:
>       mutex_unlock(&osdc->request_mutex);
>       return m;
> -
>  }
>  
>  static struct ceph_msg *alloc_msg(struct ceph_connection *con,
> 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to