I don't feel comfortable with posting a version that we know
can crash a kernel, and even less so if we say that's it's broken...
So I prefer it inside.

Oren

Serge E. Hallyn wrote:
> Quoting Oren Laadan ([email protected]):
>> To ensure that a module does not unload in the middle of a checkpoint
>> or a restart operation, pin (module_get) all the modules during either
>> operation.  For that, add a new memeber ->owner in ckpt_obj_ops.
>>
>> Also add a counter that keeps track of how many module_gets we have
>> going on, to properly handle new modules that register when an
>> operation is underway.
>>
>> This is a proof of concept, applies on top of the patch that introduces
>> objhash on rc7 (patch 33).
>>
>> Todo:
>>
>> - I put the initialization part in init_*_ctx(), but it could be moved
>> to ckpt_ctx_alloc() which is common to both checkpoint and restart.
>>
>> - If this is ok, then additional patches should adjust those modules
>> that indeed register...
>>
>> Signed-off-by: Oren Laadan <[email protected]>
> 
> Yes this looks good to me
> 
> Acked-by: Serge Hallyn <[email protected]>
> 
> Were you going to stick this into v21 too, or queue that up as
> first patch after the patchbomb?
> 
> thanks,
> -serge
> 
>> ---
>> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
>> index 2f5af3c..4ed8a8c 100644
>> --- a/include/linux/checkpoint.h
>> +++ b/include/linux/checkpoint.h
>> @@ -40,11 +40,13 @@ extern long do_sys_restart(pid_t pid, int fd,
>>  #define CKPT_CTX_RESTART_BIT                1
>>  #define CKPT_CTX_SUCCESS_BIT                2
>>  #define CKPT_CTX_ERROR_BIT          3
>> +#define CKPT_CTX_PINNED_BIT         4
>>
>>  #define CKPT_CTX_CHECKPOINT (1 << CKPT_CTX_CHECKPOINT_BIT)
>>  #define CKPT_CTX_RESTART    (1 << CKPT_CTX_RESTART_BIT)
>>  #define CKPT_CTX_SUCCESS    (1 << CKPT_CTX_SUCCESS_BIT)
>>  #define CKPT_CTX_ERROR              (1 << CKPT_CTX_ERROR_BIT)
>> +#define CKPT_CTX_PINNED             (1 << CKPT_CTX_PINNED_BIT)
>>
>>  /* ckpt_ctx: uflags */
>>  #define CHECKPOINT_USER_FLAGS               CHECKPOINT_SUBTREE
>> @@ -107,6 +109,9 @@ static inline int ckpt_get_error(struct ckpt_ctx *ctx)
>>  extern void restore_notify_error(struct ckpt_ctx *ctx);
>>
>>  /* obj_hash */
>> +extern int ckpt_obj_module_get(void);
>> +extern void ckpt_obj_module_put(void);
>> +
>>  extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
>>  extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
>>
>> @@ -284,6 +289,7 @@ extern void _ckpt_msg_complete(struct ckpt_ctx *ctx);
>>
>>  struct ckpt_obj_ops;
>>  extern int register_checkpoint_obj(const struct ckpt_obj_ops *ops);
>> +extern void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops);
>>
>>  #else /* CONFIG_CHEKCPOINT */
>>
>> @@ -293,6 +299,10 @@ static inline int register_checkpoint_obj(const struct 
>> ckpt_obj_ops *ops)
>>      return 0;
>>  }
>>
>> +static inline void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
>> +{
>> +}
>> +
>>  #endif /* CONFIG_CHECKPOINT */
>>  #endif /* __KERNEL__ */
>>
>> diff --git a/include/linux/checkpoint_types.h 
>> b/include/linux/checkpoint_types.h
>> index 912e06a..8035556 100644
>> --- a/include/linux/checkpoint_types.h
>> +++ b/include/linux/checkpoint_types.h
>> @@ -79,6 +79,7 @@ struct ckpt_obj_ops {
>>      int (*ref_grab)(void *ptr);
>>      int (*checkpoint)(struct ckpt_ctx *ctx, void *ptr);
>>      void *(*restore)(struct ckpt_ctx *ctx);
>> +    struct module *owner;
>>  };
>>
>>
>> diff --git a/kernel/checkpoint/checkpoint.c b/kernel/checkpoint/checkpoint.c
>> index f451f8f..1a08bfb 100644
>> --- a/kernel/checkpoint/checkpoint.c
>> +++ b/kernel/checkpoint/checkpoint.c
>> @@ -473,6 +473,7 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, 
>> pid_t pid)
>>  {
>>      struct task_struct *task;
>>      struct nsproxy *nsproxy;
>> +    int ret;
>>
>>      /*
>>       * No need for explicit cleanup here, because if an error
>> @@ -514,6 +515,12 @@ static int init_checkpoint_ctx(struct ckpt_ctx *ctx, 
>> pid_t pid)
>>              return -EINVAL;  /* cleanup by ckpt_ctx_free() */
>>      }
>>
>> +    /* pin down modules - cleanup by ckpt_ctx_free() */
>> +    ret = ckpt_obj_module_get();
>> +    if (ret < 0)
>> +            return ret;
>> +    ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
>> +
>>      return 0;
>>  }
>>
>> diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
>> index 1ee06d0..db795e3 100644
>> --- a/kernel/checkpoint/objhash.c
>> +++ b/kernel/checkpoint/objhash.c
>> @@ -45,17 +45,80 @@ static const struct ckpt_obj_ops 
>> *ckpt_obj_ops[CKPT_OBJ_MAX] = {
>>      [CKPT_OBJ_IGNORE] = &ckpt_obj_ignored_ops,
>>  };
>>
>> +static int ckpt_obj_pinned_count;
>> +static DEFINE_SPINLOCK(ckpt_obj_pinned_lock);
>> +
>>  int register_checkpoint_obj(const struct ckpt_obj_ops *ops)
>>  {
>> +    int ret = -EINVAL;
>> +    int i;
>> +
>>      if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
>>              return -EINVAL;
>> -    if (ckpt_obj_ops[ops->obj_type] != NULL)
>> -            return -EINVAL;
>> -    ckpt_obj_ops[ops->obj_type] = ops;
>> -    return 0;
>> +    spin_lock(&ckpt_obj_pinned_lock);
>> +    if (ckpt_obj_ops[ops->obj_type] == NULL) {
>> +            if (ops->owner) {
>> +                    for (i = 0; i < ckpt_obj_pinned_count)
>> +                            __module_get(owner->module);
>> +            }
>> +            ckpt_obj_ops[ops->obj_type] = ops;
>> +            ret = 0;
>> +    }
>> +    spin_unlock(&ckpt_obj_pinned_lock);
>> +    return ret;
>>  }
>>  EXPORT_SYMBOL(register_checkpoint_obj);
>>
>> +void unregister_checkpoint_obj(const struct ckpt_obj_ops *ops)
>> +{
>> +    if (ops->obj_type < 0 || ops->obj_type >= CKPT_OBJ_MAX)
>> +            return;
>> +    spin_lock(&ckpt_obj_pinned_lock);
>> +    if (ckpt_obj_ops[ops->obj_type] == ops)
>> +            ckpt_obj_ops[ops->obj_type] = NULL;
>> +    spin_unlock(&ckpt_obj_pinned_lock);
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL(unregister_checkpoint_obj);
>> +
>> +static void _ckpt_obj_module_put(int last)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < last; i++) {
>> +            if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
>> +                    continue;
>> +            module_put(ckpt_obj_ops[i]->owner);
>> +    }
>> +}
>> +void ckpt_obj_module_put(void)
>> +{
>> +    spin_lock(&ckpt_obj_pinned_lock);
>> +    _ckpt_obj_module_put(CKPT_OBJ_MAX);
>> +    ckpt_obj_pinned_count--;
>> +    spin_unlock(&ckpt_obj_pinned_lock);
>> +}
>> +
>> +int ckpt_obj_module_get(void)
>> +{
>> +    int i, ret = 0;
>> +
>> +    spin_lock(&ckpt_obj_pinned_lock);
>> +    for (i = 0; i < CKPT_OBJ_MAX; i++) {
>> +            if (!ckpt_obj_ops[i] || !ckpt_obj_ops[i]->owner)
>> +                    continue;
>> +            ret = try_module_get(ckpt_obj_ops[i]->owner);
>> +            if (ret < 0)
>> +                    break;
>> +    }
>> +    if (ret < 0)
>> +            _ckpt_obj_module_put(i);
>> +    else
>> +            ckpt_obj_pinned_count++;
>> +    spin_unlock(&ckpt_obj_pinned_lock);
>> +    return ret;
>> +}
>> +
>>  #define CKPT_OBJ_HASH_NBITS  10
>>  #define CKPT_OBJ_HASH_TOTAL  (1UL << CKPT_OBJ_HASH_NBITS)
>>
>> diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
>> index 437de4f..582e1f1 100644
>> --- a/kernel/checkpoint/restart.c
>> +++ b/kernel/checkpoint/restart.c
>> @@ -1084,6 +1084,12 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, 
>> pid_t pid)
>>
>>      ctx->active_pid = -1;   /* see restore_activate_next, get_active_pid */
>>
>> +    /* pin down modules - cleanup by ckpt_ctx_free() */
>> +    ret = ckpt_obj_module_get();
>> +    if (ret < 0)
>> +            return ret;
>> +    ckpt_ctx_set_kflag(ctx, CKPT_CTX_PINNED);
>> +
>>      return 0;
>>  }
>>
>> diff --git a/kernel/checkpoint/sys.c b/kernel/checkpoint/sys.c
>> index 5e84915..e1a1f96 100644
>> --- a/kernel/checkpoint/sys.c
>> +++ b/kernel/checkpoint/sys.c
>> @@ -217,6 +217,10 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
>>
>>      kfree(ctx->pids_arr);
>>
>> +    /* un-pin modules */
>> +    if (ctx->kflags & CKPT_CTX_PINNED)
>> +            ckpt_obj_module_put();
>> +
>>      kfree(ctx);
>>  }
> 
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to