On 15/07/2020 22:37, Kees Cook wrote:
> On Tue, Jul 14, 2020 at 08:16:36PM +0200, Mickaël Salaün wrote:
>> @@ -2849,7 +2855,7 @@ static int may_open(const struct path *path, int acc_mode, int flag)
>>      case S_IFLNK:
>>              return -ELOOP;
>>      case S_IFDIR:
>> -            if (acc_mode & (MAY_WRITE | MAY_EXEC))
>> +            if (acc_mode & (MAY_WRITE | MAY_EXEC | MAY_OPENEXEC))
>>                      return -EISDIR;
>>              break;
> 
> (I need to figure out where "open for reading" rejects S_IFDIR, since
> it's clearly not here...)
> 
>>      case S_IFBLK:
>> @@ -2859,13 +2865,26 @@ static int may_open(const struct path *path, int acc_mode, int flag)
>>              fallthrough;
>>      case S_IFIFO:
>>      case S_IFSOCK:
>> -            if (acc_mode & MAY_EXEC)
>> +            if (acc_mode & (MAY_EXEC | MAY_OPENEXEC))
>>                      return -EACCES;
>>              flag &= ~O_TRUNC;
>>              break;
> 
> This will immediately break a system that runs code with MAY_OPENEXEC
> set but reads from a block, char, fifo, or socket, even in the case of
> a sysadmin leaving the "file" sysctl disabled.

As documented, O_MAYEXEC is for regular files. The only legitimate use
case for other file types seems to be pipes, which should probably be
allowed when enforcement is disabled.

> 
>>      case S_IFREG:
>> -            if ((acc_mode & MAY_EXEC) && path_noexec(path))
>> -                    return -EACCES;
>> +            if (path_noexec(path)) {
>> +                    if (acc_mode & MAY_EXEC)
>> +                            return -EACCES;
>> +                    if ((acc_mode & MAY_OPENEXEC) &&
>> +                                    (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT))
>> +                            return -EACCES;
>> +            }
>> +            if ((acc_mode & MAY_OPENEXEC) &&
>> +                            (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE))
>> +                    /*
>> +                     * Because acc_mode may change here, the next and only
>> +                     * use of acc_mode should then be by the following call
>> +                     * to inode_permission().
>> +                     */
>> +                    acc_mode |= MAY_EXEC;
>>              break;
>>      }
> 
> Likely very minor, but I'd like to avoid the path_noexec() call in the
> fast-path (it dereferences a couple of pointers, whereas doing bit tests
> on acc_mode is fast).
> 
> Given that and the above observations, I think that may_open() likely
> needs to start with:
> 
>       if (acc_mode & MAY_OPENEXEC) {
>               /* Reject all file types when mount enforcement set. */
>               if ((sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_MOUNT) &&
>                   path_noexec(path))
>                       return -EACCES;
>               /* Treat the same as MAY_EXEC. */
>               if (sysctl_open_mayexec_enforce & OPEN_MAYEXEC_ENFORCE_FILE)
>                       acc_mode |= MAY_EXEC;
>       }

OK

> 
> (Though I'm not 100% sure that path_noexec() is safe to be called for
> all file types: i.e. are path->mnt and path->mnt->mnt_sb *always* non-NULL?)

path->mnt should always be non-NULL:
https://lore.kernel.org/lkml/20200317164709.ga23...@zeniv.linux.org.uk/

> 
> This change would also imply that OPEN_MAYEXEC_ENFORCE_FILE *includes*
> OPEN_MAYEXEC_ENFORCE_MOUNT (i.e. the sysctl should not be a bitfield),
> since path_noexec() would get checked for S_ISREG. I can't come up with
> a rationale where one would want OPEN_MAYEXEC_ENFORCE_FILE but _not_
> OPEN_MAYEXEC_ENFORCE_MOUNT?

I don't see why it is an inclusion.

> 
> (I can absolutely see wanting only OPEN_MAYEXEC_ENFORCE_MOUNT, since
> otherwise one suddenly has to go mark every loaded thing with the exec
> bit, and most distros haven't done this for, for example, shared
> libraries. But setting the exec bit and then NOT wanting to enforce the
> mount check seems... not sensible?)
> 
> Outside of this change, yes, I like this now -- it's much cleaner
> because we have all the checks in the same place where they belong. :)
> 
>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
>> index db1ce7af2563..5008a2566e79 100644
>> --- a/kernel/sysctl.c
>> +++ b/kernel/sysctl.c
>> @@ -113,6 +113,7 @@ static int sixty = 60;
>>  
>>  static int __maybe_unused neg_one = -1;
>>  static int __maybe_unused two = 2;
>> +static int __maybe_unused three = 3;
>>  static int __maybe_unused four = 4;
>>  static unsigned long zero_ul;
>>  static unsigned long one_ul = 1;
> 
> Oh, are these still here? I thought they got removed (or at least made
> const). Where did that series go? Hmpf, see sysctl_vals, but yes, for
> now, this is fine.
> 
>> @@ -888,7 +889,6 @@ static int proc_taint(struct ctl_table *table, int write,
>>      return err;
>>  }
>>  
>> -#ifdef CONFIG_PRINTK
>>  static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
>>                              void *buffer, size_t *lenp, loff_t *ppos)
>>  {
>> @@ -897,7 +897,6 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
>>  
>>      return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
>>  }
>> -#endif
>>  
>>  /**
>>   * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
>> @@ -3264,6 +3263,15 @@ static struct ctl_table fs_table[] = {
>>              .extra1         = SYSCTL_ZERO,
>>              .extra2         = &two,
>>      },
>> +    {
>> +            .procname       = "open_mayexec_enforce",
>> +            .data           = &sysctl_open_mayexec_enforce,
>> +            .maxlen         = sizeof(int),
>> +            .mode           = 0600,
>> +            .proc_handler   = proc_dointvec_minmax_sysadmin,
>> +            .extra1         = SYSCTL_ZERO,
>> +            .extra2         = &three,
>> +    },
>>  #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
>>      {
>>              .procname       = "binfmt_misc",
>> -- 
>> 2.27.0
>>
> 

Reply via email to