Re: [V9fs-developer] [PATCH kernel] 9p/trans_fd: Check file mode at opening

2020-07-28 Thread Greg Kurz
Hi Alexey,

Working on 9p now ?!? ;-)

Cc'ing Dominique Martinet who appears to be the person who takes care of 9p
these days.

On Tue, 28 Jul 2020 22:41:29 +1000
Alexey Kardashevskiy  wrote:

> The "fd" transport layer uses 2 file descriptors passed externally
> and calls kernel_write()/kernel_read() on these. If files were opened
> without FMODE_WRITE/FMODE_READ, WARN_ON_ONCE() will fire.
> 
> This adds file mode checking in p9_fd_open; this returns -EBADF to
> preserve the original behavior.
> 

So this would cause open() to fail with EBADF, which might look a bit
weird to userspace since it didn't pass an fd... Is this to have a
different error than -EIO that is returned when either rfd or wfd
doesn't point to an open file descriptor ? If yes, why do we care ?

> Found by syzkaller.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  net/9p/trans_fd.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index 13cd683a658a..62cdfbd01f0a 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -797,6 +797,7 @@ static int parse_opts(char *params, struct p9_fd_opts 
> *opts)
>  
>  static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
>  {
> + bool perm;
>   struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
>  GFP_KERNEL);
>   if (!ts)
> @@ -804,12 +805,16 @@ static int p9_fd_open(struct p9_client *client, int 
> rfd, int wfd)
>  
>   ts->rd = fget(rfd);
>   ts->wr = fget(wfd);
> - if (!ts->rd || !ts->wr) {
> + perm = ts->rd && (ts->rd->f_mode & FMODE_READ) &&
> +ts->wr && (ts->wr->f_mode & FMODE_WRITE);
> + if (!ts->rd || !ts->wr || !perm) {
>   if (ts->rd)
>   fput(ts->rd);
>   if (ts->wr)
>   fput(ts->wr);
>   kfree(ts);
> + if (!perm)
> + return -EBADF;
>   return -EIO;
>   }
>  



Re: [RFC PATCH 3/5] mm: introduce VM_EXEC_KEEP

2020-07-28 Thread Anthony Yznaga



On 7/28/20 6:38 AM, ebied...@xmission.com wrote:
> Anthony Yznaga  writes:
>
>> A vma with the VM_EXEC_KEEP flag is preserved across exec.  For anonymous
>> vmas only.  For safety, overlap with fixed address VMAs created in the new
>> mm during exec (e.g. the stack and elf load segments) is not permitted and
>> will cause the exec to fail.
>> (We are studying how to guarantee there are no conflicts. Comments welcome.)
>>
>> diff --git a/fs/exec.c b/fs/exec.c
>> index 262112e5f9f8..1de09c4eef00 100644
>> --- a/fs/exec.c
>> +++ b/fs/exec.c
>> @@ -1069,6 +1069,20 @@ ssize_t read_code(struct file *file, unsigned long 
>> addr, loff_t pos, size_t len)
>>  EXPORT_SYMBOL(read_code);
>>  #endif
>>  
>> +static int vma_dup_some(struct mm_struct *old_mm, struct mm_struct *new_mm)
>> +{
>> +struct vm_area_struct *vma;
>> +int ret;
>> +
>> +for (vma = old_mm->mmap; vma; vma = vma->vm_next)
>> +if (vma->vm_flags & VM_EXEC_KEEP) {
>> +ret = vma_dup(vma, new_mm);
>> +if (ret)
>> +return ret;
>> +}
>> +return 0;
>> +}
>> +
>>  /*
>>   * Maps the mm_struct mm into the current task struct.
>>   * On success, this function returns with the mutex
>> @@ -1104,6 +1118,12 @@ static int exec_mmap(struct mm_struct *mm)
>>  mutex_unlock(&tsk->signal->exec_update_mutex);
>>  return -EINTR;
>>  }
>> +ret = vma_dup_some(old_mm, mm);
> ^^
>
> Ouch! An unconditional loop through all of the vmas of the execing
> process, just in case there is a VM_EXEC_KEEP vma.
>
> I know we already walk the list in exit_mmap, but I get the feeling this
> will slow exec down when this feature is not enabled, especially when
> a process with a lot of vmas is calling exec.
Patch 4 changes this to only call vma_dup_some() if the new
binary has opted in to accepting preserved memory.

Anthony
>
> 
>> +if (ret) {
>> +mmap_read_unlock(old_mm);
>> +mutex_unlock(&tsk->signal->exec_update_mutex);
>> +return ret;
>> +}
>>  }
>>  
>>  task_lock(tsk);



Re: [PATCH v3 4/4] xen: add helpers to allocate unpopulated memory

2020-07-28 Thread Roger Pau Monné
On Tue, Jul 28, 2020 at 06:12:46PM +0100, Julien Grall wrote:
> Hi Roger,
> 
> On 28/07/2020 17:59, Roger Pau Monné wrote:
> > On Tue, Jul 28, 2020 at 05:48:23PM +0100, Julien Grall wrote:
> > > Hi,
> > > 
> > > On 27/07/2020 10:13, Roger Pau Monne wrote:
> > > > To be used in order to create foreign mappings. This is based on the
> > > > ZONE_DEVICE facility which is used by persistent memory devices in
> > > > order to create struct pages and kernel virtual mappings for the IOMEM
> > > > areas of such devices. Note that on kernels without support for
> > > > ZONE_DEVICE Xen will fallback to use ballooned pages in order to
> > > > create foreign mappings.
> > > > 
> > > > The newly added helpers use the same parameters as the existing
> > > > {alloc/free}_xenballooned_pages functions, which allows for in-place
> > > > replacement of the callers. Once a memory region has been added to be
> > > > used as scratch mapping space it will no longer be released, and pages
> > > > returned are kept in a linked list. This allows to have a buffer of
> > > > pages and prevents resorting to frequent additions and removals of
> > > > regions.
> > > > 
> > > > If enabled (because ZONE_DEVICE is supported) the usage of the new
> > > > functionality untangles Xen balloon and RAM hotplug from the usage of
> > > > unpopulated physical memory ranges to map foreign pages, which is the
> > > > correct thing to do in order to avoid mappings of foreign pages depend
> > > > on memory hotplug.
> > > I think this is going to break Dom0 on Arm if the kernel has been built 
> > > with
> > > hotplug. This is because you may end up to re-use region that will be used
> > > for the 1:1 mapping of a foreign map.
> > > 
> > > Note that I don't know whether hotplug has been tested on Xen on Arm yet. 
> > > So
> > > it might be possible to be already broken.
> > > 
> > > Meanwhile, my suggestion would be to make the use of hotplug in the 
> > > balloon
> > > code conditional (maybe using CONFIG_ARM64 and CONFIG_ARM)?
> > 
> > Right, this feature (allocation of unpopulated memory separated from
> > the balloon driver) is currently gated on CONFIG_ZONE_DEVICE, which I
> > think could be used on Arm.
> > 
> > IMO the right solution seems to be to subtract the physical memory
> > regions that can be used for the identity mappings of foreign pages
> > (all RAM on the system AFAICT) from iomem_resource, as that would make
> > this and the memory hotplug done in the balloon driver safe?
> 
> Dom0 doesn't know the regions used for the identity mappings as this is only
> managed by Xen. So there is nothing you can really do here.

OK, I will add the guards to prevent this being built on Arm.

> But don't you have the same issue on x86 with "magic pages"?

Those are marked as reserved on the memory map, and hence I would
expect them to never end up in iomem_resource.

Thanks, Roger.


Re: Re: [PATCH v18 06/14] mm/damon: Implement callbacks for the virtual memory address spaces

2020-07-28 Thread Shakeel Butt
On Mon, Jul 27, 2020 at 2:03 AM SeongJae Park  wrote:
>
> On Mon, 27 Jul 2020 00:34:54 -0700 Greg Thelen  wrote:
>
> > SeongJae Park  wrote:
> >
> > > From: SeongJae Park 
> > >
> > > This commit introduces a reference implementation of the address space
> > > specific low level primitives for the virtual address space, so that
> > > users of DAMON can easily monitor the data accesses on virtual address
> > > spaces of specific processes by simply configuring the implementation to
> > > be used by DAMON.
> [...]
> > > diff --git a/mm/damon.c b/mm/damon.c
> > > index b844924b9fdb..386780739007 100644
> > > --- a/mm/damon.c
> > > +++ b/mm/damon.c
> > > @@ -9,6 +9,9 @@
> [...]
> > > +/*
> > > + * Functions for the access checking of the regions
> > > + */
> > > +
> > > +static void damon_mkold(struct mm_struct *mm, unsigned long addr)
> > > +{
> > > +   pte_t *pte = NULL;
> > > +   pmd_t *pmd = NULL;
> > > +   spinlock_t *ptl;
> > > +
> > > +   if (follow_pte_pmd(mm, addr, NULL, &pte, &pmd, &ptl))
> > > +   return;
> > > +
> > > +   if (pte) {
> > > +   if (pte_young(*pte)) {
> > > +   clear_page_idle(pte_page(*pte));
> > > +   set_page_young(pte_page(*pte));
> >
> > While this compiles without support for PG_young and PG_idle, I assume
> > it won't work well because it'd clear pte.young without setting
> > PG_young.  And this would mess with vmscan.
>
> You're right, thanks for catching this up!  This definitely need to be fixed 
> in
> the next spin.
>
> >
> > So this code appears to depend on PG_young and PG_idle, which are
> > currently only available via CONFIG_IDLE_PAGE_TRACKING.  DAMON could
> > depend on CONFIG_IDLE_PAGE_TRACKING via Kconfig.  But I assume that
> > CONFIG_IDLE_PAGE_TRACKING and CONFIG_DAMON cannot be concurrently used
> > because they'll stomp on each other's use of pte.young, PG_young,
> > PG_idle.
> > So I suspect we want:
> > > 1. CONFIG_DAMON to depend on !CONFIG_IDLE_PAGE_TRACKING and vice versa.
> > 2. PG_young,PG_idle and related helpers to depend on
> >CONFIG_DAMON||CONFIG_IDLE_PAGE_TRACKING.
>
> Awesome insights and suggestions, thanks!
>
> I would like to note that DAMON could be interfered by IDLE_PAGE_TRACKING and
> vmscan, but not vice versa, as DAMON respects PG_idle and PG_young.  This
> design came from the weak goal of DAMON.  DAMON aims to provide not perfect
> monitoring but only best effort accuracy that would be sufficient for
> performance-centric DRAM level memory management.  So, at that time, I thought
> being interfered by IDLE_PAGE_TRACKING and the reclaim logic would not be a
> real problem but letting IDLE_PAGE_TRACKING coexist is somehow beneficial.
> That said, I couldn't find a real benefit of the coexistence yet, and the
> problem of interference now seems bigger as we will support more cases
> including the page granularity.
>
> Maybe we could make IDLE_PAGE_TRACKING and DAMON coexist but be mutually
> exclusive at runtime, if the benefit of coexistence turns out to be big.
> However, I would like to keep it simple first and optimize the case later if
> a real requirement is found.

If you are planning to have support for tracking at page granularity
and physical memory monitoring in DAMON then I don't see any benefit
of coexistence of DAMON with IDLE_PAGE_TRACKING. Though I will not
push you to go that route if the code with coexistence is simple
enough.


Re: [PATCH v3 4/4] xen: add helpers to allocate unpopulated memory

2020-07-28 Thread Roger Pau Monné
On Tue, Jul 28, 2020 at 06:06:25PM +0100, Andrew Cooper wrote:
> On 28/07/2020 17:59, Roger Pau Monné wrote:
> > On Tue, Jul 28, 2020 at 05:48:23PM +0100, Julien Grall wrote:
> >> Hi,
> >>
> >> On 27/07/2020 10:13, Roger Pau Monne wrote:
> >>> To be used in order to create foreign mappings. This is based on the
> >>> ZONE_DEVICE facility which is used by persistent memory devices in
> >>> order to create struct pages and kernel virtual mappings for the IOMEM
> >>> areas of such devices. Note that on kernels without support for
> >>> ZONE_DEVICE Xen will fallback to use ballooned pages in order to
> >>> create foreign mappings.
> >>>
> >>> The newly added helpers use the same parameters as the existing
> >>> {alloc/free}_xenballooned_pages functions, which allows for in-place
> >>> replacement of the callers. Once a memory region has been added to be
> >>> used as scratch mapping space it will no longer be released, and pages
> >>> returned are kept in a linked list. This allows to have a buffer of
> >>> pages and prevents resorting to frequent additions and removals of
> >>> regions.
> >>>
> >>> If enabled (because ZONE_DEVICE is supported) the usage of the new
> >>> functionality untangles Xen balloon and RAM hotplug from the usage of
> >>> unpopulated physical memory ranges to map foreign pages, which is the
> >>> correct thing to do in order to avoid mappings of foreign pages depend
> >>> on memory hotplug.
> >> I think this is going to break Dom0 on Arm if the kernel has been built 
> >> with
> >> hotplug. This is because you may end up to re-use region that will be used
> >> for the 1:1 mapping of a foreign map.
> >>
> >> Note that I don't know whether hotplug has been tested on Xen on Arm yet. 
> >> So
> >> it might be possible to be already broken.
> >>
> >> Meanwhile, my suggestion would be to make the use of hotplug in the balloon
> >> code conditional (maybe using CONFIG_ARM64 and CONFIG_ARM)?
> > Right, this feature (allocation of unpopulated memory separated from
> > the balloon driver) is currently gated on CONFIG_ZONE_DEVICE, which I
> > think could be used on Arm.
> >
> > IMO the right solution seems to be to subtract the physical memory
> > regions that can be used for the identity mappings of foreign pages
> > (all RAM on the system AFAICT) from iomem_resource, as that would make
> > this and the memory hotplug done in the balloon driver safe?
> 
> The right solution is a mechanism for translated guests to query Xen to
> find regions of guest physical address space which are unused, and can
> be safely used for foreign/grant/other mappings.
> 
> Please don't waste any more time applying more duct tape to a broken
> system, and instead spend the time organising some proper foundations.

The piece added here (using ZONE_DEVICE) will be relevant when Xen can
provide the space to map foreign pages, it's just that right now it
relies on iomem_resource instead of a Xen specific resource map that
should be provided by the hypervisor. It should indeed be fixed, but
right now this patch should allow a PVH dom0 to work slightly better.
When Xen provides such areas Linux just needs to populate a custom Xen
resource with them and use it instead of iomem_resource.

The Arm stuff I'm certainly not familiar with, and can't provide much
insight on that. If it's best to just disable it and continue to rely
on ballooned out pages that's fine.

Roger.


Re: [RFC PATCH 5/9] PCI/AER: Apply function level reset to RCiEP on fatal error

2020-07-28 Thread Sean V Kelley

On 28 Jul 2020, at 10:02, Jonathan Cameron wrote:


On Tue, 28 Jul 2020 09:14:11 -0700
Sean V Kelley  wrote:


On 28 Jul 2020, at 6:27, Zhuo, Qiuxu wrote:


From: Jonathan Cameron 
Sent: Monday, July 27, 2020 7:17 PM
To: Kelley, Sean V 
Cc: bhelg...@google.com; r...@rjwysocki.net; ashok@kernel.org;
Luck,
Tony ;
sathyanarayanan.kuppusw...@linux.intel.com;
linux-...@vger.kernel.org;
linux-kernel@vger.kernel.org; Zhuo, Qiuxu 
Subject: Re: [RFC PATCH 5/9] PCI/AER: Apply function level reset to
RCiEP
on fatal error

On Fri, 24 Jul 2020 10:22:19 -0700
Sean V Kelley  wrote:


From: Qiuxu Zhuo 

Attempt to do function level reset for an RCiEP associated with an
RCEC device on fatal error.


I'd like to understand more on your reasoning for flr here.
Is it simply that it is all we can do, or is there some basis in a
spec
somewhere?



Yes. Though there isn't the link reset for the RCiEP here, I think 
we
should still be able to reset the RCiEP via FLR on fatal error, if 
the

RCiEP supports FLR.

-Qiuxu



Also see PCIe 5.0-1, Sec. 6.6.2 Function Level Reset (FLR)

Implementation of FLR is optional (not required), but is strongly
recommended. For an example use case consider CXL. Function 0 DVSEC
instances control for the CXL functionality of the entire CXL device.
FLR may succeed in recovering from CXL.io domain errors.


That feels a little bit of a weak argument in favour.  PCI spec lists 
examples
of use only for FLR and I can't see this matching any of them, but 
then they
are only examples, so we could argue it doesn't exclude this use. It's 
not
allowed to affect the link state, but I guess it 'might' recover from 
some

other type of error?

I'd have read the statement in the CXL spec you are referring to as 
matching

with the first example in the PCIe spec which is about recovering from
software errors.  For example, unexpected VM tear down.


From my perspective, it can add value as the point is to address device 
functions and their associated software states. As the section in the 
spec goes on to state:


“The FLR mechanism enables software to quiesce and reset Endpoint 
hardware with Function-level granularity. Three example usage models 
illustrate the benefits of this feature:…”


Later changes in CXL 2.0 Section 9.8 (as of 0.9 draft) further look to 
extend FLR with an eFLR or now referred to as CXL Reset.


“All Functions in a CXL 2.0 (Single Logical Device) SLD that 
participate in CXL.cache or CXL.mem are required to support either FLR 
or CXL Reset. MLDs (Multiple Logical Devices), on the other hand, are 
required to support CXL Reset.”


In my mind the question is whether this change is too limited in scope 
with this patch series (RCiEP) and whether FLR should be considered in a 
broader, i.e., EP, as a ‘hammer’ so to speak.


Thanks,

Sean



@Bjorn / All.  What's your view on using FLR as a reset to do when you 
don't

have any other hammers to use?

Personally I don't have a particular problem with this, it just 
doesn't fit
with my mental model of what FLR is for (which may well need adjusting 
:)


Jonathan




Thanks,

Sean



Signed-off-by: Qiuxu Zhuo 
---
 drivers/pci/pcie/err.c | 31 ++-
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index
044df004f20b..9b3ec94bdf1d 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -170,6 +170,17 @@ static void pci_walk_dev_affected(struct

pci_dev *dev, int (*cb)(struct pci_dev

 }
 }

+static enum pci_channel_state flr_on_rciep(struct pci_dev *dev) {
+if (!pcie_has_flr(dev))
+return PCI_ERS_RESULT_NONE;
+
+if (pcie_flr(dev))
+return PCI_ERS_RESULT_DISCONNECT;
+
+return PCI_ERS_RESULT_RECOVERED;
+}
+
 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 enum pci_channel_state state,
 pci_ers_result_t (*reset_link)(struct pci_dev *pdev))

@@ -191,15 +202,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
 if (state == pci_channel_io_frozen) {
 pci_walk_dev_affected(dev, report_frozen_detected, &status);

 if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
-pci_warn(dev, "link reset not possible for RCiEP\n");
-status = PCI_ERS_RESULT_NONE;
-goto failed;
-}
-
-status = reset_link(dev);
-if (status != PCI_ERS_RESULT_RECOVERED) {
-pci_warn(dev, "link reset failed\n");
-goto failed;
+status = flr_on_rciep(dev);
+if (status != PCI_ERS_RESULT_RECOVERED) {
+pci_warn(dev, "function level reset failed\n");
+goto failed;
+}
+} else {
+status = reset_link(dev);
+if (status != PCI_ERS_RESULT_RECOVERED) {
+pci_warn(dev, "link reset failed\n");
+goto failed;
+}
 }
 } else {
 pci_walk_dev_affected(dev, report_normal_detected, &status);



Re: [PATCH] media: usbvision: fixed coding style

2020-07-28 Thread kernel test robot
Hi Dhiraj,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linuxtv-media/master]
[also build test ERROR on staging/staging-testing soc/for-next v5.8-rc7 
next-20200728]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Dhiraj-Sharma/media-usbvision-fixed-coding-style/20200728-223404
base:   git://linuxtv.org/media_tree.git master
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-14) 9.3.0
reproduce (this is a W=1 build):
# save the attached .config to linux build tree
make W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> drivers/staging/media/usbvision/usbvision-video.c:157:56: error: macro 
>> "DEVICE_ATTR_RO" passed 4 arguments, but takes just 1
 157 | static DEVICE_ATTR_RO(version, 0444, version_show, NULL);
 |^
   In file included from include/linux/acpi.h:15,
from include/linux/i2c.h:13,
from drivers/staging/media/usbvision/usbvision-video.c:43:
   include/linux/device.h:131: note: macro "DEVICE_ATTR_RO" defined here
 131 | #define DEVICE_ATTR_RO(_name) \
 | 
>> drivers/staging/media/usbvision/usbvision-video.c:157:8: error: type 
>> defaults to 'int' in declaration of 'DEVICE_ATTR_RO' [-Werror=implicit-int]
 157 | static DEVICE_ATTR_RO(version, 0444, version_show, NULL);
 |^~
   drivers/staging/media/usbvision/usbvision-video.c:168:52: error: macro 
"DEVICE_ATTR_RO" passed 4 arguments, but takes just 1
 168 | static DEVICE_ATTR_RO(model, 0444, model_show, NULL);
 |^
   In file included from include/linux/acpi.h:15,
from include/linux/i2c.h:13,
from drivers/staging/media/usbvision/usbvision-video.c:43:
   include/linux/device.h:131: note: macro "DEVICE_ATTR_RO" defined here
 131 | #define DEVICE_ATTR_RO(_name) \
 | 
   drivers/staging/media/usbvision/usbvision-video.c:168:8: error: type 
defaults to 'int' in declaration of 'DEVICE_ATTR_RO' [-Werror=implicit-int]
 168 | static DEVICE_ATTR_RO(model, 0444, model_show, NULL);
 |^~
   drivers/staging/media/usbvision/usbvision-video.c:180:48: error: macro 
"DEVICE_ATTR_RO" passed 4 arguments, but takes just 1
 180 | static DEVICE_ATTR_RO(hue, 0444, hue_show, NULL);
 |^
   In file included from include/linux/acpi.h:15,
from include/linux/i2c.h:13,
from drivers/staging/media/usbvision/usbvision-video.c:43:
   include/linux/device.h:131: note: macro "DEVICE_ATTR_RO" defined here
 131 | #define DEVICE_ATTR_RO(_name) \
 | 
   drivers/staging/media/usbvision/usbvision-video.c:180:8: error: type 
defaults to 'int' in declaration of 'DEVICE_ATTR_RO' [-Werror=implicit-int]
 180 | static DEVICE_ATTR_RO(hue, 0444, hue_show, NULL);
 |^~
   drivers/staging/media/usbvision/usbvision-video.c:192:58: error: macro 
"DEVICE_ATTR_RO" passed 4 arguments, but takes just 1
 192 | static DEVICE_ATTR_RO(contrast, 0444, contrast_show, NULL);
 |  ^
   In file included from include/linux/acpi.h:15,
from include/linux/i2c.h:13,
from drivers/staging/media/usbvision/usbvision-video.c:43:
   include/linux/device.h:131: note: macro "DEVICE_ATTR_RO" defined here
 131 | #define DEVICE_ATTR_RO(_name) \
 | 
   drivers/staging/media/usbvision/usbvision-video.c:192:8: error: type 
defaults to 'int' in declaration of 'DEVICE_ATTR_RO' [-Werror=implicit-int]
 192 | static DEVICE_ATTR_RO(contrast, 0444, contrast_show, NULL);
 |^~
   drivers/staging/media/usbvision/usbvision-video.c:204:62: error: macro 
"DEVICE_ATTR_RO" passed 4 arguments, but takes just 1
 204 | static DEVICE_ATTR_RO(brightness, 0444, brightness_show, NULL);
 |  ^
   In file included from include/linux/acpi.h:15,
from include/linux/i2c.h:13,
from drivers/staging/media/usbvision/usbvision-video.c:43:
   include/linux/device.h:131: note: macro "DEVICE_ATTR_RO" defined here
 131 | #define DEVICE_ATTR_RO(_name) \
 | 
   drivers/staging/media/usbvision/usbvision-video.c:204:8: error: type 
defaults to 'int' 

Re: [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor

2020-07-28 Thread Andy Lutomirski
> On Jul 28, 2020, at 6:11 AM, madve...@linux.microsoft.com wrote:
>
> From: "Madhavan T. Venkataraman" 
>

> The kernel creates the trampoline mapping without any permissions. When
> the trampoline is executed by user code, a page fault happens and the
> kernel gets control. The kernel recognizes that this is a trampoline
> invocation. It sets up the user registers based on the specified
> register context, and/or pushes values on the user stack based on the
> specified stack context, and sets the user PC to the requested target
> PC. When the kernel returns, execution continues at the target PC.
> So, the kernel does the work of the trampoline on behalf of the
> application.

This is quite clever, but now I’m wondering just how much kernel help
is really needed. In your series, the trampoline is a non-executable
page.  I can think of at least two alternative approaches, and I'd
like to know the pros and cons.

1. Entirely userspace: a return trampoline would be something like:

1:
pushq %rax
pushq %rbx
pushq %rcx
...
pushq %r15
movq %rsp, %rdi # pointer to saved regs
leaq 1b(%rip), %rsi # pointer to the trampoline itself
callq trampoline_handler # see below

You would fill a page with a bunch of these, possibly compacted to get
more per page, and then you would remap as many copies as needed.  The
'callq trampoline_handler' part would need to be a bit clever to make
it continue to work despite this remapping.  This will be *much*
faster than trampfd. How much of your use case would it cover?  For
the inverse, it's not too hard to write a bit of asm to set all
registers and jump somewhere.

2. Use existing kernel functionality.  Raise a signal, modify the
state, and return from the signal.  This is very flexible and may not
be all that much slower than trampfd.

3. Use a syscall.  Instead of having the kernel handle page faults,
have the trampoline code push the syscall nr register, load a special
new syscall nr into the syscall nr register, and do a syscall. On
x86_64, this would be:

pushq %rax
movq __NR_magic_trampoline, %rax
syscall

with some adjustment if the stack slot you're clobbering is important.


Also, will using trampfd cause issues with various unwinders?  I can
easily imagine unwinders expecting code to be readable, although this
is slowly going away for other reasons.

All this being said, I think that the kernel should absolutely add a
sensible interface for JITs to use to materialize their code.  This
would integrate sanely with LSMs and wouldn't require hacks like using
files, etc.  A cleverly designed JIT interface could function without
serialization IPIs, and even lame architectures like x86 could
potentially avoid shootdown IPIs if the interface copied code instead
of playing virtual memory games.  At its very simplest, this could be:

void *jit_create_code(const void *source, size_t len);

and the result would be a new anonymous mapping that contains exactly
the code requested.  There could also be:

int jittfd_create(...);

that does something similar but creates a memfd.  A nicer
implementation for short JIT sequences would allow appending more code
to an existing JIT region.  On x86, an appendable JIT region would
start filled with 0xCC, and I bet there's a way to materialize new
code into a previously 0xcc-filled virtual page without any
synchronization.  One approach would be to start with:

<some code>
0xcc
0xcc
...
0xcc

and to create a whole new page like:

<some code>
<some more code>
0xcc
...
0xcc

so that the only difference is that some code changed to some more
code.  Then replace the PTE to swap from the old page to the new page,
and arrange to avoid freeing the old page until we're sure it's gone
from all TLBs.  This may not work if <some more code> spans a page
boundary.  The #BP fixup would zap the TLB and retry.  Even just
directly copying code over some 0xcc bytes almost works, but there's a
nasty corner case involving instructions that cross I$ fetch
boundaries.  I'm not sure to what extent I$ snooping helps.

--Andy


Re: [PATCH] ASoC: Intel: Atom: use hardware counter to update hw_ptr

2020-07-28 Thread Pierre-Louis Bossart




On 7/28/20 12:02 PM, Lu, Brent wrote:


So if there are already quirks in atom machine drivers to change the period
size, why is this patch necessary?



The story is: google implemented the constraint but doesn't know why it works
so asked us to explain. After checking the two counters I realized the increase 
of
ring buffer pointer follows the period size setting in hw_param (256) but the
period of interrupt is always 5ms instead of 5.33 so it's running little bit 
too fast.
It seems the LPE keeps tracking the difference of two counters. When the
difference exceeds 2160 samples, the next interrupt will be canceled so the
hardware counter could catch up a little.

[   43.208299] intel_sst_acpi 808622A8:00: mrfld ring_buffer_counter 107520 
hardware_counter 98880 pcm delay 8640 (in bytes)
[   43.208306] intel_sst_acpi 808622A8:00: buffer ptr 26880 pcm_delay rep: 2160
[   43.208321] sound pcmC1D0p: [Q] pos 26880 hw_ptr 26880 appl_ptr 4 avail 
191680
=> one interrupt is skipped.
[   43.218299] intel_sst_acpi 808622A8:00: mrfld ring_buffer_counter 108544 
hardware_counter 100800 pcm delay 7744 (in bytes)
[   43.218307] intel_sst_acpi 808622A8:00: buffer ptr 27136 pcm_delay rep: 1936
[   43.218336] sound pcmC1D0p: [Q] pos 27136 hw_ptr 27136 appl_ptr 4 avail 
191936

So I wonder: why not use the hardware counter? It increases by 240 samples
every 5ms, which perfectly matches the 48000 sample rate. The test result is
good, but I know there must be a reason why the original designer used the
ring buffer counter instead of the hardware counter. I uploaded this patch to
see if anyone still remembers the reason and can share some insight with me.

I totally agree that we shouldn't touch this part of the design. Do you think
it makes sense to add a constraint to enforce the period size in the machine
driver? If yes, then I will upload patches for the Chrome atom machines for
google.


I think it'd make sense to add this constraint, either in the machine 
driver or in the platform driver, so that we don't change the position 
updates and introduce more issues by accident by doing so. As you 
rightly said, I don't think anyone tested periods multiple of 256 
samples so it's not a regression, more aligning with the internal design.


Re: Kernel panic - not syncing: IO-APIC + timer doesn't work!

2020-07-28 Thread Scott Branden
Hi Thomas,

On 2020-07-28 1:22 a.m., Thomas Gleixner wrote:
> Scott,
>
> Scott Branden  writes:
>> Bios now updated to latest.  Same kernel panic issue.  Log below.
>>
>> I think it is related to power cycling quickly.
>> Should APIC work if PC power cycled in a few seconds or is that the
>> problem?
> Yes, emphasis on should. Just to clarify, if you reboot it works and
> cold start works as well if power was off long enough?
So far I have only been able to reproduce the issue by cold start with power 
off for only a few seconds
before re-powering the system.  It has not failed via reboot yet that I 
remember.
Will have to keep my eye on whether using reboot is an issue or not.
And also keeping power off longer when doing a cold start.
>
>> [    0.00] Linux version 5.8.0-rc6 (oe-user@oe-host) 
>> (x86_64-poky-linux-gcc (GCC) 10.1.0, GNU ld (GNU Binutils) 2.34.0.20200220) 
>> #1 SMP Sat Jul 25 03:55:25 UTC 2020
>> [    0.00] Command line: BOOT_IMAGE=/bzImage ima_policy=tcb
>> apic=debug ip=dhcp raid=noautodetect console=ttyS0,115200
>> root=/dev/nfs nfsroot=192.168.1.100:/nfs/vxc,hard,tcp,intr,v3 rootwait
>> nfsrootdebug
> The working dmesg and the failing console log are hard to compare
> because the latter does not contain debug level printks. Please add
> 'ignore_loglevel' to the command line parameters.
Please find attached the failed console log with ignore_loglevel.
>
> Thanks,
>
> tglx
Regards,
Scott
Loading /bzImage... ok
[0.00] Linux version 5.8.0-rc6 (oe-user@oe-host) (x86_64-poky-linux-gcc 
(GCC) 10.1.0, GNU ld (GNU Binutils) 2.34.0.20200220) #1 SMP Sat Jul 25 03:55:25 
UTC 2020
[0.00] Command line: BOOT_IMAGE=/bzImage ignore_loglevel ima_policy=tcb 
apic=debug ip=dhcp raid=noautodetect console=ttyS0,115200 root=/dev/nfs 
nfsroot=192.168.1.100:/nfs/vxc,hard,tcp,intr,v3 rootwait nfsrootdebug
[0.00] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point 
registers'
[0.00] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[0.00] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[0.00] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[0.00] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, 
using 'compacted' format.
[0.00] BIOS-provided physical RAM map:
[0.00] BIOS-e820: [mem 0x-0x0009d3ff] usable
[0.00] BIOS-e820: [mem 0x0009d400-0x0009] reserved
[0.00] BIOS-e820: [mem 0x000e-0x000f] reserved
[0.00] BIOS-e820: [mem 0x0010-0x09bfefff] usable
[0.00] BIOS-e820: [mem 0x09bff000-0x09ff] reserved
[0.00] BIOS-e820: [mem 0x0a00-0x0a1f] usable
[0.00] BIOS-e820: [mem 0x0a20-0x0a20] ACPI NVS
[0.00] BIOS-e820: [mem 0x0a21-0xca47] usable
[0.00] BIOS-e820: [mem 0xca48-0xca7acfff] reserved
[0.00] BIOS-e820: [mem 0xca7ad000-0xca8fcfff] ACPI data
[0.00] BIOS-e820: [mem 0xca8fd000-0xcafb4fff] ACPI NVS
[0.00] BIOS-e820: [mem 0xcafb5000-0xcbbfefff] reserved
[0.00] BIOS-e820: [mem 0xcbbff000-0xccff] usable
[0.00] BIOS-e820: [mem 0xcd00-0xcfff] reserved
[0.00] BIOS-e820: [mem 0xf000-0xf7ff] reserved
[0.00] BIOS-e820: [mem 0xfd20-0xfd2f] reserved
[0.00] BIOS-e820: [mem 0xfd40-0xfd5f] reserved
[0.00] BIOS-e820: [mem 0xfea0-0xfea0] reserved
[0.00] BIOS-e820: [mem 0xfeb8-0xfec01fff] reserved
[0.00] BIOS-e820: [mem 0xfec1-0xfec10fff] reserved
[0.00] BIOS-e820: [mem 0xfed0-0xfed00fff] reserved
[0.00] BIOS-e820: [mem 0xfed4-0xfed44fff] reserved
[0.00] BIOS-e820: [mem 0xfed8-0xfed8] reserved
[0.00] BIOS-e820: [mem 0xfedc2000-0xfedc] reserved
[0.00] BIOS-e820: [mem 0xfedd4000-0xfedd5fff] reserved
[0.00] BIOS-e820: [mem 0xff00-0x] reserved
[0.00] BIOS-e820: [mem 0x0001-0x00042f37] usable
[0.00] BIOS-e820: [mem 0x00042f38-0x00042fff] reserved
[0.00] printk: debug: ignoring loglevel setting.
[0.00] NX (Execute Disable) protection: active
[0.00] SMBIOS 3.2.0 present.
[0.00] DMI: System manufacturer System Product Name/PRIME X570-P, BIOS 
2407 07/01/2020
[0.00] tsc: Fast TSC calibration failed
[0.00] e820: update [mem 0x-0x0fff] usable ==> reserved
[0.00] e820: remove [mem 0x000a-0x000f] usable
[0.00] last_pfn = 0x42f380 

Re: [PATCH] mwifiex: don't call del_timer_sync() on uninitialized timer

2020-07-28 Thread Andy Shevchenko
On Tue, Jul 28, 2020 at 4:46 AM Tetsuo Handa
 wrote:
>
> syzbot is reporting that del_timer_sync() is called from
> mwifiex_usb_cleanup_tx_aggr() from mwifiex_unregister_dev() without
> checking timer_setup() from mwifiex_usb_tx_init() was called [1].
> Since mwifiex_usb_prepare_tx_aggr_skb() is calling del_timer() if
> is_hold_timer_set == true, use the same condition for del_timer_sync().
>
> [1] 
> https://syzkaller.appspot.com/bug?id=fdeef9cf7348be8b8ab5b847f2ed993aba8ea7b6
>

Can you use BugLink: tag for above?

> Reported-by: syzbot 
> Cc: Ganapathi Bhat 
> Signed-off-by: Tetsuo Handa 
> ---
> A patch from Ganapathi Bhat ( https://patchwork.kernel.org/patch/10990275/ ) 
> is stalling
> at 
> https://lore.kernel.org/linux-usb/mn2pr18mb2637d7c742bc235fe38367f0a0...@mn2pr18mb2637.namprd18.prod.outlook.com/
>  .
> syzbot by now got this report for 1 times. Do we want to go with this 
> simple patch?
>
>  drivers/net/wireless/marvell/mwifiex/usb.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c 
> b/drivers/net/wireless/marvell/mwifiex/usb.c
> index 6f3cfde..04a1461 100644
> --- a/drivers/net/wireless/marvell/mwifiex/usb.c
> +++ b/drivers/net/wireless/marvell/mwifiex/usb.c
> @@ -1353,7 +1353,8 @@ static void mwifiex_usb_cleanup_tx_aggr(struct 
> mwifiex_adapter *adapter)
> skb_dequeue(&port->tx_aggr.aggr_list)))
> mwifiex_write_data_complete(adapter, skb_tmp,
> 0, -1);
> -   del_timer_sync(&port->tx_aggr.timer_cnxt.hold_timer);
> +   if (port->tx_aggr.timer_cnxt.is_hold_timer_set)
> +   del_timer_sync(&port->tx_aggr.timer_cnxt.hold_timer);
> port->tx_aggr.timer_cnxt.is_hold_timer_set = false;
> port->tx_aggr.timer_cnxt.hold_tmo_msecs = 0;
> }
> --
> 1.8.3.1
>


-- 
With Best Regards,
Andy Shevchenko


Re: [External] Re: [PATCH 2/2] ftrace: setup correct flags before replace code of module rec

2020-07-28 Thread Chengming Zhou


在 2020/7/28 下午9:02, Steven Rostedt 写道:
> On Tue, 28 Jul 2020 18:27:20 +0800
> Chengming Zhou  wrote:
>
>> When module loaded and enabled, we will use __ftrace_replace_code
>> for module if any ftrace_ops referenced it found. But we will get
>> wrong ftrace_addr for module rec in ftrace_get_addr_new, because
>> rec->flags has not been setup correctly.
>> So setup correct rec->flags when we call referenced_filters to find
>> ftrace_ops references it.
> This is somewhat correct ;-)
>
>> Signed-off-by: Chengming Zhou 
>> Signed-off-by: Muchun Song 
>> ---
>>  kernel/trace/ftrace.c | 16 +---
>>  1 file changed, 13 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
>> index fca01a168ae5..00087dea0174 100644
>> --- a/kernel/trace/ftrace.c
>> +++ b/kernel/trace/ftrace.c
>> @@ -6190,8 +6190,17 @@ static int referenced_filters(struct dyn_ftrace *rec)
>>  int cnt = 0;
>>  
>>  for (ops = ftrace_ops_list; ops != _list_end; ops = ops->next) {
>> -if (ops_references_rec(ops, rec))
>> -cnt++;
>> +if (ops_references_rec(ops, rec)) {
>> +cnt++;
>> +if (ops->flags & FTRACE_OPS_FL_DIRECT)
>> +rec->flags |= FTRACE_FL_DIRECT;
> The above should be:
>
>   if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT))
>   continue;
>   cnt++;
>
> The direct flag is *very* special, and should not be set automatically
> like this.
>
> Probably should add the same kind of warning and skip for
> FTRACE_OPS_FL_IPMODIFY.
Ok, I think it's fine to warn and skip these ops.
>> +if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
>> +rec->flags |= FTRACE_FL_REGS;
> The above is definitely a bug fix. I'm thinking this patch should be
> broken up into two. One with just this update (and the clear below),
> and the rest later. As this should be backported to stable.

Yes, this bug caused a kernel crash on our server...

So I will send a bugfix patch just including this update and the clear
below.

>> +if (cnt == 1 && ops->trampoline)
>> +rec->flags |= FTRACE_FL_TRAMP;
>> +else
>> +rec->flags &= ~FTRACE_FL_TRAMP;
> The above is correct, but not critical that it would need to be
> backported.

I will put the rest in the second patch later.

Thanks!

>
> Thanks!
>
> -- Steve
>
>> +}
>>  }
>>  
>>  return cnt;
>> @@ -6373,7 +6382,8 @@ void ftrace_module_enable(struct module *mod)
>>  cnt += referenced_filters(rec);
>>  
>>  /* This clears FTRACE_FL_DISABLED */
>> -rec->flags = cnt;
>> +rec->flags &= ~FTRACE_FL_DISABLED;
>> +rec->flags += cnt;
>>  
>>  if (ftrace_start_up && cnt) {
>>  int failed = __ftrace_replace_code(rec, 1);


Re: [PATCH RFC leds + net-next v4 1/2] net: phy: add API for LEDs controlled by PHY HW

2020-07-28 Thread Marek Behun
On Tue, 28 Jul 2020 18:18:00 +0200
Andrew Lunn  wrote:

> > +static int of_phy_register_led(struct phy_device *phydev, struct 
> > device_node *np)
> > +{
> > +   struct led_init_data init_data = {};
> > +   struct phy_device_led *led;
> > +   u32 reg;
> > +   int ret;
> > +
> > +   ret = of_property_read_u32(np, "reg", );
> > +   if (ret < 0)
> > +   return ret;
> > +
> > +   led = devm_kzalloc(>mdio.dev, sizeof(struct phy_device_led), 
> > GFP_KERNEL);
> > +   if (!led)
> > +   return -ENOMEM;
> > +
> > +   led->cdev.brightness_set_blocking = phy_led_brightness_set;
> > +   led->cdev.trigger_type = _hw_led_trig_type;
> > +   led->addr = reg;
> > +
> > +   of_property_read_string(np, "linux,default-trigger", 
> > >cdev.default_trigger);  
> 
> Hi Marek
> 
> I think we need one more optional property. If the trigger has been
> set to the PHY hardware trigger, we then should be able to set which
> of the different blink patterns we want the LED to use. I guess most
> users will never actually make use of the sys/class/led interface, if
> the default in device tree is sensible. But that requires DT can fully
> configure the LED.
> 
>Andrew

Yes, I also thought about that. We have the linux,default-trigger
property, so maybe we could add linux,default-hw-control-mode property
as well.

Marek


Re: [PATCH v4 10/10] module: Reorder functions

2020-07-28 Thread Jessica Yu

+++ Kristen Carlson Accardi [17/07/20 10:00 -0700]:

Introduce a new config option to allow modules to be re-ordered
by function. This option can be enabled independently of the
kernel text KASLR or FG_KASLR settings so that it can be used
by architectures that do not support either of these features.
This option will be selected by default if CONFIG_FG_KASLR is
selected.

If a module has functions split out into separate text sections
(i.e. compiled with the -ffunction-sections flag), reorder the
functions to provide some code diversification to modules.

Signed-off-by: Kristen Carlson Accardi 
Reviewed-by: Kees Cook 
Acked-by: Ard Biesheuvel 
Tested-by: Ard Biesheuvel 
Reviewed-by: Tony Luck 
Tested-by: Tony Luck 


Hi Kristen!

I've boot tested this on x86, (un)loaded some modules, and checked
their resulting section addresses as a quick sanity test. Feel free
to add my:

Acked-by: Jessica Yu 
Tested-by: Jessica Yu 

Thank you!

Jessica


Re: [RFC PATCH 0/5] madvise MADV_DOEXEC

2020-07-28 Thread Anthony Yznaga



On 7/28/20 4:34 AM, Kirill Tkhai wrote:
> On 27.07.2020 20:11, Anthony Yznaga wrote:
>> This patchset adds support for preserving an anonymous memory range across
>> exec(3) using a new madvise MADV_DOEXEC argument.  The primary benefit for
>> sharing memory in this manner, as opposed to re-attaching to a named shared
>> memory segment, is to ensure it is mapped at the same virtual address in
>> the new process as it was in the old one.  An intended use for this is to
>> preserve guest memory for guests using vfio while qemu exec's an updated
>> version of itself.  By ensuring the memory is preserved at a fixed address,
> So, the goal is an update of QEMU binary without a stopping of virtual 
> machine?
Essentially, yes.  The VM is paused very briefly.

Anthony
>
>> vfio mappings and their associated kernel data structures can remain valid.
>> In addition, for the qemu use case, qemu instances that back guest RAM with
>> anonymous memory can be updated.
>>
>> Patches 1 and 2 ensure that loading of ELF load segments does not silently
>> clobber existing VMAS, and remove assumptions that the stack is the only
>> VMA in the mm when the stack is set up.  Patch 1 re-introduces the use of
>> MAP_FIXED_NOREPLACE to load ELF binaries that addresses the previous issues
>> and could be considered on its own.
>>
>> Patches 3, 4, and 5 introduce the feature and an opt-in method for its use
>> using an ELF note.
>>
>> Anthony Yznaga (5):
>>   elf: reintroduce using MAP_FIXED_NOREPLACE for elf executable mappings
>>   mm: do not assume only the stack vma exists in setup_arg_pages()
>>   mm: introduce VM_EXEC_KEEP
>>   exec, elf: require opt-in for accepting preserved mem
>>   mm: introduce MADV_DOEXEC
>>
>>  arch/x86/Kconfig   |   1 +
>>  fs/binfmt_elf.c| 196 
>> +
>>  fs/exec.c  |  33 +-
>>  include/linux/binfmts.h|   7 +-
>>  include/linux/mm.h |   5 +
>>  include/uapi/asm-generic/mman-common.h |   3 +
>>  kernel/fork.c  |   2 +-
>>  mm/madvise.c   |  25 +
>>  mm/mmap.c  |  47 
>>  9 files changed, 266 insertions(+), 53 deletions(-)
>>



Re: [PATCH RFC leds + net-next v4 1/2] net: phy: add API for LEDs controlled by PHY HW

2020-07-28 Thread Marek Behun
On Tue, 28 Jul 2020 18:28:16 +0200
Andrew Lunn  wrote:

> > > @@ -736,6 +777,16 @@ struct phy_driver {
> > >   int (*set_loopback)(struct phy_device *dev, bool enable);
> > >   int (*get_sqi)(struct phy_device *dev);
> > >   int (*get_sqi_max)(struct phy_device *dev);
> > > +
> > > + /* PHY LED support */
> > > + int (*led_init)(struct phy_device *dev, struct
> > > phy_device_led *led);
> > > + int (*led_brightness_set)(struct phy_device *dev, struct
> > > phy_device_led *led,
> > > +   enum led_brightness brightness);
> > > + const char *(*led_iter_hw_mode)(struct phy_device *dev,
> > > struct phy_device_led *led,
> > > + void ** iter);
> > > + int (*led_set_hw_mode)(struct phy_device *dev, struct
> > > phy_device_led *led,
> > > +const char *mode);
> > > + const char *(*led_get_hw_mode)(struct phy_device *dev,
> > > struct phy_device_led *led); };
> > >  #define to_phy_driver(d)
> > > container_of(to_mdio_common_driver(d),\ struct
> > > phy_driver, mdiodrv)  
> > 
> > The problem here is that the same code will have to be added to DSA
> > switch ops structure, which is not OK.  
> 
> Not necessarily. DSA drivers do have access to the phydev structure.
> 
> I think putting these members into a structure is a good idea. That
> structure can be part of phy_driver and initialised just like other
> members. But on probing the phy, it can be copied over to the
> phy_device structure. And we can provide an API which DSA drivers can
> use to register their own structure of ops to be placed into
> phy_device, which would call into the DSA driver.
> 
>   Andrew

On Marvell switches there are LEDs that do not necessarily blink on
events on a specific port, but instead on the whole switch. I.e. a LED
can be put into a mode "act on any port". Vendors may create devices
with this as the intended mode for a LED, and such a LED may be on the
other side of the device from where the ports are, or something. Such a
LED should be described in the device tree not as a child of any PHY or
port, but instead as a child of the switch itself. And since all the
LEDs on Marvell switches are technically controlled by the switch, not
its internal PHYs, I think all of them should be children of the
switch node (or a "leds" node which is a child of the switch node),
instead of being descended from the internal PHYs.

Marek


Re: [PATCH 1/1] USB: PHY: JZ4770: Fix static checker warning.

2020-07-28 Thread Andy Shevchenko
On Tue, Jul 28, 2020 at 8:09 PM 周琰杰 (Zhou Yanjie)
 wrote:
>
> The commit 2a6c0b82e651 ("USB: PHY: JZ4770: Add support for new
> Ingenic SoCs.") introduced the initialization function for different
> chips, but left the relevant code involved in the resetting process
> in the original function, resulting in uninitialized variable calls.
> This problem can be solved by putting this part of the code into the
> initialization function for each chip. Although the four processors
> currently supported have the same reset code, let us can solve this

'can' -> ''

> problem by adding the initialization of the reg variable to the
> original function, but when other processors with different reset
> methods (such as X2000) are introduced in the future, it will cause
> inevitable condition judgments to complicate the function, which
> violates the original intention of introducing initialization
> functions for each processor.
>
> Fixes: 2a6c0b82e651 ("USB: PHY: JZ4770: Add support for new
> Ingenic SoCs.").

No period at the end

>

No blank line in the tag block.

> Reported-by: Colin Ian King 
> Reported-by: Dan Carpenter 
> Signed-off-by: 周琰杰 (Zhou Yanjie) 

...

> +   /* Wait for PHY to reset */
> +   usleep_range(30, 300);
> +   writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
> +   usleep_range(300, 1000);

Instead of copy'n'paste 4 times, you may provide a helper function.

-- 
With Best Regards,
Andy Shevchenko


Re: [PATCH 16/19] perf metric: Make compute_single function more precise

2020-07-28 Thread Ian Rogers
On Tue, Jul 28, 2020 at 5:36 AM Arnaldo Carvalho de Melo
 wrote:
>
> Em Sun, Jul 19, 2020 at 08:13:17PM +0200, Jiri Olsa escreveu:
> > So far compute_single function relies on the fact, that
> > there's only single metric defined within evlist in all
> > tests. In following patch we will add test for metric
> > group, so we need to be able to compute metric by given
> > name.
> >
> > Adding the name argument to compute_single and iterating
> > evlist and evsel's expression to find the given metric.
>
> Applied, thanks.
>
> Ian, Kajol, I didn't notice your Acked-by or Reviewed-by, like for the
> other patches, can you check?


Acked-by: Ian Rogers 

Thanks,
Ian

> - Arnaldo
>
> > Signed-off-by: Jiri Olsa 
> > ---
> >  tools/perf/tests/parse-metric.c | 22 +-
> >  1 file changed, 13 insertions(+), 9 deletions(-)
> >
> > diff --git a/tools/perf/tests/parse-metric.c 
> > b/tools/perf/tests/parse-metric.c
> > index 01370ccb9ed9..5ac32f80f8ea 100644
> > --- a/tools/perf/tests/parse-metric.c
> > +++ b/tools/perf/tests/parse-metric.c
> > @@ -108,17 +108,21 @@ static void load_runtime_stat(struct runtime_stat 
> > *st, struct evlist *evlist,
> >  }
> >
> >  static double compute_single(struct rblist *metric_events, struct evlist 
> > *evlist,
> > -  struct runtime_stat *st)
> > +  struct runtime_stat *st, const char *name)
> >  {
> > - struct evsel *evsel = evlist__first(evlist);
> > + struct metric_expr *mexp;
> >   struct metric_event *me;
> > + struct evsel *evsel;
> >
> > - me = metricgroup__lookup(metric_events, evsel, false);
> > - if (me != NULL) {
> > - struct metric_expr *mexp;
> > -
> > - mexp = list_first_entry(>head, struct metric_expr, nd);
> > - return test_generic_metric(mexp, 0, st);
> > + evlist__for_each_entry(evlist, evsel) {
> > + me = metricgroup__lookup(metric_events, evsel, false);
> > + if (me != NULL) {
> > + list_for_each_entry (mexp, >head, nd) {
> > + if (strcmp(mexp->metric_name, name))
> > + continue;
> > + return test_generic_metric(mexp, 0, st);
> > + }
> > + }
> >   }
> >   return 0.;
> >  }
> > @@ -162,7 +166,7 @@ static int compute_metric(const char *name, struct 
> > value *vals, double *ratio)
> >   load_runtime_stat(, evlist, vals);
> >
> >   /* And execute the metric */
> > - *ratio = compute_single(_events, evlist, );
> > + *ratio = compute_single(_events, evlist, , name);
> >
> >   /* ... clenup. */
> >   metricgroup__rblist_exit(_events);
> > --
> > 2.25.4
> >
>
> --
>
> - Arnaldo


Re: [External] Re: [PATCH 1/2] ftrace: clear module from hash of all ftrace ops

2020-07-28 Thread Steven Rostedt
On Wed, 29 Jul 2020 00:59:33 +0800
Chengming Zhou  wrote:


> > i.e.
> >
> >   # echo some_module_function > set_ftrace_filter
> >   # rmmod module_with_that_function
> >   # insmod module_with_same_address_of_function
> >   # echo function > current_tracer
> >
> > Now the tr->ops->hash would still have the function of the original
> > module.  
> 
> I thought all ftrace_ops that have a non-empty func_hash are on the ftrace
> global list...

Nope, the two are disjoint.

> 
> Well, so I just leave this function unmodified.
> 
> Just call that new function register_ftrace_ops_hash() from 
> ftrace_release_mod.

I would say to have anything that uses one of the
ftrace_set_filter/notrace* functions, to also register itself for
module removal.

register_ftrace_mod_removal(struct ftrace_ops *ops);

and then also have a unregister_ftrace_mod_removal() as there needs to
be a way to remove it from the list before the ops gets freed.

Then these functions would add the ops to a list, and this list is
traversed to remove modules. The trace_arrays can register their ops
too, so you can update that function.

-- Steve


> 
> Thanks!
> 
> >
> > Either have all owners of ftrace_ops handle this case, or add a helper
> > function to handle it for them. But using ftrace_ops_list is the wrong
> > place to do it.
> >
> > -- Steve
> >
> >  
> >> +  mutex_lock(_lock);
> >> +
> >> +  do_for_each_ftrace_op(op, ftrace_ops_list) {
> >> +  if (!op->func_hash)
> >>continue;
> >> -  mutex_lock(>ops->func_hash->regex_lock);
> >> -  clear_mod_from_hash(pg, tr->ops->func_hash->filter_hash);
> >> -  clear_mod_from_hash(pg, tr->ops->func_hash->notrace_hash);
> >> -  mutex_unlock(>ops->func_hash->regex_lock);
> >> -  }
> >> -  mutex_unlock(_types_lock);
> >> +  mutex_lock(>func_hash->regex_lock);
> >> +  clear_mod_from_hash(pg, op->func_hash->filter_hash);
> >> +  clear_mod_from_hash(pg, op->func_hash->notrace_hash);
> >> +  mutex_unlock(>func_hash->regex_lock);
> >> +  } while_for_each_ftrace_op(op);
> >> +
> >> +  mutex_unlock(_lock);
> >>   }
> >>   
> >>   static void ftrace_free_mod_map(struct rcu_head *rcu)  



Re: [PATCH] pci: vc: Fix kerneldoc

2020-07-28 Thread Bjorn Helgaas
Patch looks fine, but can you run "git log --oneline drivers/pci/vc.c"
and match the subject line style?

On Tue, Jul 28, 2020 at 07:10:45PM +0200, Krzysztof Kozlowski wrote:
> Fix W=1 compile warnings (invalid kerneldoc):
> 
> drivers/pci/vc.c:188: warning: Excess function parameter 'name' 
> description in 'pci_vc_do_save_buffer'
> 
> Signed-off-by: Krzysztof Kozlowski 
> ---
>  drivers/pci/vc.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
> index 5486f8768c86..5fc59ac31145 100644
> --- a/drivers/pci/vc.c
> +++ b/drivers/pci/vc.c
> @@ -172,7 +172,6 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, 
> int res)
>   * @dev: device
>   * @pos: starting position of VC capability (VC/VC9/MFVC)
>   * @save_state: buffer for save/restore
> - * @name: for error message
>   * @save: if provided a buffer, this indicates what to do with it
>   *
>   * Walking Virtual Channel config space to size, save, or restore it
> -- 
> 2.17.1
> 


Re: linux-next: build failure after merge of the char-misc tree

2020-07-28 Thread Greg KH
On Tue, Jul 28, 2020 at 07:17:05PM +0200, Greg KH wrote:
> On Tue, Jul 28, 2020 at 06:23:59PM +1000, Stephen Rothwell wrote:
> > Hi Greg,
> > 
> > On Tue, 28 Jul 2020 09:53:36 +0200 Greg KH  wrote:
> > >
> > > On Tue, Jul 28, 2020 at 05:33:31PM +1000, Stephen Rothwell wrote:
> > > > Hi Greg,
> > > > 
> > > > On Mon, 27 Jul 2020 11:24:48 +0200 Greg KH  wrote:  
> > > > >
> > > > > On Mon, Jul 27, 2020 at 06:08:31PM +1000, Stephen Rothwell wrote:  
> > > > > > Hi all,
> > > > > > 
> > > > > > After merging the char-misc tree, today's linux-next build (x86_64
> > > > > > allmodconfig) failed like this:
> > > > > > 
> > > > > > In file included from drivers/misc/habanalabs/goya/goya.c:8:
> > > > > > drivers/misc/habanalabs/goya/goyaP.h:12:10: fatal error: 
> > > > > > habanalabs.h: No such file or directory
> > > > > >12 | #include "habanalabs.h"
> > > > > >   |  ^~
> > > > > > In file included from 
> > > > > > drivers/misc/habanalabs/goya/goya_security.c:8:
> > > > > > drivers/misc/habanalabs/goya/goyaP.h:12:10: fatal error: 
> > > > > > habanalabs.h: No such file or directory
> > > > > >12 | #include "habanalabs.h"
> > > > > >   |  ^~
> > > > > > 
> > > > > > Presumably caused by commit
> > > > > > 
> > > > > >   70b2f993ea4a ("habanalabs: create common folder")
> > > > > > 
> > > > > > I have used the char-misc tree from next-20200724 for today.
> > > > > 
> > > > > Ugh, this is a mess of a merge with this driver.
> > > > > 
> > > > > Oded, I'll take Stephen's merge resolutions here and push out a new
> > > > > version, and try to resolve this error, but if you could verify I got 
> > > > > it
> > > > > correct, that would be great.  
> > > > 
> > > > The conflicts are gone, but I still get these errors.  
> > > 
> > > Very odd, I can not duplicate this at all here.  I just did a clean
> > > checkout of the char-misc-next branch and a full 'make allmodconfig' for
> > > x86_64, and it worked just fine.
> > > 
> > > Are you sure it's not coming from some other tree?
> > 
> > Do you build with a separate object tree?  I always use make O=...
> > which makes the difference.  I tested with just your tree.
> 
> Ah, no, I do not.  Odd, let me see what kind of crazy they are doing
> with include build directives...
> 
> thanks for the hint.

Ok, fix sent out, that should resolve this.

thanks,

greg k-h


[PATCH] habanalabs: fix up absolute include instructions

2020-07-28 Thread Greg Kroah-Hartman
There's no need to try to be cute with the include file locations in the
Makefile, so just specify exactly where the files are.

Bonus is this fixes the problem of building with O= as well as trying to
just build the subdirectory alone.

Reported-by: Stephen Rothwell 
Cc: Oded Gabbay 
Cc: Omer Shpigelman 
Cc: Tomer Tayar 
Cc: Moti Haimovski 
Cc: Ofir Bitton 
Cc: Ben Segal 
Cc: Christine Gharzuzi 
Cc: Pawel Piskorski 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/misc/habanalabs/common/Makefile |  2 --
 drivers/misc/habanalabs/common/debugfs.c|  2 +-
 drivers/misc/habanalabs/common/firmware_if.c|  2 +-
 drivers/misc/habanalabs/common/habanalabs.h |  4 ++--
 drivers/misc/habanalabs/common/memory.c |  2 +-
 drivers/misc/habanalabs/common/mmu.c|  2 +-
 drivers/misc/habanalabs/common/pci.c|  2 +-
 drivers/misc/habanalabs/gaudi/Makefile  |  2 --
 drivers/misc/habanalabs/gaudi/gaudi.c   | 12 ++--
 drivers/misc/habanalabs/gaudi/gaudiP.h  | 10 +-
 drivers/misc/habanalabs/gaudi/gaudi_coresight.c |  6 +++---
 drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c |  2 +-
 drivers/misc/habanalabs/gaudi/gaudi_security.c  |  2 +-
 drivers/misc/habanalabs/goya/Makefile   |  2 --
 drivers/misc/habanalabs/goya/goya.c |  8 
 drivers/misc/habanalabs/goya/goyaP.h| 12 ++--
 drivers/misc/habanalabs/goya/goya_coresight.c   |  6 +++---
 drivers/misc/habanalabs/goya/goya_security.c|  2 +-
 18 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/drivers/misc/habanalabs/common/Makefile 
b/drivers/misc/habanalabs/common/Makefile
index 97d03b5c8683..b984bfa4face 100644
--- a/drivers/misc/habanalabs/common/Makefile
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -1,6 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-subdir-ccflags-y += -I$(src)/common
-
 HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/asid.o common/habanalabs_ioctl.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index 0bc036e01ee8..c50c6fc9e905 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
+#include "../include/hw_ip/mmu/mmu_general.h"
 
 #include 
 #include 
diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 5981dbd8c6df..f70302cdab1b 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "include/common/hl_boot_if.h"
+#include "../include/common/hl_boot_if.h"
 
 #include 
 #include 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index eb42aa5476a9..018d9d67e8e6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -8,8 +8,8 @@
 #ifndef HABANALABSP_H_
 #define HABANALABSP_H_
 
-#include "include/common/armcp_if.h"
-#include "include/common/qman_if.h"
+#include "../include/common/armcp_if.h"
+#include "../include/common/qman_if.h"
 #include 
 
 #include 
diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index e4e1693e5c6c..dce9273e557a 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -7,7 +7,7 @@
 
 #include 
 #include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
+#include "../include/hw_ip/mmu/mmu_general.h"
 
 #include 
 #include 
diff --git a/drivers/misc/habanalabs/common/mmu.c 
b/drivers/misc/habanalabs/common/mmu.c
index 04303950e630..edcc11d5eaf1 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
+#include "../include/hw_ip/mmu/mmu_general.h"
 
 #include 
 #include 
diff --git a/drivers/misc/habanalabs/common/pci.c 
b/drivers/misc/habanalabs/common/pci.c
index 1791f6623c69..7bd3737571f3 100644
--- a/drivers/misc/habanalabs/common/pci.c
+++ b/drivers/misc/habanalabs/common/pci.c
@@ -6,7 +6,7 @@
  */
 
 #include "habanalabs.h"
-#include "include/hw_ip/pci/pci_general.h"
+#include "../include/hw_ip/pci/pci_general.h"
 
 #include 
 #include 
diff --git a/drivers/misc/habanalabs/gaudi/Makefile 
b/drivers/misc/habanalabs/gaudi/Makefile
index 75104ae74e2b..c9f4703cff24 100644
--- a/drivers/misc/habanalabs/gaudi/Makefile
+++ b/drivers/misc/habanalabs/gaudi/Makefile
@@ -1,5 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
-subdir-ccflags-y += -I$(src)/common
-
 HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
gaudi/gaudi_coresight.o
diff 

Re: linux-next: build failure after merge of the char-misc tree

2020-07-28 Thread Greg KH
On Tue, Jul 28, 2020 at 06:23:59PM +1000, Stephen Rothwell wrote:
> Hi Greg,
> 
> On Tue, 28 Jul 2020 09:53:36 +0200 Greg KH  wrote:
> >
> > On Tue, Jul 28, 2020 at 05:33:31PM +1000, Stephen Rothwell wrote:
> > > Hi Greg,
> > > 
> > > On Mon, 27 Jul 2020 11:24:48 +0200 Greg KH  wrote:  
> > > >
> > > > On Mon, Jul 27, 2020 at 06:08:31PM +1000, Stephen Rothwell wrote:  
> > > > > Hi all,
> > > > > 
> > > > > After merging the char-misc tree, today's linux-next build (x86_64
> > > > > allmodconfig) failed like this:
> > > > > 
> > > > > In file included from drivers/misc/habanalabs/goya/goya.c:8:
> > > > > drivers/misc/habanalabs/goya/goyaP.h:12:10: fatal error: 
> > > > > habanalabs.h: No such file or directory
> > > > >12 | #include "habanalabs.h"
> > > > >   |  ^~
> > > > > In file included from drivers/misc/habanalabs/goya/goya_security.c:8:
> > > > > drivers/misc/habanalabs/goya/goyaP.h:12:10: fatal error: 
> > > > > habanalabs.h: No such file or directory
> > > > >12 | #include "habanalabs.h"
> > > > >   |  ^~
> > > > > 
> > > > > Presumably caused by commit
> > > > > 
> > > > >   70b2f993ea4a ("habanalabs: create common folder")
> > > > > 
> > > > > I have used the char-misc tree from next-20200724 for today.
> > > > 
> > > > Ugh, this is a mess of a merge with this driver.
> > > > 
> > > > Oded, I'll take Stephen's merge resolutions here and push out a new
> > > > version, and try to resolve this error, but if you could verify I got it
> > > > correct, that would be great.  
> > > 
> > > The conflicts are gone, but I still get these errors.  
> > 
> > Very odd, I can not duplicate this at all here.  I just did a clean
> > checkout of the char-misc-next branch and a full 'make allmodconfig' for
> > x86_64, and it worked just fine.
> > 
> > Are you sure it's not coming from some other tree?
> 
> Do you build with a separate object tree?  I always use make O=...
> which makes the difference.  I tested with just your tree.

Ah, no, I do not.  Odd, let me see what kind of crazy they are doing
with include build directives...

thanks for the hint.

greg k-h


Re: [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor

2020-07-28 Thread Andy Lutomirski
On Tue, Jul 28, 2020 at 9:32 AM Madhavan T. Venkataraman
 wrote:
>
> Thanks. See inline..
>
> On 7/28/20 10:13 AM, David Laight wrote:
> > From:  madve...@linux.microsoft.com
> >> Sent: 28 July 2020 14:11
> > ...
> >> The kernel creates the trampoline mapping without any permissions. When
> >> the trampoline is executed by user code, a page fault happens and the
> >> kernel gets control. The kernel recognizes that this is a trampoline
> >> invocation. It sets up the user registers based on the specified
> >> register context, and/or pushes values on the user stack based on the
> >> specified stack context, and sets the user PC to the requested target
> >> PC. When the kernel returns, execution continues at the target PC.
> >> So, the kernel does the work of the trampoline on behalf of the
> >> application.
> > Isn't the performance of this going to be horrid?
>
> It takes about the same amount of time as getpid(). So, it is
> one quick trip into the kernel. I expect that applications will
> typically not care about this extra overhead as long as
> they are able to run.

What did you test this on?  A page fault on any modern x86_64 system
is much, much, much, much slower than a syscall.

--Andy


Re: [PATCH] KVM: x86: Deflect unknown MSR accesses to user space

2020-07-28 Thread Jim Mattson
On Tue, Jul 28, 2020 at 5:41 AM Alexander Graf  wrote:
>
>
>
> On 28.07.20 10:15, Vitaly Kuznetsov wrote:
> >
> > Alexander Graf  writes:
> >
> >> MSRs are weird. Some of them are normal control registers, such as EFER.
> >> Some however are registers that really are model specific, not very
> >> interesting to virtualization workloads, and not performance critical.
> >> Others again are really just windows into package configuration.
> >>
> >> Out of these MSRs, only the first category is necessary to implement in
> >> kernel space. Rarely accessed MSRs, MSRs that should be fine-tuned against
> >> certain CPU models and MSRs that contain information on the package level
> >> are much better suited for user space to process. However, over time we 
> >> have
> >> accumulated a lot of MSRs that are not the first category, but still 
> >> handled
> >> by in-kernel KVM code.
> >>
> >> This patch adds a generic interface to handle WRMSR and RDMSR from user
> >> space. With this, any future MSR that is part of the latter categories can
> >> be handled in user space.

This sounds similar to Peter Hornyack's RFC from 5 years ago:
https://www.mail-archive.com/kvm@vger.kernel.org/msg124448.html.

> >> Furthermore, it allows us to replace the existing "ignore_msrs" logic with
> >> something that applies per-VM rather than on the full system. That way you
> >> can run productive VMs in parallel to experimental ones where you don't 
> >> care
> >> about proper MSR handling.
> >>
> >
> > In theory, we can go further: userspace will give KVM the list of MSRs
> > it is interested in. This list may even contain MSRs which are normally
> > handled by KVM, in this case userspace gets an option to mangle KVM's
> > reply (RDMSR) or do something extra (WRMSR). I'm not sure if there is a
> > real need behind this, just an idea.
> >
> > The problem with this approach is: if currently some MSR is not
> > implemented in KVM you will get an exit. When later someone comes with a
> > patch to implement this MSR your userspace handling will immediately get
> > broken so the list of not implemented MSRs effectively becomes an API :-)

Indeed. This is a legitimate concern. At Google, we have experienced
this problem already, using Peter Hornyack's approach. We ended up
commenting out some MSRs from kvm, which is less than ideal.

> Yeah, I'm not quite sure how to do this without bloating the kernel's
> memory footprint too much though.
>
> One option would be to create a shared bitmap with user space. But that
> would need to be sparse and quite big to be able to address all of
> today's possible MSR indexes. From a quick glimpse at Linux's MSR
> defines, there are:
>
>0x00000000 - 0x00001000 (Intel)
>0x00001000 - 0x00002000 (VIA)
>0x40000000 - 0x50000000 (PV)
>0xc0000000 - 0xc0003000 (AMD)
>0xc0010000 - 0xc0012000 (AMD)
>0x80860000 - 0x80870000 (Transmeta)
>
> Another idea would be to turn the logic around and implement an
> allowlist in KVM with all of the MSRs that KVM should handle. In that
> API we could ask for an array of KVM supported MSRs into user space.
> User space could then bounce that array back to KVM to have all in-KVM
> supported MSRs handled. Or it could remove entries that it wants to
> handle on its own.
>
> KVM internally could then save the list as a dense bitmap, translating
> every list entry into its corresponding bit.
>
> While it does feel a bit overengineered, it would solve the problem that
> we're turning in-KVM handled MSRs into an ABI.

It seems unlikely that userspace is going to know what to do with a
large number of MSRs. I suspect that a small enumerated list will
suffice. In fact, +Aaron Lewis is working on upstreaming a local
Google patch set that does just that.


Re: [PATCH v3 4/4] xen: add helpers to allocate unpopulated memory

2020-07-28 Thread Julien Grall

Hi Roger,

On 28/07/2020 17:59, Roger Pau Monné wrote:

On Tue, Jul 28, 2020 at 05:48:23PM +0100, Julien Grall wrote:

Hi,

On 27/07/2020 10:13, Roger Pau Monne wrote:

To be used in order to create foreign mappings. This is based on the
ZONE_DEVICE facility which is used by persistent memory devices in
order to create struct pages and kernel virtual mappings for the IOMEM
areas of such devices. Note that on kernels without support for
ZONE_DEVICE Xen will fallback to use ballooned pages in order to
create foreign mappings.

The newly added helpers use the same parameters as the existing
{alloc/free}_xenballooned_pages functions, which allows for in-place
replacement of the callers. Once a memory region has been added to be
used as scratch mapping space it will no longer be released, and pages
returned are kept in a linked list. This allows to have a buffer of
pages and prevents resorting to frequent additions and removals of
regions.

If enabled (because ZONE_DEVICE is supported) the usage of the new
functionality untangles Xen balloon and RAM hotplug from the usage of
unpopulated physical memory ranges to map foreign pages, which is the
correct thing to do in order to avoid mappings of foreign pages depending
on memory hotplug.

I think this is going to break Dom0 on Arm if the kernel has been built with
hotplug. This is because you may end up re-using a region that will be used
for the 1:1 mapping of a foreign map.

Note that I don't know whether hotplug has been tested on Xen on Arm yet. So
it might already be broken.

Meanwhile, my suggestion would be to make the use of hotplug in the balloon
code conditional (maybe using CONFIG_ARM64 and CONFIG_ARM)?


Right, this feature (allocation of unpopulated memory separated from
the balloon driver) is currently gated on CONFIG_ZONE_DEVICE, which I
think could be used on Arm.

IMO the right solution seems to be to subtract the physical memory
regions that can be used for the identity mappings of foreign pages
(all RAM on the system AFAICT) from iomem_resource, as that would make
this and the memory hotplug done in the balloon driver safe?


Dom0 doesn't know the regions used for the identity mappings as this is 
only managed by Xen. So there is nothing you can really do here.


But don't you have the same issue on x86 with "magic pages"?

Cheers,

--
Julien Grall


[PATCH 2/4] mm: swap: Fix kerneldoc of swap_vma_readahead()

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

mm/swap_state.c:742: warning: Function parameter or member 'fentry' not 
described in 'swap_vma_readahead'
mm/swap_state.c:742: warning: Excess function parameter 'entry' description 
in 'swap_vma_readahead'

Signed-off-by: Krzysztof Kozlowski 
---
 mm/swap_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 66e750f361ed..d034dbf9d0d5 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -725,7 +725,7 @@ static void swap_ra_info(struct vm_fault *vmf,
 
 /**
  * swap_vma_readahead - swap in pages in hope we need them soon
- * @entry: swap entry of this memory
+ * @fentry: swap entry of this memory
  * @gfp_mask: memory allocation flags
  * @vmf: fault information
  *
-- 
2.17.1



[PATCH 4/4] mm: mmu_notifier: Fix and extend kerneldoc

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

mm/mmu_notifier.c:187: warning: Function parameter or member 'interval_sub' 
not described in 'mmu_interval_read_begin'
mm/mmu_notifier.c:708: warning: Function parameter or member 'subscription' 
not described in 'mmu_notifier_register'
mm/mmu_notifier.c:708: warning: Excess function parameter 'mn' description 
in 'mmu_notifier_register'
mm/mmu_notifier.c:880: warning: Function parameter or member 'subscription' 
not described in 'mmu_notifier_put'
mm/mmu_notifier.c:880: warning: Excess function parameter 'mn' description 
in 'mmu_notifier_put'
mm/mmu_notifier.c:982: warning: Function parameter or member 'ops' not 
described in 'mmu_interval_notifier_insert'

Signed-off-by: Krzysztof Kozlowski 
---
 mm/mmu_notifier.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 352bb9f3ecc0..4fc918163dd3 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -166,7 +166,7 @@ static void mn_itree_inv_end(struct 
mmu_notifier_subscriptions *subscriptions)
 /**
  * mmu_interval_read_begin - Begin a read side critical section against a VA
  *   range
- * interval_sub: The interval subscription
+ * @interval_sub: The interval subscription
  *
  * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a
  * collision-retry scheme similar to seqcount for the VA range under
@@ -686,7 +686,7 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 
 /**
  * mmu_notifier_register - Register a notifier on a mm
- * @mn: The notifier to attach
+ * @subscription: The notifier to attach
  * @mm: The mm to attach the notifier to
  *
  * Must not hold mmap_lock nor any other VM related lock when calling
@@ -856,7 +856,7 @@ static void mmu_notifier_free_rcu(struct rcu_head *rcu)
 
 /**
  * mmu_notifier_put - Release the reference on the notifier
- * @mn: The notifier to act on
+ * @subscription: The notifier to act on
  *
  * This function must be paired with each mmu_notifier_get(), it releases the
  * reference obtained by the get. If this is the last reference then process
@@ -965,7 +965,8 @@ static int __mmu_interval_notifier_insert(
  * @interval_sub: Interval subscription to register
  * @start: Starting virtual address to monitor
  * @length: Length of the range to monitor
- * @mm : mm_struct to attach to
+ * @mm: mm_struct to attach to
+ * @ops: Interval notifier operations to be called on matching events
  *
  * This function subscribes the interval notifier for notifications from the
  * mm.  Upon return the ops related to mmu_interval_notifier will be called
-- 
2.17.1



[PATCH 3/4] mm: mempolicy: Fix kerneldoc of numa_map_to_online_node()

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

mm/mempolicy.c:137: warning: Function parameter or member 'node' not 
described in 'numa_map_to_online_node'
mm/mempolicy.c:137: warning: Excess function parameter 'nid' description in 
'numa_map_to_online_node'

Signed-off-by: Krzysztof Kozlowski 
---
 mm/mempolicy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 93fcfc1f2fa2..9894bb2f7452 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -129,7 +129,7 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
 
 /**
  * numa_map_to_online_node - Find closest online node
- * @nid: Node id to start the search
+ * @node: Node id to start the search
  *
  * Lookup the next closest node by distance if @nid is not online.
  */
-- 
2.17.1



[PATCH 1/4] anon_inodes: Make _anon_inode_getfile() static

2020-07-28 Thread Krzysztof Kozlowski
_anon_inode_getfile() function is not used outside so make it static to
fix W=1 warning:

fs/anon_inodes.c:80:14: warning: no previous prototype for 
'_anon_inode_getfile' [-Wmissing-prototypes]
   80 | struct file *_anon_inode_getfile(const char *name,

Signed-off-by: Krzysztof Kozlowski 
---
 fs/anon_inodes.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 25d92c64411e..90b022960027 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -77,11 +77,11 @@ static struct inode *anon_inode_make_secure_inode(
return inode;
 }
 
-struct file *_anon_inode_getfile(const char *name,
-const struct file_operations *fops,
-void *priv, int flags,
-const struct inode *context_inode,
-bool secure)
+static struct file *_anon_inode_getfile(const char *name,
+   const struct file_operations *fops,
+   void *priv, int flags,
+   const struct inode *context_inode,
+   bool secure)
 {
struct inode *inode;
struct file *file;
-- 
2.17.1



[PATCH] pci: vc: Fix kerneldoc

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

drivers/pci/vc.c:188: warning: Excess function parameter 'name' description 
in 'pci_vc_do_save_buffer'

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/pci/vc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index 5486f8768c86..5fc59ac31145 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -172,7 +172,6 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, int 
res)
  * @dev: device
  * @pos: starting position of VC capability (VC/VC9/MFVC)
  * @save_state: buffer for save/restore
- * @name: for error message
  * @save: if provided a buffer, this indicates what to do with it
  *
  * Walking Virtual Channel config space to size, save, or restore it
-- 
2.17.1



[PATCH] reset: Fix and extend kerneldoc

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

drivers/reset/core.c:50: warning: Function parameter or member 'array' not 
described in 'reset_control'
drivers/reset/core.c:50: warning: Function parameter or member 
'deassert_count' not described in 'reset_control'

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/reset/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 01c0c7aa835c..a2df88e90011 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -32,7 +32,8 @@ static LIST_HEAD(reset_lookup_list);
  * @refcnt: Number of gets of this reset_control
  * @acquired: Only one reset_control may be acquired for a given rcdev and id.
  * @shared: Is this a shared (1), or an exclusive (0) reset_control?
- * @deassert_cnt: Number of times this reset line has been deasserted
+ * @array: Is this an array of reset controls (1)?
+ * @deassert_count: Number of times this reset line has been deasserted
  * @triggered_count: Number of times this reset line has been reset. Currently
  *   only used for shared resets, which means that the value
  *   will be either 0 or 1.
-- 
2.17.1



[PATCH] KEYS: asymmetric: Fix kerneldoc

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

crypto/asymmetric_keys/asymmetric_type.c:160: warning: Function parameter 
or member 'kid1' not described in 'asymmetric_key_id_same'
crypto/asymmetric_keys/asymmetric_type.c:160: warning: Function parameter 
or member 'kid2' not described in 'asymmetric_key_id_same'
crypto/asymmetric_keys/asymmetric_type.c:160: warning: Excess function 
parameter 'kid_1' description in 'asymmetric_key_id_same'
crypto/asymmetric_keys/asymmetric_type.c:160: warning: Excess function 
parameter 'kid_2' description in 'asymmetric_key_id_same'

Signed-off-by: Krzysztof Kozlowski 
---
 crypto/asymmetric_keys/asymmetric_type.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/crypto/asymmetric_keys/asymmetric_type.c 
b/crypto/asymmetric_keys/asymmetric_type.c
index 33e77d846caa..ad8af3d70ac0 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -152,7 +152,8 @@ EXPORT_SYMBOL_GPL(asymmetric_key_generate_id);
 
 /**
  * asymmetric_key_id_same - Return true if two asymmetric keys IDs are the 
same.
- * @kid_1, @kid_2: The key IDs to compare
+ * @kid1: The key ID to compare
+ * @kid2: The key ID to compare
  */
 bool asymmetric_key_id_same(const struct asymmetric_key_id *kid1,
const struct asymmetric_key_id *kid2)
@@ -168,7 +169,8 @@ EXPORT_SYMBOL_GPL(asymmetric_key_id_same);
 /**
  * asymmetric_key_id_partial - Return true if two asymmetric keys IDs
  * partially match
- * @kid_1, @kid_2: The key IDs to compare
+ * @kid1: The key ID to compare
+ * @kid2: The key ID to compare
  */
 bool asymmetric_key_id_partial(const struct asymmetric_key_id *kid1,
   const struct asymmetric_key_id *kid2)
-- 
2.17.1



[PATCH] cpufreq: intel_pstate: Fix EPP setting via sysfs in active mode

2020-07-28 Thread Rafael J. Wysocki
From: Rafael J. Wysocki 

Because intel_pstate_set_energy_pref_index() reads and writes the
MSR_HWP_REQUEST register without using the cached value of it used by
intel_pstate_hwp_boost_up() and intel_pstate_hwp_boost_down(), those
functions may overwrite the value written by it and so the EPP value
set via sysfs may be lost.

To avoid that, make intel_pstate_set_energy_pref_index() take the
cached value of MSR_HWP_REQUEST just like the other two routines
mentioned above and update it with the new EPP value coming from
user space in addition to updating the MSR.

Note that the MSR itself still needs to be updated too in case
hwp_boost is unset or the boosting mechanism is not active at the
EPP change time.

Fixes: e0efd5be63e8 ("cpufreq: intel_pstate: Add HWP boost utility and sched 
util hooks")
Reported-by: Francisco Jerez 
Signed-off-by: Rafael J. Wysocki 
---

This patch is on top of https://patchwork.kernel.org/patch/11689347/

---
 drivers/cpufreq/intel_pstate.c |   17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

Index: linux-pm/drivers/cpufreq/intel_pstate.c
===
--- linux-pm.orig/drivers/cpufreq/intel_pstate.c
+++ linux-pm/drivers/cpufreq/intel_pstate.c
@@ -653,11 +653,12 @@ static int intel_pstate_set_energy_pref_
epp = cpu_data->epp_default;
 
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
-   u64 value;
-
-   ret = rdmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, &value);
-   if (ret)
-   return ret;
+   /*
+* Use the cached HWP Request MSR value, because the register
+* itself may be updated by intel_pstate_hwp_boost_up() or
+* intel_pstate_hwp_boost_down() at any time.
+*/
+   u64 value = READ_ONCE(cpu_data->hwp_req_cached);
 
value &= ~GENMASK_ULL(31, 24);
 
@@ -667,6 +668,12 @@ static int intel_pstate_set_energy_pref_
epp = epp_values[pref_index - 1];
 
value |= (u64)epp << 24;
+   /*
+* The only other updater of hwp_req_cached in the active mode,
+* intel_pstate_hwp_set(), is called under the same lock as this
+* function, so it cannot run in parallel with the update below.
+*/
+   WRITE_ONCE(cpu_data->hwp_req_cached, value);
ret = wrmsrl_on_cpu(cpu_data->cpu, MSR_HWP_REQUEST, value);
} else {
if (epp == -EINVAL)





[PATCH] dmaengine: ti: omap-dma: Drop of_match_ptr to fix -Wunused-const-variable

2020-07-28 Thread Krzysztof Kozlowski
The of_device_id is included unconditionally by of.h header and used
in the driver as well.  Remove of_match_ptr to fix W=1 compile test
warning with !CONFIG_OF:

drivers/dma/ti/omap-dma.c:1892:34: warning: 'omap_dma_match' defined but 
not used [-Wunused-const-variable=]
 1892 | static const struct of_device_id omap_dma_match[] = {

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/dma/ti/omap-dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
index 918301e17552..c9fe5e3a6b55 100644
--- a/drivers/dma/ti/omap-dma.c
+++ b/drivers/dma/ti/omap-dma.c
@@ -1904,7 +1904,7 @@ static struct platform_driver omap_dma_driver = {
.remove = omap_dma_remove,
.driver = {
.name = "omap-dma-engine",
-   .of_match_table = of_match_ptr(omap_dma_match),
+   .of_match_table = omap_dma_match,
},
 };
 
-- 
2.17.1



[PATCH 3/3] iommu: qcom: Drop of_match_ptr to fix -Wunused-const-variable

2020-07-28 Thread Krzysztof Kozlowski
The of_device_id is included unconditionally by of.h header and used
in the driver as well.  Remove of_match_ptr to fix W=1 compile test
warning with !CONFIG_OF:

drivers/iommu/qcom_iommu.c:910:34: warning: 'qcom_iommu_of_match' defined 
but not used [-Wunused-const-variable=]
  910 | static const struct of_device_id qcom_iommu_of_match[] = {

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/iommu/qcom_iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index af6bec3ace00..9535a6af7553 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -752,7 +752,7 @@ static const struct of_device_id ctx_of_match[] = {
 static struct platform_driver qcom_iommu_ctx_driver = {
.driver = {
.name   = "qcom-iommu-ctx",
-   .of_match_table = of_match_ptr(ctx_of_match),
+   .of_match_table = ctx_of_match,
},
.probe  = qcom_iommu_ctx_probe,
.remove = qcom_iommu_ctx_remove,
@@ -915,7 +915,7 @@ static const struct of_device_id qcom_iommu_of_match[] = {
 static struct platform_driver qcom_iommu_driver = {
.driver = {
.name   = "qcom-iommu",
-   .of_match_table = of_match_ptr(qcom_iommu_of_match),
+   .of_match_table = qcom_iommu_of_match,
.pm = &qcom_iommu_pm_ops,
},
.probe  = qcom_iommu_device_probe,
-- 
2.17.1



Re: [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor

2020-07-28 Thread Madhavan T. Venkataraman



On 7/28/20 12:05 PM, James Morris wrote:
> On Tue, 28 Jul 2020, Casey Schaufler wrote:
>
>> You could make a separate LSM to do these checks instead of limiting
>> it to SELinux. Your use case, your call, of course.
> It's not limited to SELinux. This is hooked via the LSM API and 
> implementable by any LSM (similar to execmem, execstack etc.)

Yes. I have an implementation that I am testing right now that
defines the hook for exectramp and implements it for
SELinux. That is why I mentioned SELinux.

Madhavan


[PATCH 1/2] dma-buf: Fix kerneldoc of dma_buf_set_name()

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

drivers/dma-buf/dma-buf.c:328: warning: Function parameter or member 
'dmabuf' not described in 'dma_buf_set_name'

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/dma-buf/dma-buf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 1699a8e309ef..58564d82a3a2 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -316,9 +316,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table 
*poll)
  * name of the dma-buf if the same piece of memory is used for multiple
  * purpose between different devices.
  *
- * @dmabuf [in] dmabuf buffer that will be renamed.
- * @buf:   [in] A piece of userspace memory that contains the name of
- *  the dma-buf.
+ * @dmabuf: [in] dmabuf buffer that will be renamed.
+ * @buf:[in] A piece of userspace memory that contains the name of
+ *   the dma-buf.
  *
  * Returns 0 on success. If the dma-buf buffer is already attached to
  * devices, return -EBUSY.
-- 
2.17.1



[PATCH 2/3] iommu: intel: Drop kerneldoc marker from regular comment

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

drivers/iommu/intel/dmar.c:389: warning: Function parameter or member 
'header' not described in 'dmar_parse_one_drhd'

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/iommu/intel/dmar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 93e6345f3414..ba47edf03941 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -380,7 +380,7 @@ dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
return NULL;
 }
 
-/**
+/*
  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
  * structure which uniquely represent one DMA remapping hardware unit
  * present in the platform
-- 
2.17.1



[PATCH 1/3] iommu: amd: Fix kerneldoc

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

drivers/iommu/amd/init.c:1586: warning: Function parameter or member 'ivrs' 
not described in 'get_highest_supported_ivhd_type'
drivers/iommu/amd/init.c:1938: warning: Function parameter or member 
'iommu' not described in 'iommu_update_intcapxt'

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/iommu/amd/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 958050c213f9..4a37169b1b1b 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1578,7 +1578,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, 
struct ivhd_header *h)
 
 /**
  * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
- * @ivrs  Pointer to the IVRS header
+ * @ivrs: Pointer to the IVRS header
  *
  * This function search through all IVDB of the maximum supported IVHD
  */
@@ -1929,7 +1929,7 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 #define XT_INT_VEC(x)  (((x) & 0xFFULL) << 32)
 #define XT_INT_DEST_HI(x)  ((((x) >> 24) & 0xFFULL) << 56)
 
-/**
+/*
  * Setup the IntCapXT registers with interrupt routing information
  * based on the PCI MSI capability block registers, accessed via
  * MMIO MSI address low/hi and MSI data registers.
-- 
2.17.1



[PATCH 2/2] dma-buf: fence-chain: Document missing dma_fence_chain_init() parameter in kerneldoc

2020-07-28 Thread Krzysztof Kozlowski
Fix W=1 compile warnings (invalid kerneldoc):

drivers/dma-buf/dma-fence-chain.c:233: warning: Function parameter or 
member 'seqno' not described in 'dma_fence_chain_init'

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/dma-buf/dma-fence-chain.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma-buf/dma-fence-chain.c 
b/drivers/dma-buf/dma-fence-chain.c
index 3d123502ff12..7d129e68ac70 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -222,6 +222,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops);
  * @chain: the chain node to initialize
  * @prev: the previous fence
  * @fence: the current fence
+ * @seqno: the sequence number to use for the fence chain
  *
  * Initialize a new chain node and either start a new chain or add the node to
  * the existing chain of the previous fence.
-- 
2.17.1



Re: [PATCH] amdgpu_dm: fix nonblocking atomic commit use-after-free

2020-07-28 Thread Kazlauskas, Nicholas

On 2020-07-28 5:22 a.m., Paul Menzel wrote:

Dear Linux folks,


Am 25.07.20 um 07:20 schrieb Mazin Rezk:

On Saturday, July 25, 2020 12:59 AM, Duncan wrote:


On Sat, 25 Jul 2020 03:03:52 + Mazin Rezk wrote:


Am 24.07.20 um 19:33 schrieb Kees Cook:


There was a fix to disable the async path for this driver that
worked around the bug too, yes? That seems like a safer and more
focused change that doesn't revert the SLUB defense for all
users, and would actually provide a complete, I think, workaround


That said, I haven't seen the async disabling patch. If you could
link to it, I'd be glad to test it out and perhaps we can use that
instead.


I'm confused. Not to put words in Kees' mouth; /I/ am confused (which
admittedly could well be just because I make no claims to be a
coder and am simply reading the bug and thread, but I'd appreciate some
"unconfusing" anyway).

My interpretation of the "async disabling" reference was that it was to
comment #30 on the bug:

https://bugzilla.kernel.org/show_bug.cgi?id=207383#c30 



... which (if I'm not confused on this point too) appears to be yours.
There it was stated...

I've also found that this bug exclusively occurs when commit_work is on
the workqueue. After forcing drm_atomic_helper_commit to run all of the
commits without adding to the workqueue and running the OS, the issue
seems to have disappeared.


Would not forcing all commits to run directly, without placing them on
the workqueue, be "async disabling"? That's what I /thought/ he was
referencing.


Oh, I thought he was referring to a different patch. Kees, could I get
your confirmation on this?

The change I made actually affected all of the DRM code, although this could
easily be changed to be specific to amdgpu. (By forcing blocking on
amdgpu_dm's non-blocking commit code)

That said, I'd still need to test further because I only tested it for a
couple of hours then. Although it should work in theory.


OTOH your base/context swap idea sounds like a possibly "less
disturbance" workaround, if it works, and given the point in the
commit cycle... (But if it's out Sunday it's likely too late to test
and get it in now anyway; if it's another week, tho...)


The base/context swap idea should make the use-after-free behave how it
did in 5.6. Since the bug doesn't cause an issue in 5.6, it's less of a
"less disturbance" workaround and more of a "no disturbance" workaround.


Sorry for bothering, but is there now a solution, besides reverting the 
commits, to avoid freezes/crashes *without* performance regressions?



Kind regards,

Paul


Mazin's "drm/amd/display: Clear dm_state for fast updates" change 
accomplishes this, at least as a temporary hack.


I've started work on a larger-scale fix that we could get in afterwards.

Regards,
Nicholas Kazlauskas


KASAN: out-of-bounds Read in ath9k_hif_usb_rx_cb (2)

2020-07-28 Thread syzbot
Hello,

syzbot found the following issue on:

HEAD commit:25252919 xhci: dbgtty: Make some functions static
git tree:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git 
usb-testing
console output: https://syzkaller.appspot.com/x/log.txt?x=164adf2890
kernel config:  https://syzkaller.appspot.com/x/.config?x=fb6677a3d4f11788
dashboard link: https://syzkaller.appspot.com/bug?extid=dbcf296f0cfda711b5c4
compiler:   gcc (GCC) 10.1.0-syz 20200507

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+dbcf296f0cfda711b...@syzkaller.appspotmail.com

==
BUG: KASAN: out-of-bounds in ath9k_hif_usb_rx_stream 
drivers/net/wireless/ath/ath9k/hif_usb.c:637 [inline]
BUG: KASAN: out-of-bounds in ath9k_hif_usb_rx_cb+0xe82/0xf80 
drivers/net/wireless/ath/ath9k/hif_usb.c:671
Read of size 4 at addr 8881a248c098 by task kworker/0:5/3249

CPU: 0 PID: 3249 Comm: kworker/0:5 Not tainted 5.8.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: events request_firmware_work_func
Call Trace:
 
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xf6/0x16e lib/dump_stack.c:118
 print_address_description.constprop.0+0x1a/0x210 mm/kasan/report.c:383
 __kasan_report mm/kasan/report.c:513 [inline]
 kasan_report.cold+0x37/0x7c mm/kasan/report.c:530
 ath9k_hif_usb_rx_stream drivers/net/wireless/ath/ath9k/hif_usb.c:637 [inline]
 ath9k_hif_usb_rx_cb+0xe82/0xf80 drivers/net/wireless/ath/ath9k/hif_usb.c:671
 __usb_hcd_giveback_urb+0x32d/0x560 drivers/usb/core/hcd.c:1650
 usb_hcd_giveback_urb+0x367/0x410 drivers/usb/core/hcd.c:1716
 dummy_timer+0x11f2/0x3240 drivers/usb/gadget/udc/dummy_hcd.c:1967
 call_timer_fn+0x1ac/0x6e0 kernel/time/timer.c:1415
 expire_timers kernel/time/timer.c:1460 [inline]
 __run_timers.part.0+0x54c/0x9e0 kernel/time/timer.c:1784
 __run_timers kernel/time/timer.c:1756 [inline]
 run_timer_softirq+0x80/0x120 kernel/time/timer.c:1797
 __do_softirq+0x222/0x95b kernel/softirq.c:292
 asm_call_on_stack+0xf/0x20 arch/x86/entry/entry_64.S:711
 
 __run_on_irqstack arch/x86/include/asm/irq_stack.h:22 [inline]
 run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:48 [inline]
 do_softirq_own_stack+0xed/0x140 arch/x86/kernel/irq_64.c:77
 invoke_softirq kernel/softirq.c:387 [inline]
 __irq_exit_rcu kernel/softirq.c:417 [inline]
 irq_exit_rcu+0x150/0x1f0 kernel/softirq.c:429
 sysvec_apic_timer_interrupt+0x49/0xc0 arch/x86/kernel/apic/apic.c:1091
 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:585
RIP: 0010:arch_local_irq_restore arch/x86/include/asm/irqflags.h:85 [inline]
RIP: 0010:console_unlock+0xbe2/0xcd0 kernel/printk/printk.c:2497
Code: fc ff ff e8 10 31 16 00 0f 0b e9 b1 fd ff ff e8 04 31 16 00 0f 0b e9 04 
fe ff ff e8 f8 30 16 00 e8 43 bc 1b 00 ff 74 24 30 9d  6b fc ff ff e8 04 e0 
3f 00 e9 65 f6 ff ff e8 0a e0 3f 00 e9 1f
RSP: 0018:8881c8a3fa18 EFLAGS: 0293
RAX: 00465681 RBX: 0200 RCX: 0006
RDX:  RSI:  RDI: 8129790d
RBP: 0001 R08:  R09: 
R10: 0001 R11:  R12: 82b05a80
R13: 876f53b0 R14: 0042 R15: dc00
 vprintk_emit+0x1b2/0x460 kernel/printk/printk.c:2021
 vprintk_func+0x8b/0x133 kernel/printk/printk_safe.c:393
 printk+0xba/0xed kernel/printk/printk.c:2070
 ath9k_htc_hw_init.cold+0x17/0x2a drivers/net/wireless/ath/ath9k/htc_hst.c:502
 ath9k_hif_usb_firmware_cb+0x274/0x530 
drivers/net/wireless/ath/ath9k/hif_usb.c:1220
 request_firmware_work_func+0x126/0x250 drivers/base/firmware_loader/main.c:1001
 process_one_work+0x94c/0x15f0 kernel/workqueue.c:2269
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
 kthread+0x392/0x470 kernel/kthread.c:291
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293

general protection fault, probably for non-canonical address 
0xdead0400:  [#1] SMP KASAN
CPU: 0 PID: 3249 Comm: kworker/0:5 Not tainted 5.8.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: events request_firmware_work_func
RIP: 0010:nearest_obj include/linux/slub_def.h:176 [inline]
RIP: 0010:print_address_description.constprop.0+0x18e/0x210 
mm/kasan/report.c:388
Code: c4 60 5b 5d 41 5c 41 5d c3 4c 89 e6 48 2b 35 41 e2 a3 05 48 89 e8 49 8b 
5c 24 18 48 c1 fe 06 48 c1 e6 0c 48 03 35 3a e2 a3 05 <8b> 4b 18 48 29 f0 48 99 
48 89 cf 48 f7 f9 41 0f b7 44 24 2a 48 89
RSP: 0018:8881db209838 EFLAGS: 00010086
RAX: 8881a248c098 RBX: dead0400 RCX: 
RDX: 0001 RSI: 8881a248c000 RDI: ed103b6412f9
RBP: 8881a248c098 R08:  R09: 8881db21fe8b
R10:  R11: 0004 R12: ea0006892300
R13: 

Re: [PATCH v3 4/4] xen: add helpers to allocate unpopulated memory

2020-07-28 Thread Andrew Cooper
On 28/07/2020 17:59, Roger Pau Monné wrote:
> On Tue, Jul 28, 2020 at 05:48:23PM +0100, Julien Grall wrote:
>> Hi,
>>
>> On 27/07/2020 10:13, Roger Pau Monne wrote:
>>> To be used in order to create foreign mappings. This is based on the
>>> ZONE_DEVICE facility which is used by persistent memory devices in
>>> order to create struct pages and kernel virtual mappings for the IOMEM
>>> areas of such devices. Note that on kernels without support for
>>> ZONE_DEVICE Xen will fallback to use ballooned pages in order to
>>> create foreign mappings.
>>>
>>> The newly added helpers use the same parameters as the existing
>>> {alloc/free}_xenballooned_pages functions, which allows for in-place
>>> replacement of the callers. Once a memory region has been added to be
>>> used as scratch mapping space it will no longer be released, and pages
>>> returned are kept in a linked list. This allows to have a buffer of
>>> pages and prevents resorting to frequent additions and removals of
>>> regions.
>>>
>>> If enabled (because ZONE_DEVICE is supported) the usage of the new
>>> functionality untangles Xen balloon and RAM hotplug from the usage of
>>> unpopulated physical memory ranges to map foreign pages, which is the
>>> correct thing to do in order to avoid mappings of foreign pages depend
>>> on memory hotplug.
>> I think this is going to break Dom0 on Arm if the kernel has been built with
>> hotplug. This is because you may end up to re-use region that will be used
>> for the 1:1 mapping of a foreign map.
>>
>> Note that I don't know whether hotplug has been tested on Xen on Arm yet. So
>> it might be possible to be already broken.
>>
>> Meanwhile, my suggestion would be to make the use of hotplug in the balloon
>> code conditional (maybe using CONFIG_ARM64 and CONFIG_ARM)?
> Right, this feature (allocation of unpopulated memory separated from
> the balloon driver) is currently gated on CONFIG_ZONE_DEVICE, which I
> think could be used on Arm.
>
> IMO the right solution seems to be to subtract the physical memory
> regions that can be used for the identity mappings of foreign pages
> (all RAM on the system AFAICT) from iomem_resource, as that would make
> this and the memory hotplug done in the balloon driver safe?

The right solution is a mechanism for translated guests to query Xen to
find regions of guest physical address space which are unused, and can
safely be used for foreign/grant/other mappings.

Please don't waste any more time applying more duct tape to a broken
system, and instead spend the time organising some proper foundations.

~Andrew


[PATCH 0/1] Fix static checker warning.

2020-07-28 Thread Zhou Yanjie
Fix the warning that appears during static analysis.

周琰杰 (Zhou Yanjie) (1):
  USB: PHY: JZ4770: Fix static checker warning.

 drivers/usb/phy/phy-jz4770.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

-- 
2.11.0



[PATCH 1/1] USB: PHY: JZ4770: Fix static checker warning.

2020-07-28 Thread Zhou Yanjie
Commit 2a6c0b82e651 ("USB: PHY: JZ4770: Add support for new
Ingenic SoCs.") introduced per-chip initialization functions, but
left the code for the reset sequence in the original function, which
results in the 'reg' variable being used uninitialized. Fix this by
moving the reset code into the initialization function of each chip.
The four processors currently supported share the same reset code, so
the problem could also be solved by initializing 'reg' in the original
function; however, when processors with different reset methods (such
as the X2000) are introduced in the future, that approach would
require conditional checks that complicate the function, which defeats
the purpose of introducing per-processor initialization functions.

Fixes: 2a6c0b82e651 ("USB: PHY: JZ4770: Add support for new Ingenic SoCs.")

Reported-by: Colin Ian King 
Reported-by: Dan Carpenter 
Signed-off-by: 周琰杰 (Zhou Yanjie) 
---
 drivers/usb/phy/phy-jz4770.c | 26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/phy/phy-jz4770.c b/drivers/usb/phy/phy-jz4770.c
index d4ee3cb721ea..23d38cbc150e 100644
--- a/drivers/usb/phy/phy-jz4770.c
+++ b/drivers/usb/phy/phy-jz4770.c
@@ -158,7 +158,6 @@ static int ingenic_usb_phy_init(struct usb_phy *phy)
 {
struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
int err;
-   u32 reg;
 
err = regulator_enable(priv->vcc_supply);
if (err) {
@@ -174,11 +173,6 @@ static int ingenic_usb_phy_init(struct usb_phy *phy)
 
priv->soc_info->usb_phy_init(phy);
 
-   /* Wait for PHY to reset */
-   usleep_range(30, 300);
-   writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
-   usleep_range(300, 1000);
-
return 0;
 }
 
@@ -205,6 +199,11 @@ static void jz4770_usb_phy_init(struct usb_phy *phy)
USBPCR_TXFSLSTUNE_DFT | USBPCR_TXRISETUNE_DFT | 
USBPCR_TXVREFTUNE_DFT |
USBPCR_POR;
writel(reg, priv->base + REG_USBPCR_OFFSET);
+
+   /* Wait for PHY to reset */
+   usleep_range(30, 300);
+   writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
+   usleep_range(300, 1000);
 }
 
 static void jz4780_usb_phy_init(struct usb_phy *phy)
@@ -218,6 +217,11 @@ static void jz4780_usb_phy_init(struct usb_phy *phy)
 
reg = USBPCR_TXPREEMPHTUNE | USBPCR_COMMONONN | USBPCR_POR;
writel(reg, priv->base + REG_USBPCR_OFFSET);
+
+   /* Wait for PHY to reset */
+   usleep_range(30, 300);
+   writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
+   usleep_range(300, 1000);
 }
 
 static void x1000_usb_phy_init(struct usb_phy *phy)
@@ -232,6 +236,11 @@ static void x1000_usb_phy_init(struct usb_phy *phy)
USBPCR_TXHSXVTUNE_DCR_15MV | USBPCR_TXVREFTUNE_INC_25PPT |
USBPCR_COMMONONN | USBPCR_POR;
writel(reg, priv->base + REG_USBPCR_OFFSET);
+
+   /* Wait for PHY to reset */
+   usleep_range(30, 300);
+   writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
+   usleep_range(300, 1000);
 }
 
 static void x1830_usb_phy_init(struct usb_phy *phy)
@@ -249,6 +258,11 @@ static void x1830_usb_phy_init(struct usb_phy *phy)
reg = USBPCR_IDPULLUP_OTG | USBPCR_VBUSVLDEXT | USBPCR_TXPREEMPHTUNE |
USBPCR_COMMONONN | USBPCR_POR;
writel(reg, priv->base + REG_USBPCR_OFFSET);
+
+   /* Wait for PHY to reset */
+   usleep_range(30, 300);
+   writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
+   usleep_range(300, 1000);
 }
 
 static const struct ingenic_soc_info jz4770_soc_info = {
-- 
2.11.0



Re: [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor

2020-07-28 Thread James Morris
On Tue, 28 Jul 2020, Casey Schaufler wrote:

> You could make a separate LSM to do these checks instead of limiting
> it to SELinux. Your use case, your call, of course.

It's not limited to SELinux. This is hooked via the LSM API and 
implementable by any LSM (similar to execmem, execstack etc.)


-- 
James Morris




Re: [RFC PATCH 5/9] PCI/AER: Apply function level reset to RCiEP on fatal error

2020-07-28 Thread Jonathan Cameron
On Tue, 28 Jul 2020 09:14:11 -0700
Sean V Kelley  wrote:

> On 28 Jul 2020, at 6:27, Zhuo, Qiuxu wrote:
> 
> >> From: Jonathan Cameron 
> >> Sent: Monday, July 27, 2020 7:17 PM
> >> To: Kelley, Sean V 
> >> Cc: bhelg...@google.com; r...@rjwysocki.net; ashok@kernel.org; 
> >> Luck,
> >> Tony ;
> >> sathyanarayanan.kuppusw...@linux.intel.com; 
> >> linux-...@vger.kernel.org;
> >> linux-kernel@vger.kernel.org; Zhuo, Qiuxu 
> >> Subject: Re: [RFC PATCH 5/9] PCI/AER: Apply function level reset to 
> >> RCiEP
> >> on fatal error
> >>
> >> On Fri, 24 Jul 2020 10:22:19 -0700
> >> Sean V Kelley  wrote:
> >>  
> >>> From: Qiuxu Zhuo 
> >>>
> >>> Attempt to do function level reset for an RCiEP associated with an
> >>> RCEC device on fatal error.  
> >>
> >> I'd like to understand more on your reasoning for flr here.
> >> Is it simply that it is all we can do, or is there some basis in a 
> >> spec
> >> somewhere?
> >>  
> >
> > Yes. Though there isn't the link reset for the RCiEP here, I think we 
> > should still be able to reset the RCiEP via FLR on fatal error, if the 
> > RCiEP supports FLR.
> >
> > -Qiuxu
> >  
> 
> Also see PCIe 5.0-1, Sec. 6.6.2 Function Level Reset (FLR)
> 
> Implementation of FLR is optional (not required), but is strongly 
> recommended. For an example use case consider CXL. Function 0 DVSEC 
> instances control for the CXL functionality of the entire CXL device. 
> FLR may succeed in recovering from CXL.io domain errors.

That feels a little bit of a weak argument in favour.  PCI spec lists examples
of use only for FLR and I can't see this matching any of them, but then they
are only examples, so we could argue it doesn't exclude this use. It's not
allowed to affect the link state, but I guess it 'might' recover from some
other type of error?

I'd have read the statement in the CXL spec you are referring to as matching
with the first example in the PCIe spec which is about recovering from
software errors.  For example, unexpected VM tear down.

@Bjorn / All.  What's your view on using FLR as a reset to do when you don't
have any other hammers to use?

Personally I don't have a particular problem with this, it just doesn't fit
with my mental model of what FLR is for (which may well need adjusting :)

Jonathan


> 
> Thanks,
> 
> Sean
> 
> >>>
> >>> Signed-off-by: Qiuxu Zhuo 
> >>> ---
> >>>  drivers/pci/pcie/err.c | 31 ++-
> >>>  1 file changed, 22 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index
> >>> 044df004f20b..9b3ec94bdf1d 100644
> >>> --- a/drivers/pci/pcie/err.c
> >>> +++ b/drivers/pci/pcie/err.c
> >>> @@ -170,6 +170,17 @@ static void pci_walk_dev_affected(struct  
> >> pci_dev *dev, int (*cb)(struct pci_dev  
> >>>  }
> >>>  }
> >>>
> >>> +static enum pci_channel_state flr_on_rciep(struct pci_dev *dev) {
> >>> +if (!pcie_has_flr(dev))
> >>> +return PCI_ERS_RESULT_NONE;
> >>> +
> >>> +if (pcie_flr(dev))
> >>> +return PCI_ERS_RESULT_DISCONNECT;
> >>> +
> >>> +return PCI_ERS_RESULT_RECOVERED;
> >>> +}
> >>> +
> >>>  pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >>>  enum pci_channel_state state,
> >>>  pci_ers_result_t (*reset_link)(struct pci_dev *pdev))  
> >> @@ -191,15  
> >>> +202,17 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
> >>>  if (state == pci_channel_io_frozen) {
> >>>  pci_walk_dev_affected(dev, report_frozen_detected,  
> >> &status);  
> >>>  if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) {
> >>> -pci_warn(dev, "link reset not possible for RCiEP\n");
> >>> -status = PCI_ERS_RESULT_NONE;
> >>> -goto failed;
> >>> -}
> >>> -
> >>> -status = reset_link(dev);
> >>> -if (status != PCI_ERS_RESULT_RECOVERED) {
> >>> -pci_warn(dev, "link reset failed\n");
> >>> -goto failed;
> >>> +status = flr_on_rciep(dev);
> >>> +if (status != PCI_ERS_RESULT_RECOVERED) {
> >>> +pci_warn(dev, "function level reset failed\n");
> >>> +goto failed;
> >>> +}
> >>> +} else {
> >>> +status = reset_link(dev);
> >>> +if (status != PCI_ERS_RESULT_RECOVERED) {
> >>> +pci_warn(dev, "link reset failed\n");
> >>> +goto failed;
> >>> +}
> >>>  }
> >>>  } else {
> >>>  pci_walk_dev_affected(dev, report_normal_detected,  
> >> &status);
> >>  




RE: [PATCH] ASoC: Intel: Atom: use hardware counter to update hw_ptr

2020-07-28 Thread Lu, Brent
> 
> So if there are already quirks in atom machine drivers to change the period
> size, why is this patch necessary?
> 

The story is: Google implemented the constraint but doesn't know why it works,
so they asked us to explain. After checking the two counters I realized that
the ring buffer pointer increases according to the period size set in hw_param
(256), but the interrupt period is always 5ms instead of 5.33ms, so it's
running a little bit too fast. It seems the LPE keeps track of the difference
between the two counters. When the difference exceeds 2160 samples, the next
interrupt is canceled so the hardware counter can catch up a little.
[   43.208299] intel_sst_acpi 808622A8:00: mrfld ring_buffer_counter 107520 
hardware_counter 98880 pcm delay 8640 (in bytes)
[   43.208306] intel_sst_acpi 808622A8:00: buffer ptr 26880 pcm_delay rep: 2160
[   43.208321] sound pcmC1D0p: [Q] pos 26880 hw_ptr 26880 appl_ptr 4 avail 
191680
=> one interrupt is skipped.
[   43.218299] intel_sst_acpi 808622A8:00: mrfld ring_buffer_counter 108544 
hardware_counter 100800 pcm delay 7744 (in bytes)
[   43.218307] intel_sst_acpi 808622A8:00: buffer ptr 27136 pcm_delay rep: 1936
[   43.218336] sound pcmC1D0p: [Q] pos 27136 hw_ptr 27136 appl_ptr 4 avail 
191936

So I thought: why not use the hardware counter? It increases by 240 samples
every 5ms, which perfectly matches the 48000 sample rate. The test result is
good, but I know there must be a reason why the original designer used the
ring buffer counter instead of the hardware counter. I uploaded this patch to
see if anyone still remembers the reason and can share some insight with me.

I totally agree that we shouldn't touch this part of the design. Do you think
it makes sense to add a constraint to enforce the period size in the machine
driver? If yes, then I will upload patches for the Chrome atom machines for
Google.


Regards,
Brent

> > I'm curious why not just using hardware counter to update hw_ptr and
> > get rid of the period setting in hw_param? It seems to me the ring
> > buffer counter does not reflect the real status.
> 
> I don't recall precisely what this hardware counter does. I vaguely recall 
> it's
> tied to the 19.2MHz external timer which is also used to schedule the 1ms
> SBA mixer and the SSP IOs. And by comparing with the ring buffer pointer
> you can infer the delay inside the DSP. I think you are also making an
> assumption that all streams are tied to the output rate, but that's most 
> likely
> a bad assumption. The hard-coded topology supported media, speech and
> compressed data and the consumption rate on the DMA side could be faster
> with some buffering happening in the DSP.
> It's not a passthrough DMA in all cases.
> 
> This is really legacy code that no one really fully understands nor plans on
> improving, it'd be a bad idea to change the pcm pointer reports now, 6 years
> after the initial code release and after all initial contributors moved on. 
> It's
> what it is.
> 



Re: [PATCH 04/23] devtmpfs: refactor devtmpfsd()

2020-07-28 Thread Greg Kroah-Hartman
On Tue, Jul 28, 2020 at 06:33:57PM +0200, Christoph Hellwig wrote:
> Split the main worker loop into a separate function.  This allows
> devtmpfsd_setup to be marked __init, which will allow us to call
> __init routines for the setup work.  devtmpfs itself needs a __ref
> marker for that to work, and a comment explaining why it works.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Greg Kroah-Hartman 


Re: [PATCH v3 4/4] xen: add helpers to allocate unpopulated memory

2020-07-28 Thread Roger Pau Monné
On Tue, Jul 28, 2020 at 05:48:23PM +0100, Julien Grall wrote:
> Hi,
> 
> On 27/07/2020 10:13, Roger Pau Monne wrote:
> > To be used in order to create foreign mappings. This is based on the
> > ZONE_DEVICE facility which is used by persistent memory devices in
> > order to create struct pages and kernel virtual mappings for the IOMEM
> > areas of such devices. Note that on kernels without support for
> > ZONE_DEVICE Xen will fallback to use ballooned pages in order to
> > create foreign mappings.
> > 
> > The newly added helpers use the same parameters as the existing
> > {alloc/free}_xenballooned_pages functions, which allows for in-place
> > replacement of the callers. Once a memory region has been added to be
> > used as scratch mapping space it will no longer be released, and pages
> > returned are kept in a linked list. This allows to have a buffer of
> > pages and prevents resorting to frequent additions and removals of
> > regions.
> > 
> > If enabled (because ZONE_DEVICE is supported) the usage of the new
> > functionality untangles Xen balloon and RAM hotplug from the usage of
> > unpopulated physical memory ranges to map foreign pages, which is the
> > correct thing to do in order to avoid mappings of foreign pages depend
> > on memory hotplug.
> I think this is going to break Dom0 on Arm if the kernel has been built with
> hotplug. This is because you may end up to re-use region that will be used
> for the 1:1 mapping of a foreign map.
> 
> Note that I don't know whether hotplug has been tested on Xen on Arm yet. So
> it might be possible to be already broken.
> 
> Meanwhile, my suggestion would be to make the use of hotplug in the balloon
> code conditional (maybe using CONFIG_ARM64 and CONFIG_ARM)?

Right, this feature (allocation of unpopulated memory separated from
the balloon driver) is currently gated on CONFIG_ZONE_DEVICE, which I
think could be used on Arm.

IMO the right solution seems to be to subtract the physical memory
regions that can be used for the identity mappings of foreign pages
(all RAM on the system AFAICT) from iomem_resource, as that would make
this and the memory hotplug done in the balloon driver safe?

Thanks, Roger.


Re: [PATCH] media: usbvision: fixed coding style

2020-07-28 Thread Dhiraj Sharma
Alright sorry, I will ignore this patch and will commit the new patch
in another file.


On Tue, Jul 28, 2020 at 10:28 PM Greg KH  wrote:
>
> On Tue, Jul 28, 2020 at 10:13:22PM +0530, Dhiraj Sharma wrote:
> > > As the bot said, only do one type of thing per patch, and "fix all
> > > checkpatch errors/warnings" is not one type of thing.
> >
> > So should I send a fresh patch with minimal fixes? instead of replying
> > to this mail with [PATCH 01]
>
> Why are you ignoring what Hans said?


RE: [RFC 0/7] Add support to process rx packets in thread

2020-07-28 Thread Rakesh Pillai



> -Original Message-
> From: David Laight 
> Sent: Sunday, July 26, 2020 4:46 PM
> To: 'Sebastian Gottschall' ; Hillf Danton
> 
> Cc: Andrew Lunn ; Rakesh Pillai ;
> net...@vger.kernel.org; linux-wirel...@vger.kernel.org; linux-
> ker...@vger.kernel.org; ath...@lists.infradead.org;
> diand...@chromium.org; Markus Elfring ;
> evgr...@chromium.org; k...@kernel.org; johan...@sipsolutions.net;
> da...@davemloft.net; kv...@codeaurora.org
> Subject: RE: [RFC 0/7] Add support to process rx packets in thread
> 
> From: Sebastian Gottschall 
> > Sent: 25 July 2020 16:42
> > >> i agree. i just can say that i tested this patch recently due this
> > >> discussion here. and it can be changed by sysfs. but it doesnt work for
> > >> wifi drivers which are mainly using dummy netdev devices. for this i
> > >> made a small patch to get them working using napi_set_threaded
> manually
> > >> hardcoded in the drivers. (see patch bellow)
> 
> > > By CONFIG_THREADED_NAPI, there is no need to consider what you did
> here
> > > in the napi core because device drivers know better and are responsible
> > > for it before calling napi_schedule(n).
> 
> > yeah. but that approach will not work for some cases. some stupid
> > drivers are using locking context in the napi poll function.
> > in that case the performance will runto shit. i discovered this with the
> > mvneta eth driver (marvell) and mt76 tx polling (rx  works)
> > for mvneta is will cause very high latencies and packet drops. for mt76
> > it causes packet stop. doesnt work simply (on all cases no crashes)
> > so the threading will only work for drivers which are compatible with
> > that approach. it cannot be used as drop in replacement from my point of
> > view.
> > its all a question of the driver design
> 
> Why should it make (much) difference whether the napi callbacks (etc)
> are done in the context of the interrupted process or that of a
> specific kernel thread.
> The process flags (or whatever) can even be set so that it appears
> to be the expected 'softint' context.
> 
> In any case running NAPI from a thread will just show up the next
> piece of code that runs for ages in softint context.
> I think I've seen the tail end of memory being freed under rcu
> finally happening under softint and taking absolutely ages.
> 
>   David
> 

Hi All,

Is the threaded NAPI change posted to kernel ? 
Is the conclusion of this discussion that " we cannot use threads for 
processing packets " ??


> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes,
> MK1 1PT, UK
> Registration No: 1397386 (Wales)



Re: [External] Re: [PATCH 1/2] ftrace: clear module from hash of all ftrace ops

2020-07-28 Thread Chengming Zhou



在 2020/7/28 下午8:53, Steven Rostedt 写道:

On Tue, 28 Jul 2020 18:27:19 +0800
Chengming Zhou  wrote:


We should clear module from hash of all ops on ftrace_ops_list when
module going, or the ops including these modules will be matched
wrongly by new module loaded later.

This is really up to the owner of the hash and not the registered
system.


Agreed!

But ftrace is a core, independent mechanism of the kernel, and it's hard to
make sure that every user of ftrace, such as kprobes or livepatch, handles a
module going away correctly.


At least for now, kprobe does not handle that correctly...

So I think it's safer to fix it too in ftrace : )



If we want, we could register some kind of callback table for all
ftrace_ops to have this updated, but the current code is incorrect.

Like:

  register_ftrace_ops_hash()

Where the hash will get updated on module removal.


Thanks for the suggestion, so in this new function, all ftrace_ops 
func_hash on


ftrace_ops_list will get updated on module removal.


Signed-off-by: Chengming Zhou 
Signed-off-by: Muchun Song 
---
  kernel/trace/ftrace.c | 22 --
  1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1903b80db6eb..fca01a168ae5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6223,18 +6223,20 @@ clear_mod_from_hash(struct ftrace_page *pg, struct 
ftrace_hash *hash)
  /* Clear any records from hashs */
  static void clear_mod_from_hashes(struct ftrace_page *pg)
  {
-   struct trace_array *tr;
+   struct ftrace_ops *op;
  
-	mutex_lock(&trace_types_lock);

-   list_for_each_entry(tr, &ftrace_trace_arrays, list) {
-   if (!tr->ops || !tr->ops->func_hash)

The tr->ops hashes are persistent without being registered. They match
what's in set_ftrace_filter and similar files.

Your patch just introduced a bug, because those hashes now would not
get updated if the ops were not registered.

i.e.

  # echo some_module_function > set_ftrace_filter
  # rmmod module_with_that_function
  # insmod module_with_same_address_of_function
  # echo function > current_tracer

Now the tr->ops->hash would still have the function of the original
module.


I thought all ftrace_ops that have a non-empty func_hash are on the ftrace
global list...


Well, so I just leave this function unmodified.

Just call that new function register_ftrace_ops_hash() from 
ftrace_release_mod.


Thanks!



Either have all owners of ftrace_ops handle this case, or add a helper
function to handle it for them. But using ftrace_ops_list is the wrong
place to do it.

-- Steve



+   mutex_lock(&ftrace_lock);
+
+   do_for_each_ftrace_op(op, ftrace_ops_list) {
+   if (!op->func_hash)
continue;
-   mutex_lock(&tr->ops->func_hash->regex_lock);
-   clear_mod_from_hash(pg, tr->ops->func_hash->filter_hash);
-   clear_mod_from_hash(pg, tr->ops->func_hash->notrace_hash);
-   mutex_unlock(&tr->ops->func_hash->regex_lock);
-   }
-   mutex_unlock(&trace_types_lock);
+   mutex_lock(&op->func_hash->regex_lock);
+   clear_mod_from_hash(pg, op->func_hash->filter_hash);
+   clear_mod_from_hash(pg, op->func_hash->notrace_hash);
+   mutex_unlock(&op->func_hash->regex_lock);
+   } while_for_each_ftrace_op(op);
+
+   mutex_unlock(&ftrace_lock);
  }
  
  static void ftrace_free_mod_map(struct rcu_head *rcu)


Re: [PATCH] media: usbvision: fixed coding style

2020-07-28 Thread Greg KH
On Tue, Jul 28, 2020 at 10:13:22PM +0530, Dhiraj Sharma wrote:
> > As the bot said, only do one type of thing per patch, and "fix all
> > checkpatch errors/warnings" is not one type of thing.
> 
> So should I send a fresh patch with minimal fixes? instead of replying
> to this mail with [PATCH 01]

Why are you ignoring what Hans said?


[PATCH v3] Bluetooth: Fix suspend notifier race

2020-07-28 Thread Abhishek Pandit-Subedi
Unregister from suspend notifications and cancel suspend preparations
before running hci_dev_do_close. Otherwise, the suspend notifier may
race with unregister and cause cmd_timeout even after hdev has been
freed.

Below is the trace from when this panic was seen:

[  832.578518] Bluetooth: hci_core.c:hci_cmd_timeout() hci0: command 0x0c05 tx 
timeout
[  832.586200] BUG: kernel NULL pointer dereference, address: 
[  832.586203] #PF: supervisor read access in kernel mode
[  832.586205] #PF: error_code(0x) - not-present page
[  832.586206] PGD 0 P4D 0
[  832.586210] PM: suspend exit
[  832.608870] Oops:  [#1] PREEMPT SMP NOPTI
[  832.613232] CPU: 3 PID: 10755 Comm: kworker/3:7 Not tainted 
5.4.44-04894-g1e9dbb96a161 #1
[  832.630036] Workqueue: events hci_cmd_timeout [bluetooth]
[  832.630046] RIP: 0010:__queue_work+0xf0/0x374
[  832.630051] RSP: 0018:9b5285f1fdf8 EFLAGS: 00010046
[  832.674033] RAX: 8a97681bac00 RBX:  RCX: 8a976a000600
[  832.681162] RDX:  RSI: 0009 RDI: 8a976a000748
[  832.688289] RBP: 9b5285f1fe38 R08:  R09: 8a97681bac00
[  832.695418] R10: 0002 R11: 8a976a0006d8 R12: 8a9745107600
[  832.698045] usb 1-6: new full-speed USB device number 119 using xhci_hcd
[  832.702547] R13: 8a9673658850 R14: 0040 R15: 001e
[  832.702549] FS:  () GS:8a976af8() 
knlGS:
[  832.702550] CS:  0010 DS:  ES:  CR0: 80050033
[  832.702550] CR2:  CR3: 00010415a000 CR4: 003406e0
[  832.702551] Call Trace:
[  832.702558]  queue_work_on+0x3f/0x68
[  832.702562]  process_one_work+0x1db/0x396
[  832.747397]  worker_thread+0x216/0x375
[  832.751147]  kthread+0x138/0x140
[  832.754377]  ? pr_cont_work+0x58/0x58
[  832.758037]  ? kthread_blkcg+0x2e/0x2e
[  832.761787]  ret_from_fork+0x22/0x40
[  832.846191] ---[ end trace fa93f466da517212 ]---

Fixes: 9952d90ea2885 ("Bluetooth: Handle PM_SUSPEND_PREPARE and 
PM_POST_SUSPEND")
Signed-off-by: Abhishek Pandit-Subedi 
Reviewed-by: Miao-chen Chou 
---
Hi Marcel,

This fixes a race between hci_unregister_dev and the suspend notifier.

The suspend notifier handler seemed to be scheduling commands even after
it was cleaned up and this was resulting in a panic in cmd_timeout (when
it tries to requeue the cmd_timer).

This was tested on 5.4 kernel with a suspend+resume stress test for 500+
iterations. I also confirmed that after a usb disconnect, the suspend
notifier times out before the USB device is probed again (fixing the
original race between the usb_disconnect + probe and the notifier).

Thanks
Abhishek


Changes in v3:
* Added fixes tag

Changes in v2:
* Moved oops into commit message

 net/bluetooth/hci_core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5394ab56c915a9..4ba23b821cbf4a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3767,9 +3767,10 @@ void hci_unregister_dev(struct hci_dev *hdev)
 
cancel_work_sync(&hdev->power_on);
 
-   hci_dev_do_close(hdev);
-
unregister_pm_notifier(&hdev->suspend_notifier);
+   cancel_work_sync(&hdev->suspend_prepare);
+
+   hci_dev_do_close(hdev);
 
if (!test_bit(HCI_INIT, &hdev->flags) &&
!hci_dev_test_flag(hdev, HCI_SETUP) &&
-- 
2.28.0.rc0.142.g3c755180ce-goog



Re: [PATCH] /proc/PID/smaps: Consistent whitespace output format

2020-07-28 Thread Yang Shi




On 7/28/20 1:32 AM, Michal Koutný wrote:

The keys in smaps output are padded to fixed width with spaces.
All except for THPeligible that uses tabs (only since
commit c06306696f83 ("mm: thp: fix false negative of shmem vma's THP
eligibility")).
Unify the output formatting to save time debugging some naïve parsers.
(Part of the unification is also aligning FilePmdMapped with others.)


I recall someone else submitted a similar patch before, but my memory is 
vague. Anyway, it looks fine to me to make the parsers happy. Acked-by: 
Yang Shi 




Signed-off-by: Michal Koutný 
---
  fs/proc/task_mmu.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbda4499a859..5066b0251ed8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -786,7 +786,7 @@ static void __show_smap(struct seq_file *m, const struct 
mem_size_stats *mss,
SEQ_PUT_DEC(" kB\nLazyFree:   ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
-   SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
+   SEQ_PUT_DEC(" kB\nFilePmdMapped:  ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
  mss->private_hugetlb >> 10, 7);
@@ -816,7 +816,7 @@ static int show_smap(struct seq_file *m, void *v)
  
  	__show_smap(m, &mss, false);
  
-	seq_printf(m, "THPeligible:		%d\n",

+   seq_printf(m, "THPeligible:%d\n",
   transparent_hugepage_enabled(vma));
  
  	if (arch_pkeys_enabled())




[no subject]

2020-07-28 Thread Mrs Nicole Marois Benoite
Dear Beloved

I am Mrs Nicole Benoite Marois and i have been suffering from ovarian
cancer disease and the doctor says that i have just few days to leave.
I am from (Paris) France but based in Africa Burkina Faso since eight
years ago as a business woman dealing with gold exportation.

Now that i am about to end the race like this, without any family
members and no child. I have $3 Million US DOLLARS in Africa
Development Bank (ADB) Burkina Faso which i instructed the bank to remit and
give to Orphanage & Teaching Volunteer Work in Burkina Faso.But my
mind is not at
rest because i am writing this letter now through the help of my
computer beside my sick bed.

I also have $4.5 Million US Dollars at Eco-Bank here in Burkina Faso
and i instructed the bank to transfer the fund to you as foreigner
that will apply to the bank after i have gone, that they should
release the fund to him/her,but you will assure me that you will take
50% of the fund and give 50% to the orphanages home in your country
for my heart to rest.

Respond to me immediately via my private email address
(mrsnicole.france1...@outlook.com) for further details since I have just
few days to end my life due to the ovarian cancer disease, hoping you
will understand my point

Yours fairly friend,

Mrs Nicole Benoite Marois.


Re: [PATCH v1] block: Remove callback typedefs for blk_mq_ops

2020-07-28 Thread Bart Van Assche
On 2020-07-28 09:20, Daniel Wagner wrote:
> No need to define typedefs for the callbacks, because there is not a
> single user except blk_mq_ops.

Thanks for having done this work.

Reviewed-by: Bart Van Assche 


Re: [PATCH v2] Bluetooth: Fix suspend notifier race

2020-07-28 Thread Abhishek Pandit-Subedi
I sent this a bit too quick without a Fixes tag. Please disregard. v3 coming up.

On Tue, Jul 28, 2020 at 9:53 AM Abhishek Pandit-Subedi
 wrote:
>
> Unregister from suspend notifications and cancel suspend preparations
> before running hci_dev_do_close. Otherwise, the suspend notifier may
> race with unregister and cause cmd_timeout even after hdev has been
> freed.
>
> Below is the trace from when this panic was seen:
>
> [  832.578518] Bluetooth: hci_core.c:hci_cmd_timeout() hci0: command 0x0c05 
> tx timeout
> [  832.586200] BUG: kernel NULL pointer dereference, address: 
> [  832.586203] #PF: supervisor read access in kernel mode
> [  832.586205] #PF: error_code(0x) - not-present page
> [  832.586206] PGD 0 P4D 0
> [  832.586210] PM: suspend exit
> [  832.608870] Oops:  [#1] PREEMPT SMP NOPTI
> [  832.613232] CPU: 3 PID: 10755 Comm: kworker/3:7 Not tainted 
> 5.4.44-04894-g1e9dbb96a161 #1
> [  832.630036] Workqueue: events hci_cmd_timeout [bluetooth]
> [  832.630046] RIP: 0010:__queue_work+0xf0/0x374
> [  832.630051] RSP: 0018:9b5285f1fdf8 EFLAGS: 00010046
> [  832.674033] RAX: 8a97681bac00 RBX:  RCX: 
> 8a976a000600
> [  832.681162] RDX:  RSI: 0009 RDI: 
> 8a976a000748
> [  832.688289] RBP: 9b5285f1fe38 R08:  R09: 
> 8a97681bac00
> [  832.695418] R10: 0002 R11: 8a976a0006d8 R12: 
> 8a9745107600
> [  832.698045] usb 1-6: new full-speed USB device number 119 using xhci_hcd
> [  832.702547] R13: 8a9673658850 R14: 0040 R15: 
> 001e
> [  832.702549] FS:  () GS:8a976af8() 
> knlGS:
> [  832.702550] CS:  0010 DS:  ES:  CR0: 80050033
> [  832.702550] CR2:  CR3: 00010415a000 CR4: 
> 003406e0
> [  832.702551] Call Trace:
> [  832.702558]  queue_work_on+0x3f/0x68
> [  832.702562]  process_one_work+0x1db/0x396
> [  832.747397]  worker_thread+0x216/0x375
> [  832.751147]  kthread+0x138/0x140
> [  832.754377]  ? pr_cont_work+0x58/0x58
> [  832.758037]  ? kthread_blkcg+0x2e/0x2e
> [  832.761787]  ret_from_fork+0x22/0x40
> [  832.846191] ---[ end trace fa93f466da517212 ]---
>
> Signed-off-by: Abhishek Pandit-Subedi 
> Reviewed-by: Miao-chen Chou 
> ---
> Hi Marcel,
>
> This fixes a race between hci_unregister_dev and the suspend notifier.
>
> The suspend notifier handler seemed to be scheduling commands even after
> it was cleaned up and this was resulting in a panic in cmd_timeout (when
> it tries to requeue the cmd_timer).
>
> This was tested on 5.4 kernel with a suspend+resume stress test for 500+
> iterations. I also confirmed that after a usb disconnect, the suspend
> notifier times out before the USB device is probed again (fixing the
> original race between the usb_disconnect + probe and the notifier).
>
> Thanks
> Abhishek
>
>
> Changes in v2:
> * Moved oops into commit message
>
>  net/bluetooth/hci_core.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index 5394ab56c915a9..4ba23b821cbf4a 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -3767,9 +3767,10 @@ void hci_unregister_dev(struct hci_dev *hdev)
>
> cancel_work_sync(&hdev->power_on);
>
> -   hci_dev_do_close(hdev);
> -
> unregister_pm_notifier(&hdev->suspend_notifier);
> +   cancel_work_sync(&hdev->suspend_prepare);
> +
> +   hci_dev_do_close(hdev);
>
> if (!test_bit(HCI_INIT, &hdev->flags) &&
> !hci_dev_test_flag(hdev, HCI_SETUP) &&
> --
> 2.28.0.rc0.142.g3c755180ce-goog
>


[PATCH v2] Bluetooth: Fix suspend notifier race

2020-07-28 Thread Abhishek Pandit-Subedi
Unregister from suspend notifications and cancel suspend preparations
before running hci_dev_do_close. Otherwise, the suspend notifier may
race with unregister and cause cmd_timeout even after hdev has been
freed.

Below is the trace from when this panic was seen:

[  832.578518] Bluetooth: hci_core.c:hci_cmd_timeout() hci0: command 0x0c05 tx 
timeout
[  832.586200] BUG: kernel NULL pointer dereference, address: 
[  832.586203] #PF: supervisor read access in kernel mode
[  832.586205] #PF: error_code(0x) - not-present page
[  832.586206] PGD 0 P4D 0
[  832.586210] PM: suspend exit
[  832.608870] Oops:  [#1] PREEMPT SMP NOPTI
[  832.613232] CPU: 3 PID: 10755 Comm: kworker/3:7 Not tainted 
5.4.44-04894-g1e9dbb96a161 #1
[  832.630036] Workqueue: events hci_cmd_timeout [bluetooth]
[  832.630046] RIP: 0010:__queue_work+0xf0/0x374
[  832.630051] RSP: 0018:9b5285f1fdf8 EFLAGS: 00010046
[  832.674033] RAX: 8a97681bac00 RBX:  RCX: 8a976a000600
[  832.681162] RDX:  RSI: 0009 RDI: 8a976a000748
[  832.688289] RBP: 9b5285f1fe38 R08:  R09: 8a97681bac00
[  832.695418] R10: 0002 R11: 8a976a0006d8 R12: 8a9745107600
[  832.698045] usb 1-6: new full-speed USB device number 119 using xhci_hcd
[  832.702547] R13: 8a9673658850 R14: 0040 R15: 001e
[  832.702549] FS:  () GS:8a976af8() 
knlGS:
[  832.702550] CS:  0010 DS:  ES:  CR0: 80050033
[  832.702550] CR2:  CR3: 00010415a000 CR4: 003406e0
[  832.702551] Call Trace:
[  832.702558]  queue_work_on+0x3f/0x68
[  832.702562]  process_one_work+0x1db/0x396
[  832.747397]  worker_thread+0x216/0x375
[  832.751147]  kthread+0x138/0x140
[  832.754377]  ? pr_cont_work+0x58/0x58
[  832.758037]  ? kthread_blkcg+0x2e/0x2e
[  832.761787]  ret_from_fork+0x22/0x40
[  832.846191] ---[ end trace fa93f466da517212 ]---

Signed-off-by: Abhishek Pandit-Subedi 
Reviewed-by: Miao-chen Chou 
---
Hi Marcel,

This fixes a race between hci_unregister_dev and the suspend notifier.

The suspend notifier handler seemed to be scheduling commands even after
it was cleaned up and this was resulting in a panic in cmd_timeout (when
it tries to requeue the cmd_timer).

This was tested on 5.4 kernel with a suspend+resume stress test for 500+
iterations. I also confirmed that after a usb disconnect, the suspend
notifier times out before the USB device is probed again (fixing the
original race between the usb_disconnect + probe and the notifier).

Thanks
Abhishek


Changes in v2:
* Moved oops into commit message

 net/bluetooth/hci_core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5394ab56c915a9..4ba23b821cbf4a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3767,9 +3767,10 @@ void hci_unregister_dev(struct hci_dev *hdev)
 
cancel_work_sync(&hdev->power_on);
 
-   hci_dev_do_close(hdev);
-
unregister_pm_notifier(&hdev->suspend_notifier);
+   cancel_work_sync(&hdev->suspend_prepare);
+
+   hci_dev_do_close(hdev);
 
if (!test_bit(HCI_INIT, &hdev->flags) &&
!hci_dev_test_flag(hdev, HCI_SETUP) &&
-- 
2.28.0.rc0.142.g3c755180ce-goog



Re: [PATCH v4 4/5] arm64: dts: sdm845: Add OPP tables and power-domains for venus

2020-07-28 Thread Lina Iyer

On Mon, Jul 27 2020 at 18:45 -0600, Stephen Boyd wrote:

Quoting Lina Iyer (2020-07-24 09:28:25)

On Fri, Jul 24 2020 at 03:03 -0600, Rajendra Nayak wrote:
>Hi Maulik/Lina,
>
>On 7/23/2020 11:36 PM, Stanimir Varbanov wrote:
>>Hi Rajendra,
>>
>>After applying 2,3 and 4/5 patches on linaro-integration v5.8-rc2 I see
>>below messages on db845:
>>
>>qcom-venus aa0.video-codec: dev_pm_opp_set_rate: failed to find
>>current OPP for freq 53397 (-34)
>>
>>^^^ This one is new.
>>
>>qcom_rpmh TCS Busy, retrying RPMH message send: addr=0x3
>>
>>^^^ and this message is annoying, can we make it pr_debug in rpmh?
>
How annoyingly often do you see this message?
Usually, this is an indication of bad system state either on remote
processors in the SoC or in Linux itself. On a smooth sailing build you
should not see this 'warning'.

>Would you be fine with moving this message to a pr_debug? Its currently
>a pr_info_ratelimited()
I would rather not; moving this out of sight will mask a lot of serious
issues that would otherwise be brought to the developers' attention.



I removed this warning message in my patch posted to the list[1]. If
it's a serious problem then I suppose a timeout is more appropriate, on
the order of several seconds or so and then a pr_warn() and bail out of
the async call with an error.


The warning used to capture issues that happen within a second and it
helps capture system related issues. Timing out after many seconds
overlooks the system issues that generally tend to resolve itself, but
nevertheless need to be investigated.

--Lina


[1] https://lore.kernel.org/r/20200724211711.810009-1-sb...@kernel.org


drivers/video/fbdev/imsttfb.c:1538:20: sparse: sparse: incorrect type in argument 1 (different address spaces)

2020-07-28 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   92ed301919932f13b9172e525674157e983d
commit: 670d0a4b10704667765f7d18f7592993d02783aa sparse: use identifiers to 
define address spaces
date:   6 weeks ago
config: arm-randconfig-s032-20200728 (attached as .config)
compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.2-94-geb6779f6-dirty
git checkout 670d0a4b10704667765f7d18f7592993d02783aa
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)

   drivers/video/fbdev/imsttfb.c:1513:30: sparse: sparse: cast removes address 
space '__iomem' of expression
   drivers/video/fbdev/imsttfb.c:1513:27: sparse: sparse: incorrect type in 
assignment (different address spaces) @@ expected char [noderef] __iomem 
*screen_base @@ got unsigned char [usertype] * @@
   drivers/video/fbdev/imsttfb.c:1513:27: sparse: expected char [noderef] 
__iomem *screen_base
   drivers/video/fbdev/imsttfb.c:1513:27: sparse: got unsigned char 
[usertype] *
   drivers/video/fbdev/imsttfb.c:1523:27: sparse: sparse: cast removes address 
space '__iomem' of expression
>> drivers/video/fbdev/imsttfb.c:1538:20: sparse: sparse: incorrect type in 
>> argument 1 (different address spaces) @@ expected void volatile 
>> [noderef] __iomem *iomem_cookie @@ got unsigned char [usertype] 
>> *cmap_regs @@
>> drivers/video/fbdev/imsttfb.c:1538:20: sparse: expected void volatile 
>> [noderef] __iomem *iomem_cookie
   drivers/video/fbdev/imsttfb.c:1538:20: sparse: got unsigned char 
[usertype] *cmap_regs
   drivers/video/fbdev/imsttfb.c:1360:15: sparse: sparse: cast removes address 
space '__iomem' of expression
   drivers/video/fbdev/imsttfb.c:1361:16: sparse: sparse: cast removes address 
space '__iomem' of expression
--
>> drivers/scsi/pcmcia/nsp_cs.c:1669:34: sparse: sparse: incorrect type in 
>> argument 1 (different address spaces) @@ expected void volatile 
>> [noderef] __iomem *iomem_cookie @@ got void * @@
>> drivers/scsi/pcmcia/nsp_cs.c:1669:34: sparse: expected void volatile 
>> [noderef] __iomem *iomem_cookie
   drivers/scsi/pcmcia/nsp_cs.c:1669:34: sparse: got void *
   drivers/scsi/pcmcia/nsp_cs.c: note: in included file:
>> drivers/scsi/pcmcia/nsp_io.h:231:24: sparse: sparse: incorrect type in 
>> argument 1 (different address spaces) @@ expected void const volatile 
>> [noderef] __iomem *addr @@ got unsigned long *ptr @@
>> drivers/scsi/pcmcia/nsp_io.h:231:24: sparse: expected void const 
>> volatile [noderef] __iomem *addr
   drivers/scsi/pcmcia/nsp_io.h:231:24: sparse: got unsigned long *ptr
>> drivers/scsi/pcmcia/nsp_io.h:257:17: sparse: sparse: incorrect type in 
>> argument 2 (different address spaces) @@ expected void volatile 
>> [noderef] __iomem *addr @@ got unsigned long *ptr @@
>> drivers/scsi/pcmcia/nsp_io.h:257:17: sparse: expected void volatile 
>> [noderef] __iomem *addr
   drivers/scsi/pcmcia/nsp_io.h:257:17: sparse: got unsigned long *ptr

vim +1538 drivers/video/fbdev/imsttfb.c

^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1530  
48c68c4f1b5424 drivers/video/imsttfb.c Greg Kroah-Hartman 2012-12-21  1531  
static void imsttfb_remove(struct pci_dev *pdev)
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1532  {
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1533  
struct fb_info *info = pci_get_drvdata(pdev);
94f9e09ce531d4 drivers/video/imsttfb.c Antonino A. Daplas 2006-01-09  1534  
struct imstt_par *par = info->par;
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1535  
int size = pci_resource_len(pdev, 0);
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1536  
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1537  
unregister_framebuffer(info);
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16 @1538  
iounmap(par->cmap_regs);
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1539  
iounmap(par->dc_regs);
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1540  
iounmap(info->screen_base);
^1da177e4c3f41 drivers/video/imsttfb.c Linus Torvalds 2005-04-16  1541  
release_mem_region(info->fix.smem_start, size);
94f9e09ce531d4 drivers/video/imsttf

Re: [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor

2020-07-28 Thread Madhavan T. Venkataraman
Thanks.

On 7/28/20 11:05 AM, Casey Schaufler wrote:
>> In this solution, the kernel recognizes certain sequences of instructions
>> as "well-known" trampolines. When such a trampoline is executed, a page
>> fault happens because the trampoline page does not have execute permission.
>> The kernel recognizes the trampoline and emulates it. Basically, the
>> kernel does the work of the trampoline on behalf of the application.
> What prevents a malicious process from using the "well-known" trampoline
> to its own purposes? I expect it is obvious, but I'm not seeing it. Old
> eyes, I suppose.

You are quite right. As I note below, the attack surface is the
buffer that contains the trampoline code. Since the kernel does
check the instruction sequence, the sequence cannot be
changed by a hacker. But the hacker can presumably change
the register values and redirect the PC to his desired location.

The assumption with trampoline emulation is that the
system will have security settings that will prevent pages from
having both write and execute permissions. So, a hacker
cannot load his own code in a page and redirect the PC to
it and execute his own code. But he can probably set the
PC to point to arbitrary locations. For instance, jump to
the middle of a C library function.
>
>> Here, the attack surface is the buffer that contains the trampoline.
>> The attack surface is narrower than before. A hacker may still be able to
>> modify what gets loaded in the registers or modify the target PC to point
>> to arbitrary locations.
...
>> Work that is pending
>> 
>>
>> - I am working on implementing an SELinux setting called "exectramp"
>>   similar to "execmem" to allow the use of trampfd on a per application
>>   basis.
> You could make a separate LSM to do these checks instead of limiting
> it to SELinux. Your use case, your call, of course.

OK. I will research this.

Madhavan


Re: [PATCH v3 4/4] xen: add helpers to allocate unpopulated memory

2020-07-28 Thread Julien Grall

Hi,

On 27/07/2020 10:13, Roger Pau Monne wrote:

To be used in order to create foreign mappings. This is based on the
ZONE_DEVICE facility which is used by persistent memory devices in
order to create struct pages and kernel virtual mappings for the IOMEM
areas of such devices. Note that on kernels without support for
ZONE_DEVICE Xen will fallback to use ballooned pages in order to
create foreign mappings.

The newly added helpers use the same parameters as the existing
{alloc/free}_xenballooned_pages functions, which allows for in-place
replacement of the callers. Once a memory region has been added to be
used as scratch mapping space it will no longer be released, and pages
returned are kept in a linked list. This allows to have a buffer of
pages and prevents resorting to frequent additions and removals of
regions.

If enabled (because ZONE_DEVICE is supported) the usage of the new
functionality untangles Xen balloon and RAM hotplug from the usage of
unpopulated physical memory ranges to map foreign pages, which is the
correct thing to do in order to avoid mappings of foreign pages depend
on memory hotplug.
I think this is going to break Dom0 on Arm if the kernel has been built 
with hotplug. This is because you may end up re-using a region that will 
be used for the 1:1 mapping of a foreign map.


Note that I don't know whether hotplug has been tested on Xen on Arm 
yet. So it might already be broken.


Meanwhile, my suggestion would be to make the use of hotplug in the 
balloon code conditional (maybe using CONFIG_ARM64 and CONFIG_ARM)?


Cheers,

--
Julien Grall


Re: [PATCH v4] mm/hugetlb: add mempolicy check in the reservation routine

2020-07-28 Thread Mike Kravetz
On 7/28/20 6:24 AM, Baoquan He wrote:
> Hi Muchun,
> 
> On 07/28/20 at 11:49am, Muchun Song wrote:
>> In the reservation routine, we only check whether the cpuset meets
>> the memory allocation requirements. But we ignore the mempolicy of
>> MPOL_BIND case. If someone mmap hugetlb succeeds, but the subsequent
>> memory allocation may fail due to mempolicy restrictions and receives
>> the SIGBUS signal. This can be reproduced by the following steps.
>>
>>  1) Compile the test case.
>> cd tools/testing/selftests/vm/
>> gcc map_hugetlb.c -o map_hugetlb
>>
>>  2) Pre-allocate huge pages. Suppose there are 2 numa nodes in the
>> system. Each node will pre-allocate one huge page.
>> echo 2 > /proc/sys/vm/nr_hugepages
>>
>>  3) Run test case(mmap 4MB). We receive the SIGBUS signal.
>> numactl --membind=0 ./map_hugetlb 4
> 
> I think supporting the  mempolicy of MPOL_BIND case is a good idea.
> I am wondering what about the other mempolicy cases, e.g MPOL_INTERLEAVE,
> MPOL_PREFERRED. Asking these because we already have similar handling in
> sysfs, proc nr_hugepages_mempolicy writing. Please see
> __nr_hugepages_store_common() for detail.

There is a high level difference in the function of this code and the code
called by the sysfs and proc interfaces.  This patch is dealing with reserving
huge pages in the pool for later use.  The sysfs and proc interfaces are
allocating huge pages to be added to the pool.

Using mempolicy to decide how to allocate huge pages is pretty straight
forward.  Using mempolicy to reserve pages is almost impossible to get
correct.  The comment at the beginning of hugetlb_acct_memory() and modified
by this patch summarizes the issues.

IMO, at this time it makes little sense to perform checks for more than
MPOL_BIND at reservation time.  If we ever take on the monumental task of
supporting mempolicy directed per-node reservations throughout the life of
a process, support for other policies will need to be taken into account.

-- 
Mike Kravetz


Re: [PATCH net-next RFC 01/13] devlink: Add reload level option to devlink reload command

2020-07-28 Thread Jacob Keller



On 7/28/2020 6:58 AM, Jiri Pirko wrote:
> Tue, Jul 28, 2020 at 02:58:02AM CEST, k...@kernel.org wrote:
>> On Mon, 27 Jul 2020 14:02:21 +0300 Moshe Shemesh wrote:
>>> Add devlink reload level to allow the user to request a specific reload
>>> level. The level parameter is optional, if not specified then driver's
>>> default reload level is used (backward compatible).
>>
>> Please don't leave space for driver-specific behavior. The OS is
>> supposed to abstract device differences away.
> 
> But this is needed to maintain the existing behaviour which is different
> for different drivers.
> 

Which drivers behave differently here?


[PATCH] ASoC: tlv320adcx140: Fix various style errors and warnings

2020-07-28 Thread Dan Murphy
Fix white space issues and remove else case where it was not needed.
Convert "static const char *" to "static const char * const"

Fixes: 689c7655b50 ("ASoC: tlv320adcx140: Add the tlv320adcx140 codec driver 
family")
Signed-off-by: Dan Murphy 
---
 sound/soc/codecs/tlv320adcx140.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/soc/codecs/tlv320adcx140.c b/sound/soc/codecs/tlv320adcx140.c
index 49dcdd72e5c6..938c5ef17e61 100644
--- a/sound/soc/codecs/tlv320adcx140.c
+++ b/sound/soc/codecs/tlv320adcx140.c
@@ -218,8 +218,8 @@ static const struct snd_kcontrol_new 
in4_resistor_controls[] = {
 };
 
 /* Analog/Digital Selection */
-static const char *adcx140_mic_sel_text[] = {"Analog", "Line In", "Digital"};
-static const char *adcx140_analog_sel_text[] = {"Analog", "Line In"};
+static const char * const adcx140_mic_sel_text[] = {"Analog", "Line In", 
"Digital"};
+static const char * const adcx140_analog_sel_text[] = {"Analog", "Line In"};
 
 static SOC_ENUM_SINGLE_DECL(adcx140_mic1p_enum,
ADCX140_CH1_CFG0, 5,
@@ -598,7 +598,7 @@ static int adcx140_reset(struct adcx140_priv *adcx140)
gpiod_direction_output(adcx140->gpio_reset, 1);
} else {
ret = regmap_write(adcx140->regmap, ADCX140_SW_RESET,
- ADCX140_RESET);
+  ADCX140_RESET);
}
 
/* 8.4.2: wait >= 10 ms after entering sleep mode. */
@@ -841,7 +841,7 @@ static int adcx140_codec_probe(struct snd_soc_component 
*component)
if (ret)
goto out;
 
-   if(adcx140->supply_areg == NULL)
+   if (adcx140->supply_areg == NULL)
sleep_cfg_val |= ADCX140_AREG_INTERNAL;
 
ret = regmap_write(adcx140->regmap, ADCX140_SLEEP_CFG, sleep_cfg_val);
@@ -942,8 +942,8 @@ static int adcx140_i2c_probe(struct i2c_client *i2c,
if (IS_ERR(adcx140->supply_areg)) {
if (PTR_ERR(adcx140->supply_areg) == -EPROBE_DEFER)
return -EPROBE_DEFER;
-   else
-   adcx140->supply_areg = NULL;
+
+   adcx140->supply_areg = NULL;
} else {
ret = regulator_enable(adcx140->supply_areg);
if (ret) {
-- 
2.28.0



Re: [PATCH net-next RFC 00/13] Add devlink reload level option

2020-07-28 Thread Jacob Keller



On 7/27/2020 10:25 PM, Vasundhara Volam wrote:
> On Mon, Jul 27, 2020 at 4:36 PM Moshe Shemesh  wrote:
>>
>> Introduce new option on devlink reload API to enable the user to select the
>> reload level required. Complete support for all levels in mlx5.
>> The following reload levels are supported:
>>   driver: Driver entities re-instantiation only.
>>   fw_reset: Firmware reset and driver entities re-instantiation.
> The name is a little confusing. I think it should be renamed to
> fw_live_reset (in which both firmware and driver entities are
> re-instantiated).  For only fw_reset, the driver should not undergo
> reset (it requires a driver reload for firmware to undergo reset).
> 

So, I think the differentiation here is that "live_patch" doesn't reset
anything.

>>   fw_live_patch: Firmware live patching only.
> This level is not clear. Is this similar to flashing??
> 
> Also I have a basic query. The reload command is split into
> reload_up/reload_down handlers (Please correct me if this behaviour is
> changed with this patchset). What if the vendor specific driver does
> not support up/down and needs only a single handler to fire a firmware
> reset or firmware live reset command?

In the "reload_down" handler, they would trigger the appropriate reset,
and quiesce anything that needs to be done. Then on reload up, it would
restore and bring up anything quiesced in the first stage.


Re: [PATCH] media: usbvision: fixed coding style

2020-07-28 Thread Dhiraj Sharma
> As the bot said, only do one type of thing per patch, and "fix all
> checkpatch errors/warnings" is not one type of thing.

So should I send a fresh patch with minimal fixes, instead of replying
to this mail with [PATCH 01]?


URGENT RESPONSE

2020-07-28 Thread BILL Exchange manager & Audit Department




Dear Friend,

I am Mr. KARIM TRAORE Working with a reputable bank here in Burkina Faso as the 
manager in audit department. During our last banking audits we discovered an 
abandoned account belongs to one of our deceased customer, late Mr. Hamid Amine 
Razzaq, a billionaire businessman.

Meanwhile, before i contacted you i have done personal investigation in 
locating any of his relatives who knows about the account, but i came out 
unsuccessful. I am writing to request your assistance in transferring the sum 
of 10.500.000.00 (Ten million Five Hundred Thousand Dollars) into your account.

I decided to contact you to act as his foreign business partner so that my bank 
will accord you the recognition and have the fund transfer into your account. 
More details information will be forwarded to you.

I am expecting to read from you soon.
Best Regards
Mr. KARIM TRAORE.


Spende von 2.000.000,00 Euro.

2020-07-28 Thread manuel franco
Sie haben eine Spende von 2.000.000,00 Euro.

Mein Name ist Manuel Franco aus den USA.
Ich habe die America-Lotterie im Wert von 768 Millionen US-Dollar gewonnen und 
spende einen Teil davon an nur 5 glückliche Menschen und einige Waisenhäuser 
als Wohlwollen für die Menschheit.


Re: [PATCH v4 2/2] MIPS: ingenic: Enable JZ4780_NEMC manually

2020-07-28 Thread Thomas Bogendoerfer
On Tue, Jul 28, 2020 at 02:00:35PM +0200, Krzysztof Kozlowski wrote:
> On Tue, Jul 28, 2020 at 01:37:02PM +0200, Thomas Bogendoerfer wrote:
> > On Tue, Jul 28, 2020 at 01:19:35PM +0200, Krzysztof Kozlowski wrote:
> > > On Tue, Jul 28, 2020 at 01:12:11PM +0200, Paul Cercueil wrote:
> > > > Hi Krzysztof,
> > > > 
> > > > Le mar. 28 juil. 2020 à 12:45, Krzysztof Kozlowski a écrit :
> > > > > The CONFIG_JZ4780_NEMC was previously a default on MIPS but now it has
> > > > > to be enabled manually.
> > > > > 
> > > > > Signed-off-by: Krzysztof Kozlowski 
> > > > 
> > > > I think you should swap the two so that there are no problems when
> > > > bisecting.
> > > 
> > > Good point. I was thinking that it will go via some of MIPS trees and
> > > the patch #1 will just wait a cycle.  However with acks, I can take it
> > > through drivers/memory tree.
> > 
> > I've acked the patch.
> > 
> > Thomas.
> 
> Thanks but now I noticed that one of changed configs
> (arch/mips/configs/rs90_defconfig) is only in MIPS tree.
> 
> I think it is easier then to take the patch #2 (configs) via MIPS and
> wait with #1 for the next cycle or also take it via MIPS if it applies
> cleanly.

ok, I'll take it.

Thomas.

-- 
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea.[ RFC1925, 2.3 ]


Re: [PATCH net-next RFC 00/13] Add devlink reload level option

2020-07-28 Thread Jacob Keller



On 7/27/2020 4:02 AM, Moshe Shemesh wrote:
> Introduce new option on devlink reload API to enable the user to select the
> reload level required. Complete support for all levels in mlx5.
> The following reload levels are supported:
>   driver: Driver entities re-instantiation only. 

So, this is the current support. Ok.

>   fw_reset: Firmware reset and driver entities re-instantiation. 


This would include firmware update? What about differing levels of
device/firmware reset? I.e. I think some of our HW has function level
reset, device wide reset, as well as EMP reset. For us, only EMP reset
would trigger firmware update.

>   fw_live_patch: Firmware live patching only.

This is for update without reset, right?


Re: [PATCH v4 3/5] irqchip/irq-pruss-intc: Add logic for handling reserved interrupts

2020-07-28 Thread Marc Zyngier

On 2020-07-28 10:18, Grzegorz Jaszczyk wrote:

From: Suman Anna 

The PRUSS INTC has a fixed number of output interrupt lines that are
connected to a number of processors or other PRUSS instances or other
devices (like DMA) on the SoC. The output interrupt lines 2 through 9
are usually connected to the main Arm host processor and are referred
to as host interrupts 0 through 7 from ARM/MPU perspective.

All of these 8 host interrupts are not always exclusively connected
to the Arm interrupt controller. Some SoCs have some interrupt lines
not connected to the Arm interrupt controller at all, while a few others
have the interrupt lines connected to multiple processors in which they
need to be partitioned as per SoC integration needs. For example, AM437x
and 66AK2G SoCs have 2 PRUSS instances each and have the host interrupt 5
connected to the other PRUSS, while AM335x has host interrupt 0 shared
between MPU and TSC_ADC and host interrupts 6 & 7 shared between MPU and
a DMA controller.

Add logic to the PRUSS INTC driver to ignore both these shared and
invalid interrupts.

Signed-off-by: Suman Anna 
Signed-off-by: Grzegorz Jaszczyk 
---
v3->v4:
- Due to changes in DT bindings which converts irqs-reserved
  property from uint8-array to bitmask requested by Rob introduce
  relevant changes in the driver.
- Merge the irqs-reserved and irqs-shared to one property since they
  can be handled by one logic (relevant change was introduced to DT
  binding).


This isn't what I asked for in my initial review.

I repeatedly asked for the *handling* to be common, not for the
properties to be merged. I don't mind either way, but I understood
there were two properties for a good reason. Has this reason gone?

Anyway, I'll come back to it once I start reviewing the series
again.

 M.
--
Jazz is not dead. It just smells funny...


Re: [PATCH v2 1/2] i2c: smbus: add core function handling SMBus host-notify

2020-07-28 Thread Wolfram Sang
Hi Alain,

> I've taken your comments and prepared a new series including them.
> I'll wait for the conclusion regarding the bindings before pushing it.

Thanks! I hope we can finish the discussion this week because Linus
hasn't made a clear statement if there will be an rc8. But I still think
we can do HostNotify for v5.9.

> I also have an additional patch ready in order to add again the SMBus Alert
> support within the stm32f7 driver since it has been removed from the
> current series. Hopefully I can push it once binding is acked so that it
> can get merged also in this cycle.

If it is super straight-forward, then yes.



signature.asc
Description: PGP signature


add file system helpers that take kernel pointers for the init code v4

2020-07-28 Thread Christoph Hellwig
Hi Al and Linus,

currently a lot of the file system calls in the early init code (and the
devtmpfs kthread) rely on the implicit set_fs(KERNEL_DS) during boot.
This is one of the few last remaining places we need to deal with to kill
off set_fs entirely, so this series adds new helpers that take kernel
pointers.  These helpers are in init/ and marked __init and thus will
be discarded after bootup.  A few also need to be duplicated in devtmpfs,
though unfortunately.

The series sits on top of my previous

  "decruft the early init / initrd / initramfs code v2"

series.


Git tree:

git://git.infradead.org/users/hch/misc.git init_path

Gitweb:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/init_path


Changes since v3:
 - rename fs/for_init.c to fs/init.c
 - document the purpose of the routines in fs/init.c with a comment
 - don't mark devtmpfs __init as that will cause it to get overwritten
   by initmem poisoning
 - add an init_dup helper to make Al happier than with the version
   committed to the "decruft the early init / initrd / initramfs code v2"
   series

Changes since v2:
 - move to fs/for_init.c
 - reuse the init routines in devtmpfs after refactoring devtmpfsd
   (and thus the broken error handling in the previous version)
 - actually use kern_path in a place where user_path_at sneaked back in

Changes since v1:
 - avoid most core VFS changes
 - renamed the functions and move them to init/ and devtmpfs
 - drop a bunch of cleanups that can be submitted independently now


Diffstat:


[PATCH 04/23] devtmpfs: refactor devtmpfsd()

2020-07-28 Thread Christoph Hellwig
Split the main worker loop into a separate function.  This allows
devtmpfs_setup to be marked __init, which will allow us to call
__init routines for the setup work.  devtmpfsd itself needs a __ref
marker for that to work, and a comment explaining why it works.

Signed-off-by: Christoph Hellwig 
---
 drivers/base/devtmpfs.c | 52 -
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index c9017e0584c003..d697634bc0d48c 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -378,7 +378,30 @@ static int handle(const char *name, umode_t mode, kuid_t 
uid, kgid_t gid,
return handle_remove(name, dev);
 }
 
-static int devtmpfs_setup(void *p)
+static void __noreturn devtmpfs_work_loop(void)
+{
+   while (1) {
+   spin_lock(_lock);
+   while (requests) {
+   struct req *req = requests;
+   requests = NULL;
+   spin_unlock(_lock);
+   while (req) {
+   struct req *next = req->next;
+   req->err = handle(req->name, req->mode,
+ req->uid, req->gid, req->dev);
+   complete(>done);
+   req = next;
+   }
+   spin_lock(_lock);
+   }
+   __set_current_state(TASK_INTERRUPTIBLE);
+   spin_unlock(_lock);
+   schedule();
+   }
+}
+
+static int __init devtmpfs_setup(void *p)
 {
int err;
 
@@ -396,31 +419,18 @@ static int devtmpfs_setup(void *p)
return err;
 }
 
-static int devtmpfsd(void *p)
+/*
+ * The __ref is because devtmpfs_setup needs to be __init for the routines it
+ * calls.  That call is done while devtmpfs_init, which is marked __init,
+ * synchronously waits for it to complete.
+ */
+static int __ref devtmpfsd(void *p)
 {
int err = devtmpfs_setup(p);
 
if (err)
return err;
-   while (1) {
-   spin_lock(_lock);
-   while (requests) {
-   struct req *req = requests;
-   requests = NULL;
-   spin_unlock(_lock);
-   while (req) {
-   struct req *next = req->next;
-   req->err = handle(req->name, req->mode,
- req->uid, req->gid, req->dev);
-   complete(>done);
-   req = next;
-   }
-   spin_lock(_lock);
-   }
-   __set_current_state(TASK_INTERRUPTIBLE);
-   spin_unlock(_lock);
-   schedule();
-   }
+   devtmpfs_work_loop();
return 0;
 }
 
-- 
2.27.0



[PATCH 09/23] init: add an init_umount helper

2020-07-28 Thread Christoph Hellwig
Like ksys_umount, but takes a kernel pointer for the destination path.
Switch over the umount in the init code, which just happens to work due to
the implicit set_fs(KERNEL_DS) during early init right now.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 14 ++
 fs/internal.h |  1 +
 fs/namespace.c|  4 ++--
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  1 -
 init/do_mounts_initrd.c   |  2 +-
 6 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index c6eb724e1c7b22..9c8e31fdb048c8 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -23,3 +23,17 @@ int __init init_mount(const char *dev_name, const char 
*dir_name,
path_put();
return ret;
 }
+
+int __init init_umount(const char *name, int flags)
+{
+   int lookup_flags = LOOKUP_MOUNTPOINT;
+   struct path path;
+   int ret;
+
+   if (!(flags & UMOUNT_NOFOLLOW))
+   lookup_flags |= LOOKUP_FOLLOW;
+   ret = kern_path(name, lookup_flags, );
+   if (ret)
+   return ret;
+   return path_umount(, flags);
+}
diff --git a/fs/internal.h b/fs/internal.h
index 72ea0b6f7435a4..491d1e63809b37 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -92,6 +92,7 @@ extern void dissolve_on_fput(struct vfsmount *);
 
 int path_mount(const char *dev_name, struct path *path,
const char *type_page, unsigned long flags, void *data_page);
+int path_umount(struct path *path, int flags);
 
 /*
  * fs_struct.c
diff --git a/fs/namespace.c b/fs/namespace.c
index 2c4d7592097485..a7301790abb211 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1706,7 +1706,7 @@ static inline bool may_mandlock(void)
 }
 #endif
 
-static int path_umount(struct path *path, int flags)
+int path_umount(struct path *path, int flags)
 {
struct mount *mnt;
int retval;
@@ -1736,7 +1736,7 @@ static int path_umount(struct path *path, int flags)
return retval;
 }
 
-int ksys_umount(char __user *name, int flags)
+static int ksys_umount(char __user *name, int flags)
 {
int lookup_flags = LOOKUP_MOUNTPOINT;
struct path path;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index af9ea88a60e0bd..a5a2e7f1991691 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -2,3 +2,4 @@
 
 int __init init_mount(const char *dev_name, const char *dir_name,
const char *type_page, unsigned long flags, void *data_page);
+int __init init_umount(const char *name, int flags);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e43816198e6001..1a4f5d8ee7044b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1236,7 +1236,6 @@ asmlinkage long sys_ni_syscall(void);
  * the ksys_xyzyyz() functions prototyped below.
  */
 
-int ksys_umount(char __user *name, int flags);
 int ksys_chroot(const char __user *filename);
 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
 int ksys_chdir(const char __user *filename);
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 1f9336209ad9cc..6b020a06990251 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -122,7 +122,7 @@ static void __init handle_initrd(void)
else
printk("failed\n");
printk(KERN_NOTICE "Unmounting old root\n");
-   ksys_umount("/old", MNT_DETACH);
+   init_umount("/old", MNT_DETACH);
}
 }
 
-- 
2.27.0



[PATCH 08/23] init: add an init_mount helper

2020-07-28 Thread Christoph Hellwig
Like do_mount, but takes a kernel pointer for the destination path.
Switch over the mounts in the init code and devtmpfs to it, which
just happen to work due to the implicit set_fs(KERNEL_DS) during early
init right now.

Signed-off-by: Christoph Hellwig 
---
 drivers/base/devtmpfs.c   |  5 +++--
 fs/Makefile   |  2 +-
 fs/init.c | 25 +
 fs/internal.h |  4 
 fs/namespace.c|  2 +-
 include/linux/init_syscalls.h |  4 
 init/do_mounts.c  |  8 
 init/do_mounts.h  |  1 +
 init/do_mounts_initrd.c   |  6 +++---
 9 files changed, 46 insertions(+), 11 deletions(-)
 create mode 100644 fs/init.c
 create mode 100644 include/linux/init_syscalls.h

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index d697634bc0d48c..32af6cb987b428 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "base.h"
 
@@ -359,7 +360,7 @@ int __init devtmpfs_mount(void)
if (!thread)
return 0;
 
-   err = do_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL);
+   err = init_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL);
if (err)
printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
else
@@ -408,7 +409,7 @@ static int __init devtmpfs_setup(void *p)
err = ksys_unshare(CLONE_NEWNS);
if (err)
goto out;
-   err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
+   err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
if (err)
goto out;
ksys_chdir("/.."); /* will traverse into overmounted root */
diff --git a/fs/Makefile b/fs/Makefile
index 2ce5112b02c867..1c7b0e3f6daa11 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,7 +13,7 @@ obj-y :=  open.o read_write.o file_table.o super.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
-   fs_types.o fs_context.o fs_parser.o fsopen.o
+   fs_types.o fs_context.o fs_parser.o fsopen.o init.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=   buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/init.c b/fs/init.c
new file mode 100644
index 00..c6eb724e1c7b22
--- /dev/null
+++ b/fs/init.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Routines that mimic syscalls, but don't use the user address space or file
+ * descriptors.  Only for init/ and related early init code.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "internal.h"
+
+int __init init_mount(const char *dev_name, const char *dir_name,
+   const char *type_page, unsigned long flags, void *data_page)
+{
+   struct path path;
+   int ret;
+
+   ret = kern_path(dir_name, LOOKUP_FOLLOW, );
+   if (ret)
+   return ret;
+   ret = path_mount(dev_name, , type_page, flags, data_page);
+   path_put();
+   return ret;
+}
diff --git a/fs/internal.h b/fs/internal.h
index e903d5aae139a2..72ea0b6f7435a4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -89,6 +89,10 @@ extern int __mnt_want_write_file(struct file *);
 extern void __mnt_drop_write_file(struct file *);
 
 extern void dissolve_on_fput(struct vfsmount *);
+
+int path_mount(const char *dev_name, struct path *path,
+   const char *type_page, unsigned long flags, void *data_page);
+
 /*
  * fs_struct.c
  */
diff --git a/fs/namespace.c b/fs/namespace.c
index 43834b59eff6c3..2c4d7592097485 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3111,7 +3111,7 @@ char *copy_mount_string(const void __user *data)
  * Therefore, if this magic number is present, it carries no information
  * and must be discarded.
  */
-static int path_mount(const char *dev_name, struct path *path,
+int path_mount(const char *dev_name, struct path *path,
const char *type_page, unsigned long flags, void *data_page)
 {
unsigned int mnt_flags = 0, sb_flags;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
new file mode 100644
index 00..af9ea88a60e0bd
--- /dev/null
+++ b/include/linux/init_syscalls.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+int __init init_mount(const char *dev_name, const char *dir_name,
+   const char *type_page, unsigned long flags, void *data_page);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a7f22cf58c7efd..83db87b6e5d1e0 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -395,16 +395,16 @@ static int __init do_mount_root(const char *name, const 
char *fs,
int ret;
 
if (data) {
-   /* do_mount() requires a full page as fifth argument */
+   /* init_mount() requires a full page as fifth 

[PATCH 11/23] init: add an init_rmdir helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to rmdir with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_rmdir.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 5 +
 include/linux/init_syscalls.h | 1 +
 include/linux/syscalls.h  | 7 ---
 init/initramfs.c  | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 507ffbb5d146d6..eabd9ed2b51092 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -42,3 +42,8 @@ int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
 }
+
+int __init init_rmdir(const char *pathname)
+{
+   return do_rmdir(AT_FDCWD, getname_kernel(pathname));
+}
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 00d597249549ee..abf3af563c0b3a 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -4,3 +4,4 @@ int __init init_mount(const char *dev_name, const char 
*dir_name,
const char *type_page, unsigned long flags, void *data_page);
 int __init init_umount(const char *name, int flags);
 int __init init_unlink(const char *pathname);
+int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 26f9738e5ab861..a7b14258d245e2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1273,13 +1273,6 @@ int compat_ksys_ipc(u32 call, int first, int second,
  * The following kernel syscall equivalents are just wrappers to fs-internal
  * functions. Therefore, provide stubs to be inlined at the callsites.
  */
-long do_rmdir(int dfd, struct filename *name);
-
-static inline long ksys_rmdir(const char __user *pathname)
-{
-   return do_rmdir(AT_FDCWD, getname(pathname));
-}
-
 extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
 
 static inline long ksys_mkdir(const char __user *pathname, umode_t mode)
diff --git a/init/initramfs.c b/init/initramfs.c
index 7e9db1cfa3c060..fb7210731d9e5d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -300,7 +300,7 @@ static void __init clean_path(char *path, umode_t fmode)
 
if (!vfs_lstat(path, ) && (st.mode ^ fmode) & S_IFMT) {
if (S_ISDIR(st.mode))
-   ksys_rmdir(path);
+   init_rmdir(path);
else
init_unlink(path);
}
-- 
2.27.0



[PATCH 17/23] init: add an init_link helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to link with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_link.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 33 +
 fs/internal.h |  3 +--
 fs/namei.c|  4 ++--
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  9 -
 init/initramfs.c  |  2 +-
 6 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 6d9af40d2897b1..5db9d9f74868e1 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -122,6 +122,39 @@ int __init init_eaccess(const char *filename)
return error;
 }
 
+int __init init_link(const char *oldname, const char *newname)
+{
+   struct dentry *new_dentry;
+   struct path old_path, new_path;
+   int error;
+
+   error = kern_path(oldname, 0, _path);
+   if (error)
+   return error;
+
+   new_dentry = kern_path_create(AT_FDCWD, newname, _path, 0);
+   error = PTR_ERR(new_dentry);
+   if (IS_ERR(new_dentry))
+   goto out;
+
+   error = -EXDEV;
+   if (old_path.mnt != new_path.mnt)
+   goto out_dput;
+   error = may_linkat(_path);
+   if (unlikely(error))
+   goto out_dput;
+   error = security_path_link(old_path.dentry, _path, new_dentry);
+   if (error)
+   goto out_dput;
+   error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry,
+NULL);
+out_dput:
+   done_path_create(_path, new_dentry);
+out:
+   path_put(_path);
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/internal.h b/fs/internal.h
index 6d82681c7d8372..58451b033d2698 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -69,8 +69,7 @@ long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
 long do_symlinkat(const char __user *oldname, int newdfd,
  const char __user *newname);
-int do_linkat(int olddfd, const char __user *oldname, int newdfd,
- const char __user *newname, int flags);
+int may_linkat(struct path *link);
 
 /*
  * namespace.c
diff --git a/fs/namei.c b/fs/namei.c
index d75a6039ae3966..13de64c6be7640 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1024,7 +1024,7 @@ static bool safe_hardlink_source(struct inode *inode)
  *
  * Returns 0 if successful, -ve on error.
  */
-static int may_linkat(struct path *link)
+int may_linkat(struct path *link)
 {
struct inode *inode = link->dentry->d_inode;
 
@@ -4086,7 +4086,7 @@ EXPORT_SYMBOL(vfs_link);
  * with linux 2.0, and to avoid hard-linking to directories
  * and other special files.  --ADM
  */
-int do_linkat(int olddfd, const char __user *oldname, int newdfd,
+static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
  const char __user *newname, int flags)
 {
struct dentry *new_dentry;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 7031c0934bee9f..5ca15a5b55b7d7 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -8,5 +8,6 @@ int __init init_chroot(const char *filename);
 int __init init_chown(const char *filename, uid_t user, gid_t group, int 
flags);
 int __init init_chmod(const char *filename, umode_t mode);
 int __init init_eaccess(const char *filename);
+int __init init_link(const char *oldname, const char *newname);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a2779638e41445..4b18b91ce46573 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1295,15 +1295,6 @@ static inline long ksys_mknod(const char __user 
*filename, umode_t mode,
return do_mknodat(AT_FDCWD, filename, mode, dev);
 }
 
-extern int do_linkat(int olddfd, const char __user *oldname, int newdfd,
-const char __user *newname, int flags);
-
-static inline long ksys_link(const char __user *oldname,
-const char __user *newname)
-{
-   return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
-}
-
 extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
   gid_t group, int flag);
 
diff --git a/init/initramfs.c b/init/initramfs.c
index 21a75f6ca893a9..a3d29318cc351d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -312,7 +312,7 @@ static int __init maybe_link(void)
char *old = find_link(major, minor, ino, mode, collected);
if (old) {
clean_path(collected, 0);
-   return (ksys_link(old, collected) < 0) ? -1 : 1;
+   return (init_link(old, collected) < 0) ? -1 : 1;
}
}
return 0;
-- 

[PATCH 16/23] init: add an init_eaccess helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to check if a file exists based on kernel space file
name and switch the early init code over to it.  Note that this
theoretically changes behavior as it always is based on the effective
permissions.  But during early init that doesn't make a difference.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 13 +
 fs/open.c |  2 +-
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  7 ---
 init/main.c   |  4 ++--
 5 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index a66032d128b618..6d9af40d2897b1 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -109,6 +109,19 @@ int __init init_chmod(const char *filename, umode_t mode)
return error;
 }
 
+int __init init_eaccess(const char *filename)
+{
+   struct path path;
+   int error;
+
+   error = kern_path(filename, LOOKUP_FOLLOW, );
+   if (error)
+   return error;
+   error = inode_permission(d_inode(path.dentry), MAY_ACCESS);
+   path_put();
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/open.c b/fs/open.c
index 7ba89eae46c560..aafecd1f7ba1a5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -394,7 +394,7 @@ static const struct cred *access_override_creds(void)
return old_cred;
 }
 
-long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
+static long do_faccessat(int dfd, const char __user *filename, int mode, int 
flags)
 {
struct path path;
struct inode *inode;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 2b1b4dc586825f..7031c0934bee9f 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -7,5 +7,6 @@ int __init init_chdir(const char *filename);
 int __init init_chroot(const char *filename);
 int __init init_chown(const char *filename, uid_t user, gid_t group, int 
flags);
 int __init init_chmod(const char *filename, umode_t mode);
+int __init init_eaccess(const char *filename);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 8b71fa321ca20c..a2779638e41445 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1304,13 +1304,6 @@ static inline long ksys_link(const char __user *oldname,
return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
-long do_faccessat(int dfd, const char __user *filename, int mode, int flags);
-
-static inline long ksys_access(const char __user *filename, int mode)
-{
-   return do_faccessat(AT_FDCWD, filename, mode, 0);
-}
-
 extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
   gid_t group, int flag);
 
diff --git a/init/main.c b/init/main.c
index 47698427b15f62..1c710d3e1d461a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -96,6 +96,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1514,8 +1515,7 @@ static noinline void __init kernel_init_freeable(void)
 * check if there is an early userspace init.  If yes, let it do all
 * the work
 */
-   if (ksys_access((const char __user *)
-   ramdisk_execute_command, 0) != 0) {
+   if (init_eaccess(ramdisk_execute_command) != 0) {
ramdisk_execute_command = NULL;
prepare_namespace();
}
-- 
2.27.0



[PATCH 19/23] init: add an init_mkdir helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to mkdir with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_mkdir.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 18 ++
 fs/internal.h |  1 -
 fs/namei.c|  2 +-
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  7 ---
 init/do_mounts_initrd.c   |  2 +-
 init/initramfs.c  |  2 +-
 init/noinitramfs.c|  5 +++--
 8 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 09ef2b58d48caa..127033d0842601 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -176,6 +176,24 @@ int __init init_unlink(const char *pathname)
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
 }
 
+int __init init_mkdir(const char *pathname, umode_t mode)
+{
+   struct dentry *dentry;
+   struct path path;
+   int error;
+
+   dentry = kern_path_create(AT_FDCWD, pathname, , LOOKUP_DIRECTORY);
+   if (IS_ERR(dentry))
+   return PTR_ERR(dentry);
+   if (!IS_POSIXACL(path.dentry->d_inode))
+   mode &= ~current_umask();
+   error = security_path_mkdir(, dentry, mode);
+   if (!error)
+   error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+   done_path_create(, dentry);
+   return error;
+}
+
 int __init init_rmdir(const char *pathname)
 {
return do_rmdir(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/internal.h b/fs/internal.h
index 40b50a222d7a22..4741e591e923bf 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -64,7 +64,6 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
   const char *, unsigned int, struct path *);
 long do_mknodat(int dfd, const char __user *filename, umode_t mode,
unsigned int dev);
-long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
 long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
 int may_linkat(struct path *link);
diff --git a/fs/namei.c b/fs/namei.c
index 2f6fa53eb3da28..d6b25dd32f4d50 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3645,7 +3645,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, 
umode_t mode)
 }
 EXPORT_SYMBOL(vfs_mkdir);
 
-long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
+static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
 {
struct dentry *dentry;
struct path path;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 125f55ae3f80b8..d808985231f8f8 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -11,4 +11,5 @@ int __init init_eaccess(const char *filename);
 int __init init_link(const char *oldname, const char *newname);
 int __init init_symlink(const char *oldname, const char *newname);
 int __init init_unlink(const char *pathname);
+int __init init_mkdir(const char *pathname, umode_t mode);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7cdc0d749a049f..5ef77a91382aa5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1270,13 +1270,6 @@ int compat_ksys_ipc(u32 call, int first, int second,
  * The following kernel syscall equivalents are just wrappers to fs-internal
  * functions. Therefore, provide stubs to be inlined at the callsites.
  */
-extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
-
-static inline long ksys_mkdir(const char __user *pathname, umode_t mode)
-{
-   return do_mkdirat(AT_FDCWD, pathname, mode);
-}
-
 extern long do_mknodat(int dfd, const char __user *filename, umode_t mode,
   unsigned int dev);
 
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a6b447b191dbc8..3f5ac81913dde4 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -81,7 +81,7 @@ static void __init handle_initrd(void)
create_dev("/dev/root.old", Root_RAM0);
/* mount initrd on rootfs' /root */
mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
-   ksys_mkdir("/old", 0700);
+   init_mkdir("/old", 0700);
init_chdir("/old");
 
/*
diff --git a/init/initramfs.c b/init/initramfs.c
index b74d18657e7a17..23513e4419e052 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -348,7 +348,7 @@ static int __init do_name(void)
state = CopyFile;
}
} else if (S_ISDIR(mode)) {
-   ksys_mkdir(collected, mode);
+   init_mkdir(collected, mode);
init_chown(collected, uid, gid, 0);
init_chmod(collected, mode);
dir_add(collected, mtime);
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
index fa9cdfa7101d3c..94cc4df74b11f2 100644
--- a/init/noinitramfs.c
+++ b/init/noinitramfs.c
@@ -9,6 +9,7 @@
 #include 
 

[PATCH 22/23] init: add an init_utimes helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to set timestamps with a kernel space file name and
switch the early init code over to it.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 13 +
 include/linux/init_syscalls.h |  1 +
 init/initramfs.c  |  3 +--
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 51646ba38099e6..db5c48a85644fa 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -238,3 +238,16 @@ int __init init_rmdir(const char *pathname)
 {
return do_rmdir(AT_FDCWD, getname_kernel(pathname));
 }
+
+int __init init_utimes(char *filename, struct timespec64 *ts)
+{
+   struct path path;
+   int error;
+
+   error = kern_path(filename, 0, &path);
+   if (error)
+   return error;
+   error = vfs_utimes(&path, ts);
+   path_put(&path);
+   return error;
+}
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index b2fda50daca6c5..3654b525ac0b17 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -15,3 +15,4 @@ int __init init_symlink(const char *oldname, const char 
*newname);
 int __init init_unlink(const char *pathname);
 int __init init_mkdir(const char *pathname, umode_t mode);
 int __init init_rmdir(const char *pathname);
+int __init init_utimes(char *filename, struct timespec64 *ts);
diff --git a/init/initramfs.c b/init/initramfs.c
index 8f7e39f06547ff..d5351737624edd 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -111,8 +111,7 @@ static long __init do_utime(char *filename, time64_t mtime)
t[0].tv_nsec = 0;
t[1].tv_sec = mtime;
t[1].tv_nsec = 0;
-
-   return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW);
+   return init_utimes(filename, t);
 }
 
 static __initdata LIST_HEAD(dir_list);
-- 
2.27.0



Re: [PATCH net-next v2] net: dsa: qca8k: Add 802.1q VLAN support

2020-07-28 Thread Vladimir Oltean
Hi Jonathan,

On Sun, Jul 26, 2020 at 03:56:11PM +0100, Jonathan McDowell wrote:
> This adds full 802.1q VLAN support to the qca8k, allowing the use of
> vlan_filtering and more complicated bridging setups than allowed by
> basic port VLAN support.
> 
> Tested with a number of untagged ports with separate VLANs and then a
> trunk port with all the VLANs tagged on it.
> 
> v2:
> - Return sensible errnos on failure rather than -1 (rmk)
> - Style cleanups based on Florian's feedback
> - Silently allow VLAN 0 as device correctly treats this as no tag
> 
> Signed-off-by: Jonathan McDowell 
> ---

This generally looks ok. The integration with the APIs is fine.
Some comments below.

>  drivers/net/dsa/qca8k.c | 191 ++--
>  drivers/net/dsa/qca8k.h |  28 ++
>  2 files changed, 214 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
> index a5566de82853..1cc61bc8929f 100644
> --- a/drivers/net/dsa/qca8k.c
> +++ b/drivers/net/dsa/qca8k.c
> @@ -408,6 +408,111 @@ qca8k_fdb_flush(struct qca8k_priv *priv)
>   mutex_unlock(&priv->reg_mutex);
>  }
>  
> +static int
> +qca8k_vlan_access(struct qca8k_priv *priv, enum qca8k_vlan_cmd cmd, u16 vid)
> +{
> + u32 reg;
> +
> + /* Set the command and VLAN index */
> + reg = QCA8K_VTU_FUNC1_BUSY;
> + reg |= cmd;
> + reg |= vid << QCA8K_VTU_FUNC1_VID_S;
> +
> + /* Write the function register triggering the table access */
> + qca8k_write(priv, QCA8K_REG_VTU_FUNC1, reg);
> +
> + /* wait for completion */
> + if (qca8k_busy_wait(priv, QCA8K_REG_VTU_FUNC1, QCA8K_VTU_FUNC1_BUSY))
> + return -ETIMEDOUT;
> +
> + /* Check for table full violation when adding an entry */
> + if (cmd == QCA8K_VLAN_LOAD) {
> + reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC1);
> + if (reg & QCA8K_VTU_FUNC1_FULL)
> + return -ENOMEM;
> + }
> +
> + return 0;
> +}
> +
> +static int
> +qca8k_vlan_add(struct qca8k_priv *priv, u8 port, u16 vid, bool tagged)

It is customary to keep referring to this bool as 'untagged' for
consistency with many other parts of the kernel.

> +{
> + u32 reg;
> + int ret;
> +
> + /* We do the right thing with VLAN 0 and treat it as untagged */

...while also preserving the tag on egress.

> + if (vid == 0)
> + return 0;
> +
> + mutex_lock(&priv->reg_mutex);

Unrelated, but what's the purpose of this mutex?

> + ret = qca8k_vlan_access(priv, QCA8K_VLAN_READ, vid);
> + if (ret < 0)
> + goto out;
> +
> + reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
> + reg |= QCA8K_VTU_FUNC0_VALID | QCA8K_VTU_FUNC0_IVL_EN;
> + reg &= ~(3 << QCA8K_VTU_FUNC0_EG_MODE_S(port));
> + if (tagged)
> + reg |= QCA8K_VTU_FUNC0_EG_MODE_TAG <<
> + QCA8K_VTU_FUNC0_EG_MODE_S(port);
> + else
> + reg |= QCA8K_VTU_FUNC0_EG_MODE_UNTAG <<
> + QCA8K_VTU_FUNC0_EG_MODE_S(port);
> +

Not thrilled about the "3 <<" thing, maybe a definition like the one
below would look better:

#define QCA8K_VTU_FUNC_REG0_EG_VLAN_MODE_MASK(port) \
GENMASK(5 + (port) * 2, 4 + (port) * 2)

...

int eg_vlan_mode = QCA8K_VTU_FUNC_REG0_EG_MODE_TAG;

reg &= ~QCA8K_VTU_FUNC_REG0_EG_VLAN_MODE_MASK(port);
if (tagged)
eg_vlan_mode = QCA8K_VTU_FUNC_REG0_EG_MODE_UNTAG;
reg |= QCA8K_VTU_FUNC_REG0_EG_MODE(eg_vlan_mode, port);

Your call if you want to change this, though.

> + qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
> + ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid);
> +
> +out:
> + mutex_unlock(&priv->reg_mutex);
> +
> + return ret;
> +}
> +
> +static int
> +qca8k_vlan_del(struct qca8k_priv *priv, u8 port, u16 vid)
> +{
> + u32 reg;
> + u32 mask;
> + int ret;
> + int i;
> + bool del;

How about:

u32 reg, mask;
int ret, i;
bool del;

> +
> + mutex_lock(&priv->reg_mutex);
> + ret = qca8k_vlan_access(priv, QCA8K_VLAN_READ, vid);
> + if (ret < 0)
> + goto out;
> +
> + reg = qca8k_read(priv, QCA8K_REG_VTU_FUNC0);
> + reg &= ~(3 << QCA8K_VTU_FUNC0_EG_MODE_S(port));
> + reg |= QCA8K_VTU_FUNC0_EG_MODE_NOT <<
> + QCA8K_VTU_FUNC0_EG_MODE_S(port);
> +
> + /* Check if we're the last member to be removed */
> + del = true;
> + for (i = 0; i < QCA8K_NUM_PORTS; i++) {
> + mask = QCA8K_VTU_FUNC0_EG_MODE_NOT;
> + mask <<= QCA8K_VTU_FUNC0_EG_MODE_S(i);
> +
> + if ((reg & mask) != mask) {
> + del = false;
> + break;
> + }
> + }
> +
> + if (del) {
> + ret = qca8k_vlan_access(priv, QCA8K_VLAN_PURGE, vid);
> + } else {
> + qca8k_write(priv, QCA8K_REG_VTU_FUNC0, reg);
> + ret = qca8k_vlan_access(priv, QCA8K_VLAN_LOAD, vid);
> + }
> +
> 

[PATCH 07/23] init: mark create_dev as __init

2020-07-28 Thread Christoph Hellwig
This helper is only used for the early init code.

Signed-off-by: Christoph Hellwig 
---
 init/do_mounts.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/do_mounts.h b/init/do_mounts.h
index c855b3f0e06d19..021e2f60223e25 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -13,7 +13,7 @@ void  mount_block_root(char *name, int flags);
 void  mount_root(void);
 extern int root_mountflags;
 
-static inline int create_dev(char *name, dev_t dev)
+static inline __init int create_dev(char *name, dev_t dev)
 {
ksys_unlink(name);
return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
-- 
2.27.0



[PATCH 20/23] init: add an init_mknod helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to mknod with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_mknod.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 25 +
 fs/internal.h |  2 --
 fs/namei.c|  2 +-
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  9 -
 init/do_mounts.h  |  2 +-
 init/initramfs.c  |  2 +-
 init/noinitramfs.c|  3 +--
 8 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 127033d0842601..145fb31b7a5f2d 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -122,6 +122,31 @@ int __init init_eaccess(const char *filename)
return error;
 }
 
+int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
+{
+   struct dentry *dentry;
+   struct path path;
+   int error;
+
+   if (S_ISFIFO(mode) || S_ISSOCK(mode))
+   dev = 0;
+   else if (!(S_ISBLK(mode) || S_ISCHR(mode)))
+   return -EINVAL;
+
+   dentry = kern_path_create(AT_FDCWD, filename, &path, 0);
+   if (IS_ERR(dentry))
+   return PTR_ERR(dentry);
+
+   if (!IS_POSIXACL(path.dentry->d_inode))
+   mode &= ~current_umask();
+   error = security_path_mknod(&path, dentry, mode, dev);
+   if (!error)
+   error = vfs_mknod(path.dentry->d_inode, dentry, mode,
+ new_decode_dev(dev));
+   done_path_create(&path, dentry);
+   return error;
+}
+
 int __init init_link(const char *oldname, const char *newname)
 {
struct dentry *new_dentry;
diff --git a/fs/internal.h b/fs/internal.h
index 4741e591e923bf..07e145b2f88c4a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -62,8 +62,6 @@ extern int filename_lookup(int dfd, struct filename *name, 
unsigned flags,
   struct path *path, struct path *root);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
   const char *, unsigned int, struct path *);
-long do_mknodat(int dfd, const char __user *filename, umode_t mode,
-   unsigned int dev);
 long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
 int may_linkat(struct path *link);
diff --git a/fs/namei.c b/fs/namei.c
index d6b25dd32f4d50..fde8fe086c090d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3564,7 +3564,7 @@ static int may_mknod(umode_t mode)
}
 }
 
-long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
unsigned int dev)
 {
struct dentry *dentry;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index d808985231f8f8..fa1fe7a877795f 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -8,6 +8,7 @@ int __init init_chroot(const char *filename);
 int __init init_chown(const char *filename, uid_t user, gid_t group, int 
flags);
 int __init init_chmod(const char *filename, umode_t mode);
 int __init init_eaccess(const char *filename);
+int __init init_mknod(const char *filename, umode_t mode, unsigned int dev);
 int __init init_link(const char *oldname, const char *newname);
 int __init init_symlink(const char *oldname, const char *newname);
 int __init init_unlink(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5ef77a91382aa5..63046c5e9fc5d4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1270,15 +1270,6 @@ int compat_ksys_ipc(u32 call, int first, int second,
  * The following kernel syscall equivalents are just wrappers to fs-internal
  * functions. Therefore, provide stubs to be inlined at the callsites.
  */
-extern long do_mknodat(int dfd, const char __user *filename, umode_t mode,
-  unsigned int dev);
-
-static inline long ksys_mknod(const char __user *filename, umode_t mode,
- unsigned int dev)
-{
-   return do_mknodat(AT_FDCWD, filename, mode, dev);
-}
-
 extern int do_fchownat(int dfd, const char __user *filename, uid_t user,
   gid_t group, int flag);
 
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 104d8431725aeb..7a29ac3e427bab 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -17,7 +17,7 @@ extern int root_mountflags;
 static inline __init int create_dev(char *name, dev_t dev)
 {
init_unlink(name);
-   return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
+   return init_mknod(name, S_IFBLK | 0600, new_encode_dev(dev));
 }
 
 #ifdef CONFIG_BLK_DEV_RAM
diff --git a/init/initramfs.c b/init/initramfs.c
index 23513e4419e052..ad5800c2d8e206 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -355,7 +355,7 @@ static int __init do_name(void)
} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
   S_ISFIFO(mode) 

[PATCH 06/23] init: mark console_on_rootfs as __init

2020-07-28 Thread Christoph Hellwig
This helper is only used for the early init code.

Signed-off-by: Christoph Hellwig 
---
 init/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/main.c b/init/main.c
index c2c9143db96795..47698427b15f62 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1458,7 +1458,7 @@ static int __ref kernel_init(void *unused)
 }
 
 /* Open /dev/console, for stdin/stdout/stderr, this should never fail */
-void console_on_rootfs(void)
+void __init console_on_rootfs(void)
 {
struct file *file = filp_open("/dev/console", O_RDWR, 0);
 
-- 
2.27.0



[PATCH 21/23] init: add an init_stat helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to stat with a kernel space file name and switch
the early init code over to it.

Signed-off-by: Christoph Hellwig 
---
 drivers/md/md-autodetect.c|  3 ++-
 fs/init.c | 15 +++
 include/linux/init_syscalls.h |  1 +
 init/initramfs.c  |  3 ++-
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index 14b6e86814c061..6bbec89976a748 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include <linux/init_syscalls.h>
 #include 
 #include 
 #include 
@@ -151,7 +152,7 @@ static void __init md_setup_drive(struct md_setup_args 
*args)
if (strncmp(devname, "/dev/", 5) == 0)
devname += 5;
snprintf(comp_name, 63, "/dev/%s", devname);
-   if (vfs_stat(comp_name, &stat) == 0 && S_ISBLK(stat.mode))
+   if (init_stat(comp_name, &stat, 0) == 0 && S_ISBLK(stat.mode))
dev = new_decode_dev(stat.rdev);
if (!dev) {
pr_warn("md: Unknown device name: %s\n", devname);
diff --git a/fs/init.c b/fs/init.c
index 145fb31b7a5f2d..51646ba38099e6 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -122,6 +122,21 @@ int __init init_eaccess(const char *filename)
return error;
 }
 
+int __init init_stat(const char *filename, struct kstat *stat, int flags)
+{
+   int lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+   struct path path;
+   int error;
+
+   error = kern_path(filename, lookup_flags, &path);
+   if (error)
+   return error;
+   error = vfs_getattr(&path, stat, STATX_BASIC_STATS,
+   flags | AT_NO_AUTOMOUNT);
+   path_put(&path);
+   return error;
+}
+
 int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
 {
struct dentry *dentry;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index fa1fe7a877795f..b2fda50daca6c5 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -8,6 +8,7 @@ int __init init_chroot(const char *filename);
 int __init init_chown(const char *filename, uid_t user, gid_t group, int 
flags);
 int __init init_chmod(const char *filename, umode_t mode);
 int __init init_eaccess(const char *filename);
+int __init init_stat(const char *filename, struct kstat *stat, int flags);
 int __init init_mknod(const char *filename, umode_t mode, unsigned int dev);
 int __init init_link(const char *oldname, const char *newname);
 int __init init_symlink(const char *oldname, const char *newname);
diff --git a/init/initramfs.c b/init/initramfs.c
index ad5800c2d8e206..8f7e39f06547ff 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -298,7 +298,8 @@ static void __init clean_path(char *path, umode_t fmode)
 {
struct kstat st;
 
-   if (!vfs_lstat(path, &st) && (st.mode ^ fmode) & S_IFMT) {
+   if (!init_stat(path, &st, AT_SYMLINK_NOFOLLOW) &&
+   (st.mode ^ fmode) & S_IFMT) {
if (S_ISDIR(st.mode))
init_rmdir(path);
else
-- 
2.27.0



[PATCH 05/23] init: initialize ramdisk_execute_command at compile time

2020-07-28 Thread Christoph Hellwig
Set ramdisk_execute_command to "/init" at compile time.  The command
line can still override it, but this saves a few instructions and
removes a NULL check.

Signed-off-by: Christoph Hellwig 
---
 init/main.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/init/main.c b/init/main.c
index db0621dfbb0468..c2c9143db96795 100644
--- a/init/main.c
+++ b/init/main.c
@@ -154,7 +154,7 @@ static bool initargs_found;
 #endif
 
 static char *execute_command;
-static char *ramdisk_execute_command;
+static char *ramdisk_execute_command = "/init";
 
 /*
  * Used to generate warnings if static_key manipulation functions are used
@@ -1514,10 +1514,6 @@ static noinline void __init kernel_init_freeable(void)
 * check if there is an early userspace init.  If yes, let it do all
 * the work
 */
-
-   if (!ramdisk_execute_command)
-   ramdisk_execute_command = "/init";
-
if (ksys_access((const char __user *)
ramdisk_execute_command, 0) != 0) {
ramdisk_execute_command = NULL;
-- 
2.27.0



[PATCH 23/23] init: add an init_dup helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to grab a reference to a file and install it at
the next available fd, and switch the early init code over to it.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 12 
 include/linux/init_syscalls.h |  1 +
 init/main.c   |  7 +++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index db5c48a85644fa..730e05acda2392 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "internal.h"
@@ -251,3 +252,14 @@ int __init init_utimes(char *filename, struct timespec64 
*ts)
 path_put(&path);
return error;
 }
+
+int __init init_dup(struct file *file)
+{
+   int fd;
+
+   fd = get_unused_fd_flags(0);
+   if (fd < 0)
+   return fd;
+   fd_install(fd, get_file(file));
+   return 0;
+}
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 3654b525ac0b17..92045d18cbfc99 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -16,3 +16,4 @@ int __init init_unlink(const char *pathname);
 int __init init_mkdir(const char *pathname, umode_t mode);
 int __init init_rmdir(const char *pathname);
 int __init init_utimes(char *filename, struct timespec64 *ts);
+int __init init_dup(struct file *file);
diff --git a/init/main.c b/init/main.c
index 1c710d3e1d461a..089e21504b1fc1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1467,10 +1467,9 @@ void __init console_on_rootfs(void)
pr_err("Warning: unable to open an initial console.\n");
return;
}
-   get_file_rcu_many(file, 2);
-   fd_install(get_unused_fd_flags(0), file);
-   fd_install(get_unused_fd_flags(0), file);
-   fd_install(get_unused_fd_flags(0), file);
+   init_dup(file);
+   init_dup(file);
+   init_dup(file);
 }
 
 static noinline void __init kernel_init_freeable(void)
-- 
2.27.0



[PATCH 18/23] init: add an init_symlink helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to symlink with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_symlink.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 16 
 fs/internal.h |  2 --
 fs/namei.c|  2 +-
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  9 -
 init/initramfs.c  |  2 +-
 6 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 5db9d9f74868e1..09ef2b58d48caa 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -155,6 +155,22 @@ int __init init_link(const char *oldname, const char 
*newname)
return error;
 }
 
+int __init init_symlink(const char *oldname, const char *newname)
+{
+   struct dentry *dentry;
+   struct path path;
+   int error;
+
+   dentry = kern_path_create(AT_FDCWD, newname, &path, 0);
+   if (IS_ERR(dentry))
+   return PTR_ERR(dentry);
+   error = security_path_symlink(&path, dentry, oldname);
+   if (!error)
+   error = vfs_symlink(path.dentry->d_inode, dentry, oldname);
+   done_path_create(&path, dentry);
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/internal.h b/fs/internal.h
index 58451b033d2698..40b50a222d7a22 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -67,8 +67,6 @@ long do_mknodat(int dfd, const char __user *filename, umode_t 
mode,
 long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
 long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
-long do_symlinkat(const char __user *oldname, int newdfd,
- const char __user *newname);
 int may_linkat(struct path *link);
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index 13de64c6be7640..2f6fa53eb3da28 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3955,7 +3955,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, 
const char *oldname)
 }
 EXPORT_SYMBOL(vfs_symlink);
 
-long do_symlinkat(const char __user *oldname, int newdfd,
+static long do_symlinkat(const char __user *oldname, int newdfd,
  const char __user *newname)
 {
int error;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 5ca15a5b55b7d7..125f55ae3f80b8 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -9,5 +9,6 @@ int __init init_chown(const char *filename, uid_t user, gid_t 
group, int flags);
 int __init init_chmod(const char *filename, umode_t mode);
 int __init init_eaccess(const char *filename);
 int __init init_link(const char *oldname, const char *newname);
+int __init init_symlink(const char *oldname, const char *newname);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4b18b91ce46573..7cdc0d749a049f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1277,15 +1277,6 @@ static inline long ksys_mkdir(const char __user 
*pathname, umode_t mode)
return do_mkdirat(AT_FDCWD, pathname, mode);
 }
 
-extern long do_symlinkat(const char __user *oldname, int newdfd,
-const char __user *newname);
-
-static inline long ksys_symlink(const char __user *oldname,
-   const char __user *newname)
-{
-   return do_symlinkat(oldname, AT_FDCWD, newname);
-}
-
 extern long do_mknodat(int dfd, const char __user *filename, umode_t mode,
   unsigned int dev);
 
diff --git a/init/initramfs.c b/init/initramfs.c
index a3d29318cc351d..b74d18657e7a17 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -393,7 +393,7 @@ static int __init do_symlink(void)
 {
collected[N_ALIGN(name_len) + body_len] = '\0';
clean_path(collected, 0);
-   ksys_symlink(collected + N_ALIGN(name_len), collected);
+   init_symlink(collected + N_ALIGN(name_len), collected);
init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW);
do_utime(collected, mtime);
state = SkipIt;
-- 
2.27.0



[PATCH 12/23] init: add an init_chdir helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to chdir with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_chdir.

Signed-off-by: Christoph Hellwig 
---
 drivers/base/devtmpfs.c   |  2 +-
 fs/init.c | 16 
 fs/open.c |  7 +--
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  1 -
 init/do_mounts.c  |  2 +-
 init/do_mounts_initrd.c   |  8 
 7 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 32af6cb987b428..e48aaba3166b5d 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -412,7 +412,7 @@ static int __init devtmpfs_setup(void *p)
err = init_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
if (err)
goto out;
-   ksys_chdir("/.."); /* will traverse into overmounted root */
+   init_chdir("/.."); /* will traverse into overmounted root */
ksys_chroot(".");
 out:
*(int *)p = err;
diff --git a/fs/init.c b/fs/init.c
index eabd9ed2b51092..64d4e12eba9339 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "internal.h"
 
@@ -38,6 +39,21 @@ int __init init_umount(const char *name, int flags)
 return path_umount(&path, flags);
 }
 
+int __init init_chdir(const char *filename)
+{
+   struct path path;
+   int error;
+
+   error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
+   if (error)
+   return error;
+   error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+   if (!error)
+   set_fs_pwd(current->fs, &path);
+   path_put(&path);
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/open.c b/fs/open.c
index b316dd6a86a8b9..723e0ac898935e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -482,7 +482,7 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, 
mode)
return do_faccessat(AT_FDCWD, filename, mode, 0);
 }
 
-int ksys_chdir(const char __user *filename)
+SYSCALL_DEFINE1(chdir, const char __user *, filename)
 {
struct path path;
int error;
@@ -508,11 +508,6 @@ int ksys_chdir(const char __user *filename)
return error;
 }
 
-SYSCALL_DEFINE1(chdir, const char __user *, filename)
-{
-   return ksys_chdir(filename);
-}
-
 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 {
struct fd f = fdget_raw(fd);
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index abf3af563c0b3a..1e845910ae56e9 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -3,5 +3,6 @@
 int __init init_mount(const char *dev_name, const char *dir_name,
const char *type_page, unsigned long flags, void *data_page);
 int __init init_umount(const char *name, int flags);
+int __init init_chdir(const char *filename);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a7b14258d245e2..31fa67fb9894b3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1238,7 +1238,6 @@ asmlinkage long sys_ni_syscall(void);
 
 int ksys_chroot(const char __user *filename);
 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
-int ksys_chdir(const char __user *filename);
 int ksys_fchown(unsigned int fd, uid_t user, gid_t group);
 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count);
 void ksys_sync(void);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 83db87b6e5d1e0..a7581c6e85f268 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -408,7 +408,7 @@ static int __init do_mount_root(const char *name, const 
char *fs,
if (ret)
goto out;
 
-   ksys_chdir("/root");
+   init_chdir("/root");
s = current->fs->pwd.dentry->d_sb;
ROOT_DEV = s->s_dev;
printk(KERN_INFO
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 8b44dd017842a8..04627fd22a921f 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -61,7 +61,7 @@ static int __init init_linuxrc(struct subprocess_info *info, 
struct cred *new)
ksys_unshare(CLONE_FS | CLONE_FILES);
console_on_rootfs();
/* move initrd over / and chdir/chroot in initrd root */
-   ksys_chdir("/root");
+   init_chdir("/root");
init_mount(".", "/", NULL, MS_MOVE, NULL);
ksys_chroot(".");
ksys_setsid();
@@ -82,7 +82,7 @@ static void __init handle_initrd(void)
/* mount initrd on rootfs' /root */
mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
ksys_mkdir("/old", 0700);
-   ksys_chdir("/old");
+   init_chdir("/old");
 
/*
 * In case that a resume from disk is 

[PATCH 14/23] init: add an init_chown helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to chown with a kernel space file name and switch
the early init code over to it.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 18 ++
 fs/internal.h |  2 +-
 fs/open.c |  2 +-
 include/linux/init_syscalls.h |  1 +
 init/initramfs.c  |  6 +++---
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 2c78f24814dde4..edd0244655956e 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -78,6 +78,24 @@ int __init init_chroot(const char *filename)
return error;
 }
 
+int __init init_chown(const char *filename, uid_t user, gid_t group, int flags)
+{
+   int lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+   struct path path;
+   int error;
+
+   error = kern_path(filename, lookup_flags, &path);
+   if (error)
+   return error;
+   error = mnt_want_write(path.mnt);
+   if (!error) {
+   error = chown_common(&path, user, group);
+   mnt_drop_write(path.mnt);
+   }
+   path_put(&path);
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/internal.h b/fs/internal.h
index 491d1e63809b37..e81b9e23c3ea3f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -134,7 +134,7 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int 
small);
 int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
-
+int chown_common(const struct path *path, uid_t user, gid_t group);
 extern int vfs_open(const struct path *, struct file *);
 
 /*
diff --git a/fs/open.c b/fs/open.c
index f62f4752bb436d..49960a1248f14b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -639,7 +639,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, 
umode_t, mode)
return do_fchmodat(AT_FDCWD, filename, mode);
 }
 
-static int chown_common(const struct path *path, uid_t user, gid_t group)
+int chown_common(const struct path *path, uid_t user, gid_t group)
 {
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index e07099a14b91db..0da59d76133e17 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -5,5 +5,6 @@ int __init init_mount(const char *dev_name, const char 
*dir_name,
 int __init init_umount(const char *name, int flags);
 int __init init_chdir(const char *filename);
 int __init init_chroot(const char *filename);
+int __init init_chown(const char *filename, uid_t user, gid_t group, int 
flags);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/init/initramfs.c b/init/initramfs.c
index fb7210731d9e5d..24a8dcc6734064 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -349,14 +349,14 @@ static int __init do_name(void)
}
} else if (S_ISDIR(mode)) {
ksys_mkdir(collected, mode);
-   ksys_chown(collected, uid, gid);
+   init_chown(collected, uid, gid, 0);
ksys_chmod(collected, mode);
dir_add(collected, mtime);
} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
   S_ISFIFO(mode) || S_ISSOCK(mode)) {
if (maybe_link() == 0) {
ksys_mknod(collected, mode, rdev);
-   ksys_chown(collected, uid, gid);
+   init_chown(collected, uid, gid, 0);
ksys_chmod(collected, mode);
do_utime(collected, mtime);
}
@@ -394,7 +394,7 @@ static int __init do_symlink(void)
collected[N_ALIGN(name_len) + body_len] = '\0';
clean_path(collected, 0);
ksys_symlink(collected + N_ALIGN(name_len), collected);
-   ksys_lchown(collected, uid, gid);
+   init_chown(collected, uid, gid, AT_SYMLINK_NOFOLLOW);
do_utime(collected, mtime);
state = SkipIt;
next_state = Reset;
-- 
2.27.0



[PATCH 10/23] init: add an init_unlink helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to unlink with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_unlink.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 5 +
 include/linux/init_syscalls.h | 1 +
 include/linux/syscalls.h  | 7 ---
 init/do_mounts.h  | 2 +-
 init/do_mounts_initrd.c   | 4 ++--
 init/do_mounts_rd.c   | 2 +-
 init/initramfs.c  | 3 ++-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index 9c8e31fdb048c8..507ffbb5d146d6 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -37,3 +37,8 @@ int __init init_umount(const char *name, int flags)
return ret;
return path_umount(&path, flags);
 }
+
+int __init init_unlink(const char *pathname)
+{
+   return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
+}
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index a5a2e7f1991691..00d597249549ee 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -3,3 +3,4 @@
 int __init init_mount(const char *dev_name, const char *dir_name,
const char *type_page, unsigned long flags, void *data_page);
 int __init init_umount(const char *name, int flags);
+int __init init_unlink(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1a4f5d8ee7044b..26f9738e5ab861 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1273,13 +1273,6 @@ int compat_ksys_ipc(u32 call, int first, int second,
  * The following kernel syscall equivalents are just wrappers to fs-internal
  * functions. Therefore, provide stubs to be inlined at the callsites.
  */
-extern long do_unlinkat(int dfd, struct filename *name);
-
-static inline long ksys_unlink(const char __user *pathname)
-{
-   return do_unlinkat(AT_FDCWD, getname(pathname));
-}
-
 long do_rmdir(int dfd, struct filename *name);
 
 static inline long ksys_rmdir(const char __user *pathname)
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 20e7fec8cb499e..104d8431725aeb 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -16,7 +16,7 @@ extern int root_mountflags;
 
 static inline __init int create_dev(char *name, dev_t dev)
 {
-   ksys_unlink(name);
+   init_unlink(name);
return ksys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
 }
 
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 6b020a06990251..8b44dd017842a8 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -137,11 +137,11 @@ bool __init initrd_load(void)
 * mounted in the normal path.
 */
if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
-   ksys_unlink("/initrd.image");
+   init_unlink("/initrd.image");
handle_initrd();
return true;
}
}
-   ksys_unlink("/initrd.image");
+   init_unlink("/initrd.image");
return false;
 }
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index d4255c10432a8b..ac021ae6e6fa78 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -272,7 +272,7 @@ int __init rd_load_image(char *from)
fput(out_file);
 out:
kfree(buf);
-   ksys_unlink("/dev/ram");
+   init_unlink("/dev/ram");
return res;
 }
 
diff --git a/init/initramfs.c b/init/initramfs.c
index 584bc8fe88e77c..7e9db1cfa3c060 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include <linux/init_syscalls.h>
 
 static ssize_t __init xwrite(struct file *file, const char *p, size_t count,
loff_t *pos)
@@ -301,7 +302,7 @@ static void __init clean_path(char *path, umode_t fmode)
if (S_ISDIR(st.mode))
ksys_rmdir(path);
else
-   ksys_unlink(path);
+   init_unlink(path);
}
 }
 
-- 
2.27.0



[PATCH 15/23] init: add an init_chmod helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to chmod with a kernel space file name and switch
the early init code over to it.

Signed-off-by: Christoph Hellwig 
---
 fs/init.c | 13 +
 fs/internal.h |  2 +-
 fs/open.c |  4 ++--
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  7 ---
 init/initramfs.c  |  4 ++--
 6 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/fs/init.c b/fs/init.c
index edd0244655956e..a66032d128b618 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -96,6 +96,19 @@ int __init init_chown(const char *filename, uid_t user, 
gid_t group, int flags)
return error;
 }
 
+int __init init_chmod(const char *filename, umode_t mode)
+{
+   struct path path;
+   int error;
+
+   error = kern_path(filename, LOOKUP_FOLLOW, &path);
+   if (error)
+   return error;
+   error = chmod_common(&path, mode);
+   path_put(&path);
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/internal.h b/fs/internal.h
index e81b9e23c3ea3f..6d82681c7d8372 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -131,7 +131,7 @@ extern struct open_how build_open_how(int flags, umode_t 
mode);
 extern int build_open_flags(const struct open_how *how, struct open_flags *op);
 
 long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
-int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
+int chmod_common(const struct path *path, umode_t mode);
 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
int flag);
 int chown_common(const struct path *path, uid_t user, gid_t group);
diff --git a/fs/open.c b/fs/open.c
index 49960a1248f14b..7ba89eae46c560 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -563,7 +563,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
return error;
 }
 
-static int chmod_common(const struct path *path, umode_t mode)
+int chmod_common(const struct path *path, umode_t mode)
 {
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
@@ -610,7 +610,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
return err;
 }
 
-int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
+static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
 {
struct path path;
int error;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 0da59d76133e17..2b1b4dc586825f 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -6,5 +6,6 @@ int __init init_umount(const char *name, int flags);
 int __init init_chdir(const char *filename);
 int __init init_chroot(const char *filename);
 int __init init_chown(const char *filename, uid_t user, gid_t group, int 
flags);
+int __init init_chmod(const char *filename, umode_t mode);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e89d62e944dc0e..8b71fa321ca20c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1304,13 +1304,6 @@ static inline long ksys_link(const char __user *oldname,
return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
-extern int do_fchmodat(int dfd, const char __user *filename, umode_t mode);
-
-static inline int ksys_chmod(const char __user *filename, umode_t mode)
-{
-   return do_fchmodat(AT_FDCWD, filename, mode);
-}
-
 long do_faccessat(int dfd, const char __user *filename, int mode, int flags);
 
 static inline long ksys_access(const char __user *filename, int mode)
diff --git a/init/initramfs.c b/init/initramfs.c
index 24a8dcc6734064..21a75f6ca893a9 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -350,14 +350,14 @@ static int __init do_name(void)
} else if (S_ISDIR(mode)) {
ksys_mkdir(collected, mode);
init_chown(collected, uid, gid, 0);
-   ksys_chmod(collected, mode);
+   init_chmod(collected, mode);
dir_add(collected, mtime);
} else if (S_ISBLK(mode) || S_ISCHR(mode) ||
   S_ISFIFO(mode) || S_ISSOCK(mode)) {
if (maybe_link() == 0) {
ksys_mknod(collected, mode, rdev);
init_chown(collected, uid, gid, 0);
-   ksys_chmod(collected, mode);
+   init_chmod(collected, mode);
do_utime(collected, mtime);
}
}
-- 
2.27.0



[PATCH 13/23] init: add an init_chroot helper

2020-07-28 Thread Christoph Hellwig
Add a simple helper to chroot with a kernel space file name and switch
the early init code over to it.  Remove the now unused ksys_chroot.

Signed-off-by: Christoph Hellwig 
---
 drivers/base/devtmpfs.c   |  2 +-
 fs/init.c | 24 
 fs/open.c |  7 +--
 include/linux/init_syscalls.h |  1 +
 include/linux/syscalls.h  |  2 --
 init/do_mounts.c  |  2 +-
 init/do_mounts_initrd.c   |  4 ++--
 7 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index e48aaba3166b5d..eac184e6d65774 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -413,7 +413,7 @@ static int __init devtmpfs_setup(void *p)
if (err)
goto out;
init_chdir("/.."); /* will traverse into overmounted root */
-   ksys_chroot(".");
+   init_chroot(".");
 out:
*(int *)p = err;
complete(&setup_done);
diff --git a/fs/init.c b/fs/init.c
index 64d4e12eba9339..2c78f24814dde4 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "internal.h"
 
 int __init init_mount(const char *dev_name, const char *dir_name,
@@ -54,6 +55,29 @@ int __init init_chdir(const char *filename)
return error;
 }
 
+int __init init_chroot(const char *filename)
+{
+   struct path path;
+   int error;
+
+   error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
+   if (error)
+   return error;
+   error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+   if (error)
+   goto dput_and_out;
+   error = -EPERM;
+   if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
+   goto dput_and_out;
+   error = security_path_chroot(&path);
+   if (error)
+   goto dput_and_out;
+   set_fs_root(current->fs, &path);
+dput_and_out:
+   path_put(&path);
+   return error;
+}
+
 int __init init_unlink(const char *pathname)
 {
return do_unlinkat(AT_FDCWD, getname_kernel(pathname));
diff --git a/fs/open.c b/fs/open.c
index 723e0ac898935e..f62f4752bb436d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -530,7 +530,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
return error;
 }
 
-int ksys_chroot(const char __user *filename)
+SYSCALL_DEFINE1(chroot, const char __user *, filename)
 {
struct path path;
int error;
@@ -563,11 +563,6 @@ int ksys_chroot(const char __user *filename)
return error;
 }
 
-SYSCALL_DEFINE1(chroot, const char __user *, filename)
-{
-   return ksys_chroot(filename);
-}
-
 static int chmod_common(const struct path *path, umode_t mode)
 {
struct inode *inode = path->dentry->d_inode;
diff --git a/include/linux/init_syscalls.h b/include/linux/init_syscalls.h
index 1e845910ae56e9..e07099a14b91db 100644
--- a/include/linux/init_syscalls.h
+++ b/include/linux/init_syscalls.h
@@ -4,5 +4,6 @@ int __init init_mount(const char *dev_name, const char 
*dir_name,
const char *type_page, unsigned long flags, void *data_page);
 int __init init_umount(const char *name, int flags);
 int __init init_chdir(const char *filename);
+int __init init_chroot(const char *filename);
 int __init init_unlink(const char *pathname);
 int __init init_rmdir(const char *pathname);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 31fa67fb9894b3..e89d62e944dc0e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1235,8 +1235,6 @@ asmlinkage long sys_ni_syscall(void);
  * Instead, use one of the functions which work equivalently, such as
  * the ksys_xyzyyz() functions prototyped below.
  */
-
-int ksys_chroot(const char __user *filename);
 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count);
 int ksys_fchown(unsigned int fd, uid_t user, gid_t group);
 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count);
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a7581c6e85f268..b5f9604d0c98a2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -629,7 +629,7 @@ void __init prepare_namespace(void)
 out:
devtmpfs_mount();
init_mount(".", "/", NULL, MS_MOVE, NULL);
-   ksys_chroot(".");
+   init_chroot(".");
 }
 
 static bool is_tmpfs;
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 04627fd22a921f..a6b447b191dbc8 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -63,7 +63,7 @@ static int __init init_linuxrc(struct subprocess_info *info, 
struct cred *new)
/* move initrd over / and chdir/chroot in initrd root */
init_chdir("/root");
init_mount(".", "/", NULL, MS_MOVE, NULL);
-   ksys_chroot(".");
+   init_chroot(".");
ksys_setsid();
return 0;
 }
@@ -101,7 +101,7 @@ static void __init handle_initrd(void)
/* move initrd to rootfs' /old */
init_mount("..", ".", NULL, MS_MOVE, NULL);
  

[PATCH 03/23] fs: push the getname from do_rmdir into the callers

2020-07-28 Thread Christoph Hellwig
This mirrors do_unlinkat and will make life a little easier for
the init code to reuse the whole function with a kernel filename.

Signed-off-by: Christoph Hellwig 
---
 fs/internal.h|  2 +-
 fs/namei.c   | 10 --
 include/linux/syscalls.h |  4 ++--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/internal.h b/fs/internal.h
index 9b863a7bd70892..e903d5aae139a2 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -65,7 +65,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 long do_mknodat(int dfd, const char __user *filename, umode_t mode,
unsigned int dev);
 long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
-long do_rmdir(int dfd, const char __user *pathname);
+long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
 long do_symlinkat(const char __user *oldname, int newdfd,
  const char __user *newname);
diff --git a/fs/namei.c b/fs/namei.c
index 72d4219c93acb7..d75a6039ae3966 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3720,17 +3720,16 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL(vfs_rmdir);
 
-long do_rmdir(int dfd, const char __user *pathname)
+long do_rmdir(int dfd, struct filename *name)
 {
int error = 0;
-   struct filename *name;
struct dentry *dentry;
struct path path;
struct qstr last;
int type;
unsigned int lookup_flags = 0;
 retry:
-   name = filename_parentat(dfd, getname(pathname), lookup_flags,
+   name = filename_parentat(dfd, name, lookup_flags,
&path, &last, &type);
if (IS_ERR(name))
return PTR_ERR(name);
@@ -3781,7 +3780,7 @@ long do_rmdir(int dfd, const char __user *pathname)
 
 SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
 {
-   return do_rmdir(AT_FDCWD, pathname);
+   return do_rmdir(AT_FDCWD, getname(pathname));
 }
 
 /**
@@ -3926,8 +3925,7 @@ SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, 
pathname, int, flag)
return -EINVAL;
 
if (flag & AT_REMOVEDIR)
-   return do_rmdir(dfd, pathname);
-
+   return do_rmdir(dfd, getname(pathname));
return do_unlinkat(dfd, getname(pathname));
 }
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5b0f1fca4cfb9d..e43816198e6001 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1281,11 +1281,11 @@ static inline long ksys_unlink(const char __user 
*pathname)
return do_unlinkat(AT_FDCWD, getname(pathname));
 }
 
-extern long do_rmdir(int dfd, const char __user *pathname);
+long do_rmdir(int dfd, struct filename *name);
 
 static inline long ksys_rmdir(const char __user *pathname)
 {
-   return do_rmdir(AT_FDCWD, pathname);
+   return do_rmdir(AT_FDCWD, getname(pathname));
 }
 
 extern long do_mkdirat(int dfd, const char __user *pathname, umode_t mode);
-- 
2.27.0



[PATCH 01/23] fs: refactor do_mount

2020-07-28 Thread Christoph Hellwig
Factor out a path_mount helper that takes a struct path * instead of the
actual file name.  This will allow to convert the init and devtmpfs code
to properly mount based on a kernel pointer instead of relying on the
implicit set_fs(KERNEL_DS) during early init.

Signed-off-by: Christoph Hellwig 
---
 fs/namespace.c | 67 ++
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index f30ed401cc6d7a..6f8234f74bed90 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3115,12 +3115,11 @@ char *copy_mount_string(const void __user *data)
  * Therefore, if this magic number is present, it carries no information
  * and must be discarded.
  */
-long do_mount(const char *dev_name, const char __user *dir_name,
+static int path_mount(const char *dev_name, struct path *path,
const char *type_page, unsigned long flags, void *data_page)
 {
-   struct path path;
unsigned int mnt_flags = 0, sb_flags;
-   int retval = 0;
+   int ret;
 
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
@@ -3133,19 +3132,13 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
if (flags & MS_NOUSER)
return -EINVAL;
 
-   /* ... and get the mountpoint */
-   retval = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
-   if (retval)
-   return retval;
-
-   retval = security_sb_mount(dev_name, &path,
-  type_page, flags, data_page);
-   if (!retval && !may_mount())
-   retval = -EPERM;
-   if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
-   retval = -EPERM;
-   if (retval)
-   goto dput_out;
+   ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
+   if (ret)
+   return ret;
+   if (!may_mount())
+   return -EPERM;
+   if ((flags & SB_MANDLOCK) && !may_mandlock())
+   return -EPERM;
 
/* Default to relatime unless overriden */
if (!(flags & MS_NOATIME))
@@ -3172,7 +3165,7 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
   MS_STRICTATIME)) == 0)) {
mnt_flags &= ~MNT_ATIME_MASK;
-   mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
+   mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
}
 
sb_flags = flags & (SB_RDONLY |
@@ -3185,22 +3178,32 @@ long do_mount(const char *dev_name, const char __user 
*dir_name,
SB_I_VERSION);
 
if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
-   retval = do_reconfigure_mnt(&path, mnt_flags);
-   else if (flags & MS_REMOUNT)
-   retval = do_remount(&path, flags, sb_flags, mnt_flags,
-   data_page);
-   else if (flags & MS_BIND)
-   retval = do_loopback(&path, dev_name, flags & MS_REC);
-   else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
-   retval = do_change_type(&path, flags);
-   else if (flags & MS_MOVE)
-   retval = do_move_mount_old(&path, dev_name);
-   else
-   retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
- dev_name, data_page);
-dput_out:
+   return do_reconfigure_mnt(path, mnt_flags);
+   if (flags & MS_REMOUNT)
+   return do_remount(path, flags, sb_flags, mnt_flags, data_page);
+   if (flags & MS_BIND)
+   return do_loopback(path, dev_name, flags & MS_REC);
+   if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
+   return do_change_type(path, flags);
+   if (flags & MS_MOVE)
+   return do_move_mount_old(path, dev_name);
+
+   return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
+   data_page);
+}
+
+long do_mount(const char *dev_name, const char __user *dir_name,
+   const char *type_page, unsigned long flags, void *data_page)
+{
+   struct path path;
+   int ret;
+
+   ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
+   if (ret)
+   return ret;
+   ret = path_mount(dev_name, &path, type_page, flags, data_page);
path_put(&path);
-   return retval;
+   return ret;
 }
 
 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
-- 
2.27.0



<    1   2   3   4   5   6   7   8   9   10   >