Re: [PATCH v1 2/8] powerpc: hugetlb: Convert set_huge_pte_at() to take vma

2023-09-21 Thread Christophe Leroy


Le 21/09/2023 à 18:20, Ryan Roberts a écrit :
> In order to fix a bug, arm64 needs access to the vma inside it's
> implementation of set_huge_pte_at(). Provide for this by converting the
> mm parameter to be a vma. Any implementations that require the mm can
> access it via vma->vm_mm.
> 
> This commit makes the required powerpc modifications. Separate commits
> update the other arches and core code, before the actual bug is fixed in
> arm64.
> 
> No behavioral changes intended.
> 
> Signed-off-by: Ryan Roberts 
> ---
>   arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 3 ++-
>   arch/powerpc/mm/book3s64/hugetlbpage.c   | 2 +-
>   arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 2 +-
>   arch/powerpc/mm/nohash/8xx.c | 2 +-
>   arch/powerpc/mm/pgtable.c| 7 ++-
>   5 files changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h 
> b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
> index de092b04ee1a..fff8cd726bc7 100644
> --- a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
> +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
> @@ -46,7 +46,8 @@ static inline int check_and_get_huge_psize(int shift)
>   }
>   
>   #define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
> -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 
> pte_t pte);
> +void set_huge_pte_at(struct vm_area_struct *vma, unsigned long addr, pte_t 
> *ptep, pte_t pte);
> +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t 
> *ptep, pte_t pte);

Don't add the burden of an additional function, see below

>   
>   #define __HAVE_ARCH_HUGE_PTE_CLEAR
>   static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
> diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c 
> b/arch/powerpc/mm/book3s64/hugetlbpage.c
> index 3bc0eb21b2a0..ae7fd7c90eb8 100644
> --- a/arch/powerpc/mm/book3s64/hugetlbpage.c
> +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
> @@ -147,7 +147,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct 
> *vma, unsigned long addr
>   if (radix_enabled())
>   return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
>  old_pte, pte);
> - set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
> + set_huge_pte_at(vma, addr, ptep, pte);
>   }
>   
>   void __init hugetlbpage_init_defaultsize(void)
> diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c 
> b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
> index 17075c78d4bc..7cd40a334c3a 100644
> --- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
> +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
> @@ -58,5 +58,5 @@ void radix__huge_ptep_modify_prot_commit(struct 
> vm_area_struct *vma,
>   atomic_read(&mm->context.copros) > 0)
>   radix__flush_hugetlb_page(vma, addr);
>   
> - set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
> + set_huge_pte_at(vma, addr, ptep, pte);
>   }
> diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
> index dbbfe897455d..650a7a8496b6 100644
> --- a/arch/powerpc/mm/nohash/8xx.c
> +++ b/arch/powerpc/mm/nohash/8xx.c
> @@ -91,7 +91,7 @@ static int __ref __early_map_kernel_hugepage(unsigned long 
> va, phys_addr_t pa,
>   if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
>   return -EINVAL;
>   
> - set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> 
> PAGE_SHIFT, prot)));
> + __set_huge_pte_at(&init_mm, va, ptep, pte_mkhuge(pfn_pte(pa >> 
> PAGE_SHIFT, prot)));

Call set_huge_pte_at() with a NULL vma instead.

>   
>   return 0;
>   }
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 3f86fd217690..9cbcb561a4d8 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -288,7 +288,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
>   }
>   
>   #if defined(CONFIG_PPC_8xx)
> -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 
> pte_t pte)
> +void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t 
> *ptep, pte_t pte)

Keep it as set_huge_pte_at() with vma argument.

>   {
>   pmd_t *pmd = pmd_off(mm, addr);

Change to:

pmd_t *pmd = vma ? pmd_off(vma->vm_mm, addr) : pmd_off_k(addr);

>   pte_basic_t val;
> @@ -310,6 +310,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long 
> addr, pte_t *ptep, pte_
>   for (i = 0; i < num; i++, entry++, val += SZ_4K)
>   *entry = val;
>   }
> +
> +void set_huge_pte_at(struct vm_area_struct *vma, unsigned long addr, pte_t 
> *ptep, pte_t pte)
> +{
> + __set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
> +}

Remove this burden.

>   #endif
>   #endif /* CONFIG_HUGETLB_PAGE */
>   


Christophe


Re: [PATCH v1 2/8] powerpc: hugetlb: Convert set_huge_pte_at() to take vma

2023-09-21 Thread Christophe Leroy


Le 21/09/2023 à 20:43, Christophe Leroy a écrit :
> 
> 
> Le 21/09/2023 à 18:20, Ryan Roberts a écrit :
>> In order to fix a bug, arm64 needs access to the vma inside it's
>> implementation of set_huge_pte_at(). Provide for this by converting the
>> mm parameter to be a vma. Any implementations that require the mm can
>> access it via vma->vm_mm.
>>
>> This commit makes the required powerpc modifications. Separate commits
>> update the other arches and core code, before the actual bug is fixed in
>> arm64.
>>
>> No behavioral changes intended.
> 
> This patch doesn't build, allthough I have also applied patch 1. Is
> something missing ?
> 
> CALLscripts/checksyscalls.sh
> CC  arch/powerpc/kernel/setup-common.o
> In file included from arch/powerpc/kernel/setup-common.c:37:
> ./include/linux/hugetlb.h: In function 'huge_ptep_modify_prot_commit':
> ./include/linux/hugetlb.h:987:28: error: passing argument 1 of
> 'set_huge_pte_at' from incompatible pointer type
> [-Werror=incompatible-pointer-types]
> 987 | set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
> | ~~~^~~
> ||
> |struct mm_struct *
> In file included from ./arch/powerpc/include/asm/hugetlb.h:13,
>from ./include/linux/hugetlb.h:815:
> ./arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h:49:45: note: expected
> 'struct vm_area_struct *' but argument is of type 'struct mm_struct *'
>  49 | void set_huge_pte_at(struct vm_area_struct *vma, unsigned long
> addr, pte_t *ptep, pte_t pte);
> |  ~~~^~~
> cc1: all warnings being treated as errors
> make[4]: *** [scripts/Makefile.build:243:

Oh, I realised that it requires patch 6 to build properly. This is not 
good. Your series should be bisectable, that means it must build and run 
successfully after each patch. Therefore you have to squash patches 1 to 
7 all togethers.

I'll send you comments on the powerpc part in another mail.

Christophe


Re: [RFC PATCH v12 07/33] KVM: Add KVM_EXIT_MEMORY_FAULT exit to report faults to userspace

2023-09-21 Thread Xiaoyao Li

On 9/14/2023 9:55 AM, Sean Christopherson wrote:

From: Chao Peng 

Add a new KVM exit type to allow userspace to handle memory faults that
KVM cannot resolve, but that userspace *may* be able to handle (without
terminating the guest).

KVM will initially use KVM_EXIT_MEMORY_FAULT to report implicit
conversions between private and shared memory.  With guest private memory,
there will be  two kind of memory conversions:

   - explicit conversion: happens when the guest explicitly calls into KVM
 to map a range (as private or shared)

   - implicit conversion: happens when the guest attempts to access a gfn
 that is configured in the "wrong" state (private vs. shared)

On x86 (first architecture to support guest private memory), explicit
conversions will be reported via KVM_EXIT_HYPERCALL+KVM_HC_MAP_GPA_RANGE,


side topic.

Do we expect to integrate TDVMCALL(MAPGPA) of TDX into KVM_HC_MAP_GPA_RANGE?


but reporting KVM_EXIT_HYPERCALL for implicit conversions is undesriable
as there is (obviously) no hypercall, and there is no guarantee that the
guest actually intends to convert between private and shared, i.e. what
KVM thinks is an implicit conversion "request" could actually be the
result of a guest code bug.

KVM_EXIT_MEMORY_FAULT will be used to report memory faults that appear to
be implicit conversions.

Place "struct memory_fault" in a second anonymous union so that filling
memory_fault doesn't clobber state from other yet-to-be-fulfilled exits,
and to provide additional information if KVM does NOT ultimately exit to
userspace with KVM_EXIT_MEMORY_FAULT, e.g. if KVM suppresses (or worse,
loses) the exit, as KVM often suppresses exits for memory failures that
occur when accessing paravirt data structures.  The initial usage for
private memory will be all-or-nothing, but other features such as the
proposed "userfault on missing mappings" support will use
KVM_EXIT_MEMORY_FAULT for potentially _all_ guest memory accesses, i.e.
will run afoul of KVM's various quirks.


So when exit reason is KVM_EXIT_MEMORY_FAULT, how can we tell which 
field in the first union is valid?


When exit reason is not KVM_EXIT_MEMORY_FAULT, how can we know the info 
in the second union run.memory is valid without a run.memory.valid field?



Use bit 3 for flagging private memory so that KVM can use bits 0-2 for
capturing RWX behavior if/when userspace needs such information.

Note!  To allow for future possibilities where KVM reports
KVM_EXIT_MEMORY_FAULT and fills run->memory_fault on _any_ unresolved
fault, KVM returns "-EFAULT" (-1 with errno == EFAULT from userspace's
perspective), not '0'!  Due to historical baggage within KVM, exiting to
userspace with '0' from deep callstacks, e.g. in emulation paths, is
infeasible as doing so would require a near-complete overhaul of KVM,
whereas KVM already propagates -errno return codes to userspace even when
the -errno originated in a low level helper.

Link: https://lore.kernel.org/all/20230908222905.1321305-5-amoor...@google.com
Cc: Anish Moorthy 
Suggested-by: Sean Christopherson 
Co-developed-by: Yu Zhang 
Signed-off-by: Yu Zhang 
Signed-off-by: Chao Peng 
Co-developed-by: Sean Christopherson 
Signed-off-by: Sean Christopherson 
---
  Documentation/virt/kvm/api.rst | 24 
  include/linux/kvm_host.h   | 15 +++
  include/uapi/linux/kvm.h   | 24 
  3 files changed, 63 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 21a7578142a1..e28a13439a95 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6702,6 +6702,30 @@ array field represents return values. The userspace 
should update the return
  values of SBI call before resuming the VCPU. For more details on RISC-V SBI
  spec refer, https://github.com/riscv/riscv-sbi-doc.
  
+::

+
+   /* KVM_EXIT_MEMORY_FAULT */
+   struct {
+  #define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
+   __u64 flags;
+   __u64 gpa;
+   __u64 size;
+   } memory;
+
+KVM_EXIT_MEMORY_FAULT indicates the vCPU has encountered a memory fault that
+could not be resolved by KVM.  The 'gpa' and 'size' (in bytes) describe the
+guest physical address range [gpa, gpa + size) of the fault.  The 'flags' field
+describes properties of the faulting access that are likely pertinent:
+
+ - KVM_MEMORY_EXIT_FLAG_PRIVATE - When set, indicates the memory fault occurred
+   on a private memory access.  When clear, indicates the fault occurred on a
+   shared access.
+
+Note!  KVM_EXIT_MEMORY_FAULT is unique among all KVM exit reasons in that it
+accompanies a return code of '-1', not '0'!  errno will always be set to EFAULT
+or EHWPOISON when KVM exits with KVM_EXIT_MEMORY_FAULT, userspace should assume
+kvm_run.exit_reason is stale/undefined for all other error numbers.
+


Initially, this section is the copy of struct kvm_run and ha

Re: linux-next: Tree for Sep 20 (ppc32: ADB_CUDA Kconfig warning)

2023-09-21 Thread Randy Dunlap



On 9/21/23 17:10, Michael Ellerman wrote:
> Randy Dunlap  writes:
>> On 9/19/23 20:37, Stephen Rothwell wrote:
>>> Hi all,
>>>
>>> Changes since 20230919:
>>>
>>> The mm tree lost its boot warning.
>>>
>>> The drm-misc tree gained a conflict against Linus' tree.
>>>
>>> Non-merge commits (relative to Linus' tree): 6006
>>>  3996 files changed, 459968 insertions(+), 111742 deletions(-)
>>>
>>> 
>>
>> 4 out of 10 randconfigs have this warning:
>>
>> WARNING: unmet direct dependencies detected for ADB_CUDA
>>   Depends on [n]: MACINTOSH_DRIVERS [=n] && (ADB [=n] || PPC_PMAC [=y]) && 
>> !PPC_PMAC64 [=n]
>>   Selected by [y]:
>>   - PPC_PMAC [=y] && PPC_BOOK3S [=y] && CPU_BIG_ENDIAN [=y] && POWER_RESET 
>> [=y] && PPC32 [=y]
>>
>> WARNING: unmet direct dependencies detected for ADB_CUDA
>>   Depends on [n]: MACINTOSH_DRIVERS [=n] && (ADB [=n] || PPC_PMAC [=y]) && 
>> !PPC_PMAC64 [=n]
>>   Selected by [y]:
>>   - PPC_PMAC [=y] && PPC_BOOK3S [=y] && CPU_BIG_ENDIAN [=y] && POWER_RESET 
>> [=y] && PPC32 [=y]
>>
>> WARNING: unmet direct dependencies detected for ADB_CUDA
>>   Depends on [n]: MACINTOSH_DRIVERS [=n] && (ADB [=n] || PPC_PMAC [=y]) && 
>> !PPC_PMAC64 [=n]
>>   Selected by [y]:
>>   - PPC_PMAC [=y] && PPC_BOOK3S [=y] && CPU_BIG_ENDIAN [=y] && POWER_RESET 
>> [=y] && PPC32 [=y]
> 
> Crud. Caused by:
> 
> a3ef2fef198c ("powerpc/32: Add dependencies of POWER_RESET for pmac32")
> 
> I was suspicious of that select, I should have been *more* suspicious :)
> 
> I think this is a fix. The PPC32 isn't needed because ADB depends on 
> (PPC_PMAC && PPC32).

Yes, that fixes the problem. Thanks.

Tested-by: Randy Dunlap 
Acked-by: Randy Dunlap 

> 
> diff --git a/arch/powerpc/platforms/powermac/Kconfig 
> b/arch/powerpc/platforms/powermac/Kconfig
> index 8bdae0caf21e..84f101ec53a9 100644
> --- a/arch/powerpc/platforms/powermac/Kconfig
> +++ b/arch/powerpc/platforms/powermac/Kconfig
> @@ -2,7 +2,7 @@
>  config PPC_PMAC
> bool "Apple PowerMac based machines"
> depends on PPC_BOOK3S && CPU_BIG_ENDIAN
> -   select ADB_CUDA if POWER_RESET && PPC32
> +   select ADB_CUDA if POWER_RESET && ADB
> select MPIC
> select FORCE_PCI
> select PPC_INDIRECT_PCI if PPC32
> 
> cheers

-- 
~Randy


[PATCH v2 1/1] PCI: layerscape-ep: set 64-bit DMA mask

2023-09-21 Thread Frank Li
From: Guanhua Gao 

Set DMA mask and coherent DMA mask to enable 64-bit addressing.

Signed-off-by: Guanhua Gao 
Signed-off-by: Hou Zhiqiang 
Signed-off-by: Frank Li 
---

Notes:
change from v1 to v2
- Remove 32bit DMA mask set.

 drivers/pci/controller/dwc/pci-layerscape-ep.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index de4c1758a6c33..026bf08611e13 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -249,6 +249,11 @@ static int __init ls_pcie_ep_probe(struct platform_device 
*pdev)
 
pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
 
+   /* set 64-bit DMA mask and coherent DMA mask */
+   ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+   if (ret)
+   return ret;
+
platform_set_drvdata(pdev, pcie);
 
ret = dw_pcie_ep_init(&pci->ep);
-- 
2.34.1



Re: [PATCH v1 8/8] arm64: hugetlb: Fix set_huge_pte_at() to work with all swap entries

2023-09-21 Thread Qi Zheng

Hi Ryan,

On 2023/9/22 00:20, Ryan Roberts wrote:

When called with a swap entry that does not embed a PFN (e.g.
PTE_MARKER_POISONED or PTE_MARKER_UFFD_WP), the previous implementation
of set_huge_pte_at() would either cause a BUG() to fire (if
CONFIG_DEBUG_VM is enabled) or cause a dereference of an invalid address
and subsequent panic.

arm64's huge pte implementation supports multiple huge page sizes, some
of which are implemented in the page table with contiguous mappings. So
set_huge_pte_at() needs to work out how big the logical pte is, so that
it can also work out how many physical ptes (or pmds) need to be
written. It does this by grabbing the folio out of the pte and querying
its size.

However, there are cases when the pte being set is actually a swap
entry. But this also used to work fine, because for huge ptes, we only
ever saw migration entries and hwpoison entries. And both of these types
of swap entries have a PFN embedded, so the code would grab that and
everything still worked out.

But over time, more calls to set_huge_pte_at() have been added that set
swap entry types that do not embed a PFN. And this causes the code to go
bang. The triggering case is for the uffd poison test, commit
99aa77215ad0 ("selftests/mm: add uffd unit test for UFFDIO_POISON"),
which sets a PTE_MARKER_POISONED swap entry. But review shows there are
other places too (PTE_MARKER_UFFD_WP).

So the root cause is due to commit 18f3962953e4 ("mm: hugetlb: kill
set_huge_swap_pte_at()"), which aimed to simplify the interface to the
core code by removing set_huge_swap_pte_at() (which took a page size
parameter) and replacing it with calls to set_huge_swap_pte_at() where
the size was inferred from the folio, as descibed above. While that
commit didn't break anything at the time, 


If it didn't break anything at that time, then shouldn't the Fixes tag
be added to this commit?


it did break the interface
because it couldn't handle swap entries without PFNs. And since then new
callers have come along which rely on this working.


So the Fixes tag should be added only to the commit that introduces the
first new callers?

Other than that, LGTM.

Thanks,
Qi



Now that we have modified the set_huge_pte_at() interface to pass the
vma, we can extract the huge page size from it and fix this issue.

I'm tagging the commit that added the uffd poison feature, since that is
what exposed the problem, as well as the original change that broke the
interface. Hopefully this is valuable for people doing bisect.

Signed-off-by: Ryan Roberts 
Fixes: 18f3962953e4 ("mm: hugetlb: kill set_huge_swap_pte_at()")
Fixes: 8a13897fb0da ("mm: userfaultfd: support UFFDIO_POISON for hugetlbfs")
---
  arch/arm64/mm/hugetlbpage.c | 17 +++--
  1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 844832511c1e..a08601a14689 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -241,13 +241,6 @@ static void clear_flush(struct mm_struct *mm,
flush_tlb_range(&vma, saddr, addr);
  }
  
-static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)

-{
-   VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
-
-   return page_folio(pfn_to_page(swp_offset_pfn(entry)));
-}
-
  void set_huge_pte_at(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep, pte_t pte)
  {
@@ -258,13 +251,10 @@ void set_huge_pte_at(struct vm_area_struct *vma, unsigned 
long addr,
unsigned long pfn, dpfn;
pgprot_t hugeprot;
  
-	if (!pte_present(pte)) {

-   struct folio *folio;
-
-   folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
-   ncontig = num_contig_ptes(folio_size(folio), &pgsize);
+   ncontig = num_contig_ptes(huge_page_size(hstate_vma(vma)), &pgsize);
  
-		for (i = 0; i < ncontig; i++, ptep++)

+   if (!pte_present(pte)) {
+   for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
set_pte_at(mm, addr, ptep, pte);
return;
}
@@ -274,7 +264,6 @@ void set_huge_pte_at(struct vm_area_struct *vma, unsigned 
long addr,
return;
}
  
-	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

pfn = pte_pfn(pte);
dpfn = pgsize >> PAGE_SHIFT;
hugeprot = pte_pgprot(pte);


Re: [RFC PATCH v4 09/11] media: uapi: Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control

2023-09-21 Thread Shengjiu Wang
On Thu, Sep 21, 2023 at 10:09 PM Hans Verkuil  wrote:
>
> On 21/09/2023 13:13, Shengjiu Wang wrote:
> > On Thu, Sep 21, 2023 at 3:11 PM Hans Verkuil  wrote:
> >>
> >> On 21/09/2023 08:55, Shengjiu Wang wrote:
> >>> On Wed, Sep 20, 2023 at 6:19 PM Hans Verkuil  wrote:
> 
>  On 20/09/2023 11:32, Shengjiu Wang wrote:
> > The input clock and output clock may not be the accurate
> > rate as the sample rate, there is some drift, so the convert
> > ratio of i.MX ASRC module need to be changed according to
> > actual clock rate.
> >
> > Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control for user to
> > adjust the ratio.
> >
> > Signed-off-by: Shengjiu Wang 
> > ---
> >  Documentation/userspace-api/media/v4l/control.rst | 5 +
> >  drivers/media/v4l2-core/v4l2-ctrls-defs.c | 1 +
> >  include/uapi/linux/v4l2-controls.h| 1 +
> >  3 files changed, 7 insertions(+)
> >
> > diff --git a/Documentation/userspace-api/media/v4l/control.rst 
> > b/Documentation/userspace-api/media/v4l/control.rst
> > index 4463fce694b0..2bc175900a34 100644
> > --- a/Documentation/userspace-api/media/v4l/control.rst
> > +++ b/Documentation/userspace-api/media/v4l/control.rst
> > @@ -318,6 +318,11 @@ Control IDs
> >  depending on particular custom controls should check the driver 
> > name
> >  and version, see :ref:`querycap`.
> >
> > +.. _v4l2-audio-imx:
> > +
> > +``V4L2_CID_USER_IMX_ASRC_RATIO_MOD``
> > +sets the rasampler ratio modifier of i.MX asrc module.
> 
>  rasampler -> resampler (I think?)
> 
>  This doesn't document at all what the type of the control is or how to 
>  interpret it.
> 
> > +
> >  Applications can enumerate the available controls with the
> >  :ref:`VIDIOC_QUERYCTRL` and
> >  :ref:`VIDIOC_QUERYMENU ` ioctls, get and set a
> > diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c 
> > b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > index 8696eb1cdd61..16f66f66198c 100644
> > --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > @@ -1242,6 +1242,7 @@ const char *v4l2_ctrl_get_name(u32 id)
> >   case V4L2_CID_COLORIMETRY_CLASS:return "Colorimetry 
> > Controls";
> >   case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:   return 
> > "HDR10 Content Light Info";
> >   case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:  return 
> > "HDR10 Mastering Display";
> > + case V4L2_CID_USER_IMX_ASRC_RATIO_MOD:  return 
> > "ASRC RATIO MOD";
> 
>  Let's stay consistent with the other control names:
> 
>  "ASRC Ratio Modifier"
> 
>  But if this is a driver specific control, then this doesn't belong here.
> 
>  Driver specific controls are defined in the driver itself, including this
>  description.
> 
>  Same for the control documentation: if it is driver specific, then that
>  typically is documented either in a driver-specific public header, or
>  possibly in driver-specific documentation 
>  (Documentation/admin-guide/media/).
> 
>  But is this imx specific? Wouldn't other similar devices need this?
> >>>
> >>> It is imx specific.
> >>
> >> Why? I'm not opposed to this, but I wonder if you looked at datasheets of
> >> similar devices from other vendors: would they use something similar?
> >
> > I tried to find some datasheets for other vendors, but failed to find them.
> > So I don't know how they implement this part.
> >
> > Ratio modification on i.MX is to modify the configured ratio.
> > For example, the input rate is 44.1kHz,  output rate is 48kHz,
> > configured ratio = 441/480,   the ratio modification is to modify
> > the fractional part of (441/480) with small steps.  because the
> > input clock or output clock has drift in the real hardware.
> > The ratio modification is signed value, it is added to configured
> > ratio.
> >
> > In our case, we have some sysfs interface for user to get the
> > clock from input audio device and output audio device, user
> > need to calculate the ratio dynamically , then configure the
> > modification to driver
>
> So this ratio modifier comes into play when either the audio input
> or audio output (or both) are realtime audio inputs/outputs where
> the sample rate is not a perfect 44.1 or 48 kHz, but slightly different?

yes.

>
> If you would use this resampler to do offline resampling (i.e. resample
> a 44.1 kHz wav file to a 48 kHz wav file), then this wouldn't be needed,
> correct?

yes.

>
> When dealing with realtime audio, userspace will know how to get the
> precise sample rate, but that is out-of-scope of this driver. Here
> you just need a knob to slightly tweak the resampling ratio.
>
> If my understanding is correct, then I wonder if it is such a good
> idea to put the r

Re: Questions: Should kernel panic when PCIe fatal error occurs?

2023-09-21 Thread Shuai Xue
+ @Rafael for the APEI/GHES part.

On 2023/9/22 05:52, Bjorn Helgaas wrote:
> On Thu, Sep 21, 2023 at 08:10:19PM +0800, Shuai Xue wrote:
>> On 2023/9/21 07:02, Bjorn Helgaas wrote:
>>> On Mon, Sep 18, 2023 at 05:39:58PM +0800, Shuai Xue wrote:
>> ...
> 
>>> I guess your point is that for CPER_SEV_FATAL errors, the APEI/GHES
>>> path always panics but the native path never does, and that maybe both
>>> paths should work the same way?
>>
>> Yes, exactly. Both OS native and APEI/GHES firmware first are notifications
>> used to handles PCIe AER errors, and IMHO, they should ideally work in the
>> same way.
> 
> I agree, that would be nice, but the whole point of the APEI/GHES
> functionality is vendor value-add, so I'm not sure we can achieve that
> ideal.
> 
>> ...
>> As a result, AER driver only does recovery for non-fatal PCIe error.
> 
> This is only true for the APEI/GHES path, right?  For *native* AER
> handling, we attempt recovery for both fatal and non-fatal errors.

Yes, exactly.

> 
>>> It doesn't seem like the native path should always panic.  If we can
>>> tell that data was corrupted, we may want to panic, but otherwise I
>>> don't think we should crash the entire system even if some device is
>>> permanently broken.
>>
>> Got it. But how can we tell if the data is corrupted with OS native?
> 
> I naively expect that by PCIe protocol, corrupted DLLPs or TLPs
> detected by CRC, sequence number errors, etc, would be discarded
> before corrupting memory, so I doubt we'd get an uncorrectable error
> that means "sorry, I just corrupted your data."
> 
> But DPC is advertised as "avoiding the potential spread of any data
> corruption," so there must be some mechanisms of corruption, and since
> DPC is triggered by either ERR_FATAL or ERR_NONFATAL, I guess maybe
> the errors could tell us something.  I'm going to quit speculating
> because I obviously don't know enough about this area.
> 
 However, I have changed my mind on this issue as I encounter a case where
 a error propagation is detected due to fatal DLLP (Data Link Protocol
 Error) error. A DLLP error occurred in the Compute node, causing the
 node to panic because `struct acpi_hest_generic_status::error_severity` was
 set as CPER_SEV_FATAL. However, data corruption was still detected in the
 storage node by CRC.
>>>
>>> The only mention of Data Link Protocol Error that looks relevant is
>>> PCIe r6.0, sec 3.6.2.2, which basically says a DLLP with an unexpected
>>> Sequence Number should be discarded:
>>>
>>>   For Ack and Nak DLLPs, the following steps are followed (see Figure
>>>   3-21):
>>>
>>> - If the Sequence Number specified by the AckNak_Seq_Num does not
>>>   correspond to an unacknowledged TLP, or to the value in
>>>   ACKD_SEQ, the DLLP is discarded
>>>
>>>   - This is a Data Link Protocol Error, which is a reported error
>>> associated with the Port (see Section 6.2).
>>>
>>> So data from that DLLP should not have made it to memory, although of
>>> course the DMA may not have been completed.  But it sounds like you
>>> did see corrupted data written to memory?
>>
>> The storage node use RDMA to directly access remote compute node.
>> And a error detected by CRC in the storage node. So I suspect yes.
> 
> When doing the CRC, can you distinguish between corrupted data and
> data that was not written because a DMA was only partially completed?

Yes, the receiving application layer will perform length verification.
So the data length is definitely correct.

> 
>> ...
>> I tried to inject Data Link Protocol Error on some platform. The mechanism
>> behind is that rootport controls the sequence number of the specific TLPs
>> and ACK/NAK DLLPs. Data Link Protocol Error will be detected at the Rx side
>> of ACK/NAK DLLPs.
>>
>> In such case, NIC and NVMe recovered on fatal and non-fatal DLLP
>> errors.
> 
> I'm guessing this error injection directly writes the AER status bit,
> which would probably only test the reporting (sending an ERR_FATAL
> message), AER interrupt generation, firmware or OS interrupt handling,
> etc.
> 
> It probably would not actually generate a DLLP with a bad sequence
> number, so it probably does not test the hardware behavior of
> discarding the DLLP if the sequence number is bad.  Just my guess
> though.

No, we don't touch AER status bit. The Root port controller provides Error
Injection Function to trigger a real DLLP error. For example,

- set a bad Bad sequence number, assuming 3
- enable error injection
- send a TLP from the controller's Application Interface, assuming SEQ#5 is
  given to the TLP
- the SEQ# is Changed to #2 by the Error Injection Function in Layer2.

> 
>> ...
>> My point is that how kernel could recover from non-fatal and fatal
>> errors in firmware first without DPC? If CPER_SEV_FATAL is used to
>> report fatal PCIe error, kernel will panic in APEI/GHES driver.
> 
> The platform decides whether to use CPER_SEV_FATAL, so we can't change
> that.  We 

[powerpc:merge] BUILD SUCCESS 7c9749edebd772b65aae2d616aa538f26f054c30

2023-09-21 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
merge
branch HEAD: 7c9749edebd772b65aae2d616aa538f26f054c30  Automatic merge of 
'next' into merge (2023-09-21 19:35)

elapsed time: 929m

configs tested: 136
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha allnoconfig   gcc  
alphaallyesconfig   gcc  
alpha   defconfig   gcc  
arc  allmodconfig   gcc  
arc   allnoconfig   gcc  
arc  allyesconfig   gcc  
arc defconfig   gcc  
arc   randconfig-001-20230921   gcc  
arm  allmodconfig   gcc  
arm   allnoconfig   gcc  
arm  allyesconfig   gcc  
arm defconfig   gcc  
arm   randconfig-001-20230921   gcc  
arm64allmodconfig   gcc  
arm64 allnoconfig   gcc  
arm64allyesconfig   gcc  
arm64   defconfig   gcc  
csky allmodconfig   gcc  
csky  allnoconfig   gcc  
csky allyesconfig   gcc  
cskydefconfig   gcc  
i386 allmodconfig   gcc  
i386  allnoconfig   gcc  
i386 allyesconfig   gcc  
i386 buildonly-randconfig-001-20230921   gcc  
i386 buildonly-randconfig-002-20230921   gcc  
i386 buildonly-randconfig-003-20230921   gcc  
i386 buildonly-randconfig-004-20230921   gcc  
i386 buildonly-randconfig-005-20230921   gcc  
i386 buildonly-randconfig-006-20230921   gcc  
i386  debian-10.3   gcc  
i386defconfig   gcc  
i386  randconfig-001-20230921   gcc  
i386  randconfig-002-20230921   gcc  
i386  randconfig-003-20230921   gcc  
i386  randconfig-004-20230921   gcc  
i386  randconfig-005-20230921   gcc  
i386  randconfig-006-20230921   gcc  
i386  randconfig-011-20230921   gcc  
i386  randconfig-012-20230921   gcc  
i386  randconfig-013-20230921   gcc  
i386  randconfig-014-20230921   gcc  
i386  randconfig-015-20230921   gcc  
i386  randconfig-016-20230921   gcc  
loongarchallmodconfig   gcc  
loongarch allnoconfig   gcc  
loongarchallyesconfig   gcc  
loongarch   defconfig   gcc  
loongarch randconfig-001-20230921   gcc  
m68k allmodconfig   gcc  
m68k  allnoconfig   gcc  
m68k allyesconfig   gcc  
m68kdefconfig   gcc  
microblaze   allmodconfig   gcc  
microblazeallnoconfig   gcc  
microblaze   allyesconfig   gcc  
microblaze  defconfig   gcc  
mips allmodconfig   gcc  
mips  allnoconfig   gcc  
mips allyesconfig   gcc  
nios2allmodconfig   gcc  
nios2 allnoconfig   gcc  
nios2allyesconfig   gcc  
nios2   defconfig   gcc  
openrisc allmodconfig   gcc  
openrisc  allnoconfig   gcc  
openrisc allyesconfig   gcc  
openriscdefconfig   gcc  
parisc   allmodconfig   gcc  
pariscallnoconfig   gcc  
parisc   allyesconfig   gcc  
parisc  defconfig   gcc  
parisc64defconfig   gcc  
powerpc  allmodconfig   gcc  
powerpc   allnoconfig   gcc  
powerpc  allyesconfig   gcc  
riscvallmodconfig   gcc  
riscv allnoconfig   gcc  
riscvallyesconfig   gcc  
riscv   defconfig   gcc  
riscv randconfig-001-20230921   gcc  
riscv  rv32_defconfig   gcc  
s390 allmodconfig   gcc  
s390  allnoconfig   gcc  
s390 allyesconfig   

Re: [PATCH v1 6/8] mm: hugetlb: Convert set_huge_pte_at() to take vma

2023-09-21 Thread SeongJae Park
Hi Ryan,

On Thu, 21 Sep 2023 17:20:05 +0100 Ryan Roberts  wrote:

> In order to fix a bug, arm64 needs access to the vma inside it's
> implementation of set_huge_pte_at(). Provide for this by converting the
> mm parameter to be a vma. Any implementations that require the mm can
> access it via vma->vm_mm.
> 
> This commit makes the required modifications to the core mm. Separate
> commits update the arches, before the actual bug is fixed in arm64.
> 
> No behavioral changes intended.
> 
> Signed-off-by: Ryan Roberts 

For mm/damon/ part change,

Reviewed-by: SeongJae Park 


Thanks,
SJ


> ---
>  include/asm-generic/hugetlb.h |  6 +++---
>  include/linux/hugetlb.h   |  6 +++---
>  mm/damon/vaddr.c  |  2 +-
>  mm/hugetlb.c  | 30 +++---
>  mm/migrate.c  |  2 +-
>  mm/rmap.c | 10 +-
>  mm/vmalloc.c  |  5 -
>  7 files changed, 32 insertions(+), 29 deletions(-)
> 
> diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
> index 4da02798a00b..515e4777fb65 100644
> --- a/include/asm-generic/hugetlb.h
> +++ b/include/asm-generic/hugetlb.h
> @@ -75,10 +75,10 @@ static inline void hugetlb_free_pgd_range(struct 
> mmu_gather *tlb,
>  #endif
>  
>  #ifndef __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
> -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> - pte_t *ptep, pte_t pte)
> +static inline void set_huge_pte_at(struct vm_area_struct *vma,
> + unsigned long addr, pte_t *ptep, pte_t pte)
>  {
> - set_pte_at(mm, addr, ptep, pte);
> + set_pte_at(vma->vm_mm, addr, ptep, pte);
>  }
>  #endif
>  
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 5b2626063f4f..08184f32430c 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -984,7 +984,7 @@ static inline void huge_ptep_modify_prot_commit(struct 
> vm_area_struct *vma,
>   unsigned long addr, pte_t *ptep,
>   pte_t old_pte, pte_t pte)
>  {
> - set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
> + set_huge_pte_at(vma, addr, ptep, pte);
>  }
>  #endif
>  
> @@ -1172,8 +1172,8 @@ static inline pte_t huge_ptep_clear_flush(struct 
> vm_area_struct *vma,
>  #endif
>  }
>  
> -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
> -pte_t *ptep, pte_t pte)
> +static inline void set_huge_pte_at(struct vm_area_struct *vma,
> +unsigned long addr, pte_t *ptep, pte_t pte)
>  {
>  }
>  
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> index 4c81a9dbd044..55da8cee8fbc 100644
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
> @@ -347,7 +347,7 @@ static void damon_hugetlb_mkold(pte_t *pte, struct 
> mm_struct *mm,
>   if (pte_young(entry)) {
>   referenced = true;
>   entry = pte_mkold(entry);
> - set_huge_pte_at(mm, addr, pte, entry);
> + set_huge_pte_at(vma, addr, pte, entry);
>   }
>  
>  #ifdef CONFIG_MMU_NOTIFIER
[...]


Thanks,
SJ


Re: linux-next: Tree for Sep 20 (ppc32: ADB_CUDA Kconfig warning)

2023-09-21 Thread Michael Ellerman
Randy Dunlap  writes:
> On 9/19/23 20:37, Stephen Rothwell wrote:
>> Hi all,
>> 
>> Changes since 20230919:
>> 
>> The mm tree lost its boot warning.
>> 
>> The drm-misc tree gained a conflict against Linus' tree.
>> 
>> Non-merge commits (relative to Linus' tree): 6006
>>  3996 files changed, 459968 insertions(+), 111742 deletions(-)
>> 
>> 
>
> 4 out of 10 randconfigs have this warning:
>
> WARNING: unmet direct dependencies detected for ADB_CUDA
>   Depends on [n]: MACINTOSH_DRIVERS [=n] && (ADB [=n] || PPC_PMAC [=y]) && 
> !PPC_PMAC64 [=n]
>   Selected by [y]:
>   - PPC_PMAC [=y] && PPC_BOOK3S [=y] && CPU_BIG_ENDIAN [=y] && POWER_RESET 
> [=y] && PPC32 [=y]
>
> WARNING: unmet direct dependencies detected for ADB_CUDA
>   Depends on [n]: MACINTOSH_DRIVERS [=n] && (ADB [=n] || PPC_PMAC [=y]) && 
> !PPC_PMAC64 [=n]
>   Selected by [y]:
>   - PPC_PMAC [=y] && PPC_BOOK3S [=y] && CPU_BIG_ENDIAN [=y] && POWER_RESET 
> [=y] && PPC32 [=y]
>
> WARNING: unmet direct dependencies detected for ADB_CUDA
>   Depends on [n]: MACINTOSH_DRIVERS [=n] && (ADB [=n] || PPC_PMAC [=y]) && 
> !PPC_PMAC64 [=n]
>   Selected by [y]:
>   - PPC_PMAC [=y] && PPC_BOOK3S [=y] && CPU_BIG_ENDIAN [=y] && POWER_RESET 
> [=y] && PPC32 [=y]

Crud. Caused by:

a3ef2fef198c ("powerpc/32: Add dependencies of POWER_RESET for pmac32")

I was suspicious of that select, I should have been *more* suspicious :)

I think this is a fix. The PPC32 isn't needed because ADB depends on (PPC_PMAC 
&& PPC32).

diff --git a/arch/powerpc/platforms/powermac/Kconfig 
b/arch/powerpc/platforms/powermac/Kconfig
index 8bdae0caf21e..84f101ec53a9 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -2,7 +2,7 @@
 config PPC_PMAC
bool "Apple PowerMac based machines"
depends on PPC_BOOK3S && CPU_BIG_ENDIAN
-   select ADB_CUDA if POWER_RESET && PPC32
+   select ADB_CUDA if POWER_RESET && ADB
select MPIC
select FORCE_PCI
select PPC_INDIRECT_PCI if PPC32

cheers


Re: [PATCH rc] kvm: Prevent compiling virt/kvm/vfio.c unless VFIO is selected

2023-09-21 Thread Michael Ellerman
Jason Gunthorpe  writes:
> There are a bunch of reported randconfig failures now because of this,
> something like:
>
>>> arch/powerpc/kvm/../../../virt/kvm/vfio.c:89:7: warning: attribute 
>>> declaration must precede definition [-Wignored-attributes]
>fn = symbol_get(vfio_file_iommu_group);
> ^
>include/linux/module.h:805:60: note: expanded from macro 'symbol_get'
>#define symbol_get(x) ({ extern typeof(x) x 
> __attribute__((weak,visibility("hidden"))); &(x); })
>
> It happens because the arch forces KVM_VFIO without knowing if VFIO is
> even enabled.
>
> Split the kconfig so the arch selects the usual HAVE_KVM_ARCH_VFIO and
> then KVM_VFIO is only enabled if the arch wants it and VFIO is turned on.
>
> Reported-by: kernel test robot 
> Closes: 
> https://lore.kernel.org/oe-kbuild-all/202308251949.5iiav0sz-...@intel.com/
> Closes: 
> https://lore.kernel.org/oe-kbuild-all/202309030741.82alacdg-...@intel.com/
> Closes: 
> https://lore.kernel.org/oe-kbuild-all/202309110914.qlh0lu6l-...@intel.com/
> Cc: Nick Desaulniers 
> Fixes: c1cce6d079b8 ("vfio: Compile vfio_group infrastructure optionally")
> Signed-off-by: Jason Gunthorpe 
> ---
>  arch/arm64/kvm/Kconfig   | 2 +-
>  arch/powerpc/kvm/Kconfig | 2 +-
>  arch/s390/kvm/Kconfig| 2 +-
>  arch/x86/kvm/Kconfig | 2 +-
>  virt/kvm/Kconfig | 7 ++-
>  5 files changed, 10 insertions(+), 5 deletions(-)
>
> Sean's large series will also address this:
>
> https://lore.kernel.org/kvm/20230916003118.2540661-7-sea...@google.com/
>
> I don't know if it is sever enough to fix in the rc cycle, but here is the
> patch.

Thanks for debugging this, I had seen it but hadn't got around to it.

I think it's definitely worth fixing now. It's a pretty simple patch and
it's still early in the rc cycle.

Tested-by: Michael Ellerman 

cheers

> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index 83c1e09be42e5b..7c43eaea51ce05 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -28,7 +28,7 @@ menuconfig KVM
>   select KVM_MMIO
>   select KVM_GENERIC_DIRTYLOG_READ_PROTECT
>   select KVM_XFER_TO_GUEST_WORK
> - select KVM_VFIO
> + select HAVE_KVM_ARCH_VFIO
>   select HAVE_KVM_EVENTFD
>   select HAVE_KVM_IRQFD
>   select HAVE_KVM_DIRTY_RING_ACQ_REL
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index 902611954200df..b64824e4cbc1eb 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -22,7 +22,7 @@ config KVM
>   select PREEMPT_NOTIFIERS
>   select HAVE_KVM_EVENTFD
>   select HAVE_KVM_VCPU_ASYNC_IOCTL
> - select KVM_VFIO
> + select HAVE_KVM_ARCH_VFIO
>   select IRQ_BYPASS_MANAGER
>   select HAVE_KVM_IRQ_BYPASS
>   select INTERVAL_TREE
> diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
> index 45fdf2a9b2e326..d206ad3a777d5d 100644
> --- a/arch/s390/kvm/Kconfig
> +++ b/arch/s390/kvm/Kconfig
> @@ -31,7 +31,7 @@ config KVM
>   select HAVE_KVM_IRQ_ROUTING
>   select HAVE_KVM_INVALID_WAKEUPS
>   select HAVE_KVM_NO_POLL
> - select KVM_VFIO
> + select HAVE_KVM_ARCH_VFIO
>   select INTERVAL_TREE
>   select MMU_NOTIFIER
>   help
> diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
> index ed90f148140dfe..8e70e693f90e30 100644
> --- a/arch/x86/kvm/Kconfig
> +++ b/arch/x86/kvm/Kconfig
> @@ -45,7 +45,7 @@ config KVM
>   select HAVE_KVM_NO_POLL
>   select KVM_XFER_TO_GUEST_WORK
>   select KVM_GENERIC_DIRTYLOG_READ_PROTECT
> - select KVM_VFIO
> + select HAVE_KVM_ARCH_VFIO
>   select INTERVAL_TREE
>   select HAVE_KVM_PM_NOTIFIER if PM
>   select KVM_GENERIC_HARDWARE_ENABLING
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 484d0873061ca5..0bf34809e1bbfe 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -59,9 +59,14 @@ config HAVE_KVM_MSI
>  config HAVE_KVM_CPU_RELAX_INTERCEPT
> bool
>  
> -config KVM_VFIO
> +config HAVE_KVM_ARCH_VFIO
> bool
>  
> +config KVM_VFIO
> +   def_bool y
> +   depends on HAVE_KVM_ARCH_VFIO
> +   depends on VFIO
> +
>  config HAVE_KVM_INVALID_WAKEUPS
> bool
>  
>
> base-commit: 0bb80ecc33a8fb5a682236443c1e740d5c917d1d
> -- 
> 2.42.0


Re: [PATCH v3 1/3] powerpc/config: Cleanup pmac32_defconfig

2023-09-21 Thread Michael Ellerman
Yuan Tan  writes:
> On 9/14/2023 9:10 PM, Michael Ellerman wrote:
>> Yuan Tan  writes:
>>> Use 'make savedefconfig' to cleanup pmac32_defconfig, based on Linux
>>> 7.6-rc1
>> Thanks but I don't like doing these updates in a single commit like
>> this, it's easy to accidentally lose a symbol.
> Yeah I have the same concerns too.
>>
>> I prefer an explanation for what's changing for each symbol. See
>> 1ce7fda142af ("powerpc/configs/64s: Drop IPV6 which is default y") and
>> the commits leading up to it, to see what I mean.
>>
>> But I suspect you probably don't want to go to all that effort, which is
>> fine :)
>
> I am not familiar with other options, so I'd better not do that. :)
>
> By the way, just to be cautious, since the defconfig can only be updated
> by 'savedefconfig'[1], how can we write an explanation for a single
> change in an option?

Well the defconfig can be updated manually, but the changes you make
manually should match what savedefconfig would do.

> I mean, when I change one option, the value of the other undetermined
> option will be set just like in patch 1.

At that point I just stage the change to the option I'm changing, and
leave the other lines modified by savedefconfig alone. That way you can
commit the changes made by savedefconfig in multiple steps, explaining
what happens along the way, and the end result is the same as what
savedefconfig generates.

So for example at the moment if you do savedefconfig on
pmac32_defconfig, the start of the diff looks like:

 1  diff --git a/arch/powerpc/configs/pmac32_defconfig 
b/arch/powerpc/configs/pmac32_defconfig
 2  index 57ded82c2840..17df965be099 100644
 3  --- a/arch/powerpc/configs/pmac32_defconfig
 4  +++ b/arch/powerpc/configs/pmac32_defconfig
 5  @@ -1,4 +1,3 @@
 6  -CONFIG_ALTIVEC=y
 7   # CONFIG_LOCALVERSION_AUTO is not set
 8   CONFIG_SYSVIPC=y
 9   CONFIG_POSIX_MQUEUE=y
10  @@ -8,12 +7,8 @@ CONFIG_IKCONFIG=y
11   CONFIG_IKCONFIG_PROC=y
12   CONFIG_LOG_BUF_SHIFT=14
13   CONFIG_BLK_DEV_INITRD=y
14  -# CONFIG_COMPAT_BRK is not set
15   CONFIG_PROFILING=y
16  -CONFIG_MODULES=y
17  -CONFIG_MODULE_UNLOAD=y
18  -CONFIG_MODULE_FORCE_UNLOAD=y
19 -CONFIG_PARTITION_ADVANCED=y
20 +CONFIG_ALTIVEC=y
21  # CONFIG_PPC_CHRP is not set
22  CONFIG_CPU_FREQ=y
23  CONFIG_CPU_FREQ_GOV_POWERSAVE=y

So you can stage lines 6 and 20, and commit that as "Update for symbol
movement", ie. nothing changed other than a symbol moved.

Then repeat that until eventually the config is up to date.

cheers


Re: [RFC v3 1/2] powerpc/cpuidle: cpuidle wakeup latency based on IPI and timer events

2023-09-21 Thread Michael Ellerman
Aboorva Devarajan  writes:
> On Wed, 2023-09-13 at 08:54 +1000, Michael Ellerman wrote:
>> Aboorva Devarajan  writes:
>> > From: Pratik R. Sampat 
>> > 
>> > Introduce a mechanism to fire directed IPIs from a source CPU to a
>> > specified target CPU and measure the time incurred on waking up the
>> > target CPU in response.
>> > 
>> > Also, introduce a mechanism to queue a hrtimer on a specified CPU
>> > and
>> > subsequently measure the time taken to wakeup the CPU.
>> > 
>> > Define a simple debugfs interface that allows for adjusting the
>> > settings to trigger IPI and timer events on a designated CPU, and
>> > to
>> > observe the resulting cpuidle wakeup latencies.
>> > 
>> > Reviewed-by: Srikar Dronamraju 
>> > Signed-off-by: Pratik R. Sampat 
>> > Signed-off-by: Aboorva Devarajan 
>> > ---
>> >  arch/powerpc/Kconfig.debug |  10 ++
>> >  arch/powerpc/kernel/Makefile   |   1 +
>> >  arch/powerpc/kernel/test_cpuidle_latency.c | 154
>> > +
>>   
>> I don't see anything here that's powerpc specific?
>> 
>> Which makes me wonder 1) could this be done with some existing
>> generic
>> mechanism?, and 2) if not can this test code be made generic.
>> 
>> At the very least this should be Cc'ed to the cpuidle lists &
>> maintainers given it's a test for cpuidle latency :)
>> 
>> cheers
>
> Hi Michael,
>
> Thanks a lot for taking a look at this.
>
> Yes, this test-case can be used as a generic benchmark for evaluating
> CPU idle latencies across different architectures, as it has thus far
> been exclusively tested and used on PowerPC, so we thought it would be
> more beneficial to incorporate it into a PowerPC specific self-test
> suite. But I will work on making it a generic self-test and send across
> a v4.

I'd suggest just posting v3 again but Cc'ing the cpuidle lists &
maintainers, to see if there is any interest in making it generic.

cheers


Re: [PATCH v2 1/2] powerpc: add `cur_cpu_spec` symbol to vmcoreinfo

2023-09-21 Thread Michael Ellerman
"Aneesh Kumar K.V"  writes:
> Aditya Gupta  writes:
>> On Wed, Sep 20, 2023 at 05:45:36PM +0530, Aneesh Kumar K.V wrote:
>>> Aditya Gupta  writes:
>>>
>>> > Since below commit, address mapping for vmemmap has changed for Radix
>>> > MMU, where address mapping is stored in kernel page table itself,
>>> > instead of earlier used 'vmemmap_list'.
>>> >
>>> > commit 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use
>>> > a different vmemmap handling function")
>>> >
>>> > Hence with upstream kernel, in case of Radix MMU, makedumpfile fails to do
>>> > address translation for vmemmap addresses, as it depended on vmemmap_list,
>>> > which can now be empty.
>>> >
>>> > While fixing the address translation in makedumpfile, it was identified
>>> > that currently makedumpfile cannot distinguish between Hash MMU and
>>> > Radix MMU, unless VMLINUX is passed with -x flag to makedumpfile.
>>> > And hence fails to assign offsets and shifts correctly (such as in L4 to
>>> > PGDIR offset calculation in makedumpfile).
>>> >
>>> > For getting the MMU, makedumpfile uses `cur_cpu_spec.mmu_features`.
>>> >
>>> > Add `cur_cpu_spec` symbol and offset of `mmu_features` in the
>>> > `cpu_spec` struct, to VMCOREINFO, so that makedumpfile can assign the
>>> > offsets correctly, without needing a VMLINUX.
>>> >
>>> > Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a 
>>> > different vmemmap handling function")
>>> > Reported-by: Sachin Sant 
>>> > Tested-by: Sachin Sant 
>>> > Signed-off-by: Aditya Gupta 
>>> >
>>> > ---
>>> > Corresponding makedumpfile patches to fix address translation, in Radix
>>> > MMU case:
>>> >
>>> > Link: 
>>> > https://lore.kernel.org/kexec/b5f0f00e-f2b1-47d7-a143-5683d10dc...@linux.ibm.com/T/#t
>>> > ---
>>> > ---
>>> >  arch/powerpc/kexec/core.c | 2 ++
>>> >  1 file changed, 2 insertions(+)
>>> >
>>> > diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
>>> > index de64c7962991..369b8334a4f0 100644
>>> > --- a/arch/powerpc/kexec/core.c
>>> > +++ b/arch/powerpc/kexec/core.c
>>> > @@ -63,6 +63,8 @@ void arch_crash_save_vmcoreinfo(void)
>>> >  #ifndef CONFIG_NUMA
>>> >   VMCOREINFO_SYMBOL(contig_page_data);
>>> >  #endif
>>> > + VMCOREINFO_SYMBOL(cur_cpu_spec);
>>> > + VMCOREINFO_OFFSET(cpu_spec, mmu_features);
>>> >  #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
>>> >   VMCOREINFO_SYMBOL(vmemmap_list);
>>> >   VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
>>> >
>>>
>>> That implies we now have to be careful when updating MMU_FTR_* #defines.
>>> It is not bad considering other hacks we do in crash to identify kernel
>>> changes tied to version number. But i am wondering if there another way
>>> to identify radix vs hash?
>>>
>>
>> I could not find another way to get any other flag for RADIX vs HASH in
>> makedumpfile. And currently I don't know of any other way.
>>
>> Both makedumpfile and crash look for '0x40' flag set in
>> 'cur_cpu_spec.mmu_features', so only requirement for 'MMU_FTR_TYPE_RADIX' is 
>> to
>> be '0x40', or we will need to change the value accordingly in the tools.
>
> Instead of exporting cur_cpu_spec.mmu_feature, you could do
> coreinfo_mmu_features that does
>
> if (radix_enabled())
>coreinfo_mmu_feature = VMCORE_INFO_RADIX_TRANSLATION;

On other arches they seem to use vmcoreinfo_append_str() for more things
than we do on powerpc.

eg. x86:

   vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", 
pgtable_l5_enabled());

Could we do something like that instead:

   vmcoreinfo_append_str("RADIX_MMU=%d\n", early_radix_enabled());

cheers


[PATCH] powerpc/stacktrace: Fix arch_stack_walk_reliable()

2023-09-21 Thread Michael Ellerman
The changes to copy_thread() made in commit eed7c420aac7 ("powerpc:
copy_thread differentiate kthreads and user mode threads") inadvertently
broke arch_stack_walk_reliable() because it has knowledge of the stack
layout.

Fix it by changing the condition to match the new logic in
copy_thread(). The changes make the comments about the stack layout
incorrect, rather than rephrasing them just refer the reader to
copy_thread().

Also the comment about the stack backchain is no longer true, since
commit edbd0387f324 ("powerpc: copy_thread add a back chain to the
switch stack frame"), so remove that as well.

Reported-by: Joe Lawrence 
Signed-off-by: Michael Ellerman 
Fixes: eed7c420aac7 ("powerpc: copy_thread differentiate kthreads and user mode 
threads")
---
 arch/powerpc/kernel/stacktrace.c | 27 +--
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index b15f15dcacb5..e6a958a5da27 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -73,29 +73,12 @@ int __no_sanitize_address 
arch_stack_walk_reliable(stack_trace_consume_fn consum
bool firstframe;
 
stack_end = stack_page + THREAD_SIZE;
-   if (!is_idle_task(task)) {
-   /*
-* For user tasks, this is the SP value loaded on
-* kernel entry, see "PACAKSAVE(r13)" in _switch() and
-* system_call_common().
-*
-* Likewise for non-swapper kernel threads,
-* this also happens to be the top of the stack
-* as setup by copy_thread().
-*
-* Note that stack backlinks are not properly setup by
-* copy_thread() and thus, a forked task() will have
-* an unreliable stack trace until it's been
-* _switch()'ed to for the first time.
-*/
-   stack_end -= STACK_USER_INT_FRAME_SIZE;
-   } else {
-   /*
-* idle tasks have a custom stack layout,
-* c.f. cpu_idle_thread_init().
-*/
+
+   // See copy_thread() for details.
+   if (task->flags & PF_KTHREAD)
stack_end -= STACK_FRAME_MIN_SIZE;
-   }
+   else
+   stack_end -= STACK_USER_INT_FRAME_SIZE;
 
if (task == current)
sp = current_stack_frame();
-- 
2.41.0




Re: [PATCH v3 06/13] mm/execmem: introduce execmem_data_alloc()

2023-09-21 Thread Song Liu
On Mon, Sep 18, 2023 at 12:31 AM Mike Rapoport  wrote:
>
[...]
> diff --git a/include/linux/execmem.h b/include/linux/execmem.h
> index 519bdfdca595..09d45ac786e9 100644
> --- a/include/linux/execmem.h
> +++ b/include/linux/execmem.h
> @@ -29,6 +29,7 @@
>   * @EXECMEM_KPROBES: parameters for kprobes
>   * @EXECMEM_FTRACE: parameters for ftrace
>   * @EXECMEM_BPF: parameters for BPF
> + * @EXECMEM_MODULE_DATA: parameters for module data sections
>   * @EXECMEM_TYPE_MAX:
>   */
>  enum execmem_type {
> @@ -37,6 +38,7 @@ enum execmem_type {
> EXECMEM_KPROBES,
> EXECMEM_FTRACE,

In longer term, I think we can improve the JITed code and merge
kprobe/ftrace/bpf. to use the same ranges. Also, do we need special
setting for FTRACE? If not, let's just remove it.

> EXECMEM_BPF,
> +   EXECMEM_MODULE_DATA,
> EXECMEM_TYPE_MAX,
>  };

Overall, it is great that kprobe/ftrace/bpf no longer depend on modules.

OTOH, I think we should merge execmem_type and existing mod_mem_type.
Otherwise, we still need to handle page permissions in multiple places.
What is our plan for that?

Thanks,
Song


>
> @@ -107,6 +109,23 @@ struct execmem_params *execmem_arch_params(void);
>   */
>  void *execmem_text_alloc(enum execmem_type type, size_t size);
>
> +/**
> + * execmem_data_alloc - allocate memory for data coupled to code
> + * @type: type of the allocation
> + * @size: how many bytes of memory are required
> + *
> + * Allocates memory that will contain data coupled with executable code,
> + * like data sections in kernel modules.
> + *
> + * The memory will have protections defined by architecture.
> + *
> + * The allocated memory will reside in an area that does not impose
> + * restrictions on the addressing modes.
> + *
> + * Return: a pointer to the allocated memory or %NULL
> + */
> +void *execmem_data_alloc(enum execmem_type type, size_t size);
> +
>  /**
>   * execmem_free - free executable memory
>   * @ptr: pointer to the memory that should be freed
> diff --git a/kernel/module/main.c b/kernel/module/main.c
> index c4146bfcd0a7..2ae83a6abf66 100644
> --- a/kernel/module/main.c
> +++ b/kernel/module/main.c
> @@ -1188,25 +1188,16 @@ void __weak module_arch_freeing_init(struct module 
> *mod)
>  {
>  }
>
> -static bool mod_mem_use_vmalloc(enum mod_mem_type type)
> -{
> -   return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) &&
> -   mod_mem_type_is_core_data(type);
> -}
> -
>  static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
>  {
> -   if (mod_mem_use_vmalloc(type))
> -   return vzalloc(size);
> +   if (mod_mem_type_is_data(type))
> +   return execmem_data_alloc(EXECMEM_MODULE_DATA, size);
> return execmem_text_alloc(EXECMEM_MODULE_TEXT, size);
>  }
>
>  static void module_memory_free(void *ptr, enum mod_mem_type type)
>  {
> -   if (mod_mem_use_vmalloc(type))
> -   vfree(ptr);
> -   else
> -   execmem_free(ptr);
> +   execmem_free(ptr);
>  }
>
>  static void free_mod_mem(struct module *mod)
> diff --git a/mm/execmem.c b/mm/execmem.c
> index abcbd07e05ac..aeff85261360 100644
> --- a/mm/execmem.c
> +++ b/mm/execmem.c
> @@ -53,11 +53,23 @@ static void *execmem_alloc(size_t size, struct 
> execmem_range *range)
> return kasan_reset_tag(p);
>  }
>
> +static inline bool execmem_range_is_data(enum execmem_type type)
> +{
> +   return type == EXECMEM_MODULE_DATA;
> +}
> +
>  void *execmem_text_alloc(enum execmem_type type, size_t size)
>  {
> return execmem_alloc(size, &execmem_params.ranges[type]);
>  }
>
> +void *execmem_data_alloc(enum execmem_type type, size_t size)
> +{
> +   WARN_ON_ONCE(!execmem_range_is_data(type));
> +
> +   return execmem_alloc(size, &execmem_params.ranges[type]);
> +}
> +
>  void execmem_free(void *ptr)
>  {
> /*
> @@ -93,7 +105,10 @@ static void execmem_init_missing(struct execmem_params *p)
> struct execmem_range *r = &p->ranges[i];
>
> if (!r->start) {
> -   r->pgprot = default_range->pgprot;
> +   if (execmem_range_is_data(i))
> +   r->pgprot = PAGE_KERNEL;
> +   else
> +   r->pgprot = default_range->pgprot;
> r->alignment = default_range->alignment;
> r->start = default_range->start;
> r->end = default_range->end;
> --
> 2.39.2
>


Re: [PATCH v3 02/13] mm: introduce execmem_text_alloc() and execmem_free()

2023-09-21 Thread Song Liu
On Mon, Sep 18, 2023 at 12:30 AM Mike Rapoport  wrote:
>

[...]

> diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
> index 42215f9404af..db5561d0c233 100644
> --- a/arch/s390/kernel/module.c
> +++ b/arch/s390/kernel/module.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
>  #ifdef CONFIG_FUNCTION_TRACER
>  void module_arch_cleanup(struct module *mod)
>  {
> -   module_memfree(mod->arch.trampolines_start);
> +   execmem_free(mod->arch.trampolines_start);
>  }
>  #endif
>
> @@ -510,7 +511,7 @@ static int 
> module_alloc_ftrace_hotpatch_trampolines(struct module *me,
>
> size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
> numpages = DIV_ROUND_UP(size, PAGE_SIZE);
> -   start = module_alloc(numpages * PAGE_SIZE);
> +   start = execmem_text_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);

This should be EXECMEM_MODULE_TEXT?

Thanks,
Song

> if (!start)
> return -ENOMEM;
> set_memory_rox((unsigned long)start, numpages);
[...]


Re: [PATCH v3 09/13] powerpc: extend execmem_params for kprobes allocations

2023-09-21 Thread Song Liu
On Mon, Sep 18, 2023 at 12:31 AM Mike Rapoport  wrote:
>
[...]
> @@ -135,5 +138,13 @@ struct execmem_params __init *execmem_arch_params(void)
>
> range->pgprot = prot;
>
> +   execmem_params.ranges[EXECMEM_KPROBES].start = VMALLOC_START;
> +   execmem_params.ranges[EXECMEM_KPROBES].start = VMALLOC_END;

.end = VMALLOC_END.

Thanks,
Song

> +
> +   if (strict_module_rwx_enabled())
> +   execmem_params.ranges[EXECMEM_KPROBES].pgprot = 
> PAGE_KERNEL_ROX;
> +   else
> +   execmem_params.ranges[EXECMEM_KPROBES].pgprot = 
> PAGE_KERNEL_EXEC;
> +
> return &execmem_params;
>  }
> --
> 2.39.2
>
>


RE: Questions: Should kernel panic when PCIe fatal error occurs?

2023-09-21 Thread David Laight
> It would be nice if they worked the same, but I suspect that vendors
> may rely on the fact that CPER_SEV_FATAL forces a restart/panic as
> part of their system integrity story.

The file system errors created by a panic (especially an NMI panic)
could easily be more problematic than a failed PCIe data transfer.
Evan a read that returned ~0u - which can be checked for.

Panicking a system that is converting TDM telephony to RTP for the
911 emergency service because a PCIe cable/riser connecting one of the
TDM board has become loose doesn't seem ideal.
(Or because the TDM board's fpga has decided it isn't going to respond
to any accesses until the BARs are setup again...)

The system can carry on with some TDM connections disabled - but that
is ok because they are all duplicated in case a cable gets cuit.

(Yes - that is a live system...)

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



Re: [PATCH v3 02/13] mm: introduce execmem_text_alloc() and execmem_free()

2023-09-21 Thread Song Liu
On Mon, Sep 18, 2023 at 12:30 AM Mike Rapoport  wrote:
>
[...]
> +
> +/**
> + * enum execmem_type - types of executable memory ranges
> + *
> + * There are several subsystems that allocate executable memory.
> + * Architectures define different restrictions on placement,
> + * permissions, alignment and other parameters for memory that can be used
> + * by these subsystems.
> + * Types in this enum identify subsystems that allocate executable memory
> + * and let architectures define parameters for ranges suitable for
> + * allocations by each subsystem.
> + *
> + * @EXECMEM_DEFAULT: default parameters that would be used for types that
> + * are not explcitly defined.
> + * @EXECMEM_MODULE_TEXT: parameters for module text sections
> + * @EXECMEM_KPROBES: parameters for kprobes
> + * @EXECMEM_FTRACE: parameters for ftrace
> + * @EXECMEM_BPF: parameters for BPF
> + * @EXECMEM_TYPE_MAX:
> + */
> +enum execmem_type {
> +   EXECMEM_DEFAULT,

I found EXECMEM_DEFAULT more confusing than helpful.

Song

> +   EXECMEM_MODULE_TEXT = EXECMEM_DEFAULT,
> +   EXECMEM_KPROBES,
> +   EXECMEM_FTRACE,
> +   EXECMEM_BPF,
> +   EXECMEM_TYPE_MAX,
> +};
> +
[...]


Re: [PATCH v3 02/13] mm: introduce execmem_text_alloc() and execmem_free()

2023-09-21 Thread Song Liu
On Mon, Sep 18, 2023 at 12:30 AM Mike Rapoport  wrote:
>
[...]
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static void *execmem_alloc(size_t size)
> +{
> +   return module_alloc(size);
> +}
> +
> +void *execmem_text_alloc(enum execmem_type type, size_t size)
> +{
> +   return execmem_alloc(size);
> +}

execmem_text_alloc (and later execmem_data_alloc) both take "type" as
input. I guess we can just use execmem_alloc(type, size) for everything?

Thanks,
Song

> +
> +void execmem_free(void *ptr)
> +{
> +   /*
> +* This memory may be RO, and freeing RO memory in an interrupt is not
> +* supported by vmalloc.
> +*/
> +   WARN_ON(in_interrupt());
> +   vfree(ptr);
> +}
> --
> 2.39.2
>


Re: [PATCH 1/1] PCI: layerscape-ep: set 64-bit DMA mask

2023-09-21 Thread Frank Li
On Thu, Sep 21, 2023 at 10:04:31PM +0200, Christophe JAILLET wrote:
> Le 21/09/2023 à 20:35, Frank Li a écrit :
> > On Thu, Sep 21, 2023 at 07:59:51PM +0200, Christophe JAILLET wrote:
> > > Le 21/09/2023 à 17:37, Frank Li a écrit :
> > > > From: Guanhua Gao 
> > > > 
> > > > Set DMA mask and coherent DMA mask to enable 64-bit addressing.
> > > > 
> > > > Signed-off-by: Guanhua Gao 
> > > > Signed-off-by: Hou Zhiqiang 
> > > > Signed-off-by: Frank Li 
> > > > ---
> > > >drivers/pci/controller/dwc/pci-layerscape-ep.c | 5 +
> > > >1 file changed, 5 insertions(+)
> > > > 
> > > > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
> > > > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > > index de4c1758a6c33..6fd0dea38a32c 100644
> > > > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > > > @@ -249,6 +249,11 @@ static int __init ls_pcie_ep_probe(struct 
> > > > platform_device *pdev)
> > > > pcie->big_endian = of_property_read_bool(dev->of_node, 
> > > > "big-endian");
> > > > +   /* set 64-bit DMA mask and coherent DMA mask */
> > > > +   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
> > > > +   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
> > > 
> > > As stated in [1], dma_set_mask() with a 64-bit mask will never
> > > fail if dev->dma_mask is non-NULL.
> > > 
> > > So, if it fails, the 32 bits case will also fail for the same reason.
> > > There is no need for the 2nd test.
> > > 
> > > 
> > > See [1] for Christoph Hellwig comment about it.
> > 
> > I don't think it is true. the below is dma_set_mask()'s implementation
> 
> I'll try to recollect a more detailled explanation from Christoph.
> 
> I also checked all paths some times ago, and the conclusion was that if
> dma_set_mask(64) failed, dma_set_mask(32) would fail for the exact same
> reasons.
> 
> I'll try to find the corresponding mail and come back to you.

I go through iommu driver and code carefully. You are right.
The dma_supported() actual means iommu require minimized dma capatiblity.

It is quite miss leading. There are many codes in driver like these pattern. 

A example: 
static int sba_dma_supported( struct device *dev, u64 mask)()
{
...
 * check if mask is >= than the current max IO Virt Address 

 * The max IO Virt address will *always* < 30 bits. 

 */ 

return((int)(mask >= (ioc->ibase - 1 +  

(ioc->pdir_size / sizeof(u64) * IOVP_SIZE) )));
...
}

1 means supported. 0 means unsupported. 

So dma_set_mask(64) is enough. Let me send new patch.

Frank


> 
> I don't thing that implementation details have changed since that times, so
> the conclusion should still be valid.
> 
> Adding Christoph in cc, if he wants to give another look at it, or if he
> beats me finding the 1 or 2 years old mails.
> 
> CJ
> 
> > 
> > int dma_set_mask(struct device *dev, u64 mask)
> > {
> > /*
> >  * Truncate the mask to the actually supported dma_addr_t width to
> >  * avoid generating unsupportable addresses.
> >  */
> > mask = (dma_addr_t)mask;
> > 
> > if (!dev->dma_mask || !dma_supported(dev, mask))
> > ^^^
> > return -EIO;
> > 
> > arch_dma_set_mask(dev, mask);
> > *dev->dma_mask = mask;
> > return 0;
> > }
> > 
> > dma_supported() may return failiure.
> > 
> > static int dma_supported(struct device *dev, u64 mask)
> > {
> > const struct dma_map_ops *ops = get_dma_ops(dev);
> > 
> > /*
> >  * ->dma_supported sets the bypass flag, so we must always call
> >  * into the method here unless the device is truly direct mapped.
> >  */
> > if (!ops)
> > return dma_direct_supported(dev, mask);
> > if (!ops->dma_supported)
> > return 1;
> > return ops->dma_supported(dev, mask);
> >  ^^
> > DMA driver or IOMMU driver may return failure.
> > }
> > 
> > Frank
> > 
> > > 
> > > CJ
> > > 
> > > 
> > > [1]: https://lkml.org/lkml/2021/6/7/398
> > > 
> > > > +   return -EIO;
> > > > +
> > > > platform_set_drvdata(pdev, pcie);
> > > > ret = dw_pcie_ep_init(&pci->ep);
> > > 
> > 
> 


Re: Questions: Should kernel panic when PCIe fatal error occurs?

2023-09-21 Thread Bjorn Helgaas
On Thu, Sep 21, 2023 at 08:10:19PM +0800, Shuai Xue wrote:
> On 2023/9/21 07:02, Bjorn Helgaas wrote:
> > On Mon, Sep 18, 2023 at 05:39:58PM +0800, Shuai Xue wrote:
> ...

> > I guess your point is that for CPER_SEV_FATAL errors, the APEI/GHES
> > path always panics but the native path never does, and that maybe both
> > paths should work the same way?
> 
> Yes, exactly. Both OS native and APEI/GHES firmware first are notifications
> used to handles PCIe AER errors, and IMHO, they should ideally work in the
> same way.

I agree, that would be nice, but the whole point of the APEI/GHES
functionality is vendor value-add, so I'm not sure we can achieve that
ideal.

> ...
> As a result, AER driver only does recovery for non-fatal PCIe error.

This is only true for the APEI/GHES path, right?  For *native* AER
handling, we attempt recovery for both fatal and non-fatal errors.

> > It doesn't seem like the native path should always panic.  If we can
> > tell that data was corrupted, we may want to panic, but otherwise I
> > don't think we should crash the entire system even if some device is
> > permanently broken.
> 
> Got it. But how can we tell if the data is corrupted with OS native?

I naively expect that by PCIe protocol, corrupted DLLPs or TLPs
detected by CRC, sequence number errors, etc, would be discarded
before corrupting memory, so I doubt we'd get an uncorrectable error
that means "sorry, I just corrupted your data."

But DPC is advertised as "avoiding the potential spread of any data
corruption," so there must be some mechanisms of corruption, and since
DPC is triggered by either ERR_FATAL or ERR_NONFATAL, I guess maybe
the errors could tell us something.  I'm going to quit speculating
because I obviously don't know enough about this area.

> >> However, I have changed my mind on this issue as I encounter a case where
> >> a error propagation is detected due to fatal DLLP (Data Link Protocol
> >> Error) error. A DLLP error occurred in the Compute node, causing the
> >> node to panic because `struct acpi_hest_generic_status::error_severity` was
> >> set as CPER_SEV_FATAL. However, data corruption was still detected in the
> >> storage node by CRC.
> > 
> > The only mention of Data Link Protocol Error that looks relevant is
> > PCIe r6.0, sec 3.6.2.2, which basically says a DLLP with an unexpected
> > Sequence Number should be discarded:
> > 
> >   For Ack and Nak DLLPs, the following steps are followed (see Figure
> >   3-21):
> > 
> > - If the Sequence Number specified by the AckNak_Seq_Num does not
> >   correspond to an unacknowledged TLP, or to the value in
> >   ACKD_SEQ, the DLLP is discarded
> > 
> >   - This is a Data Link Protocol Error, which is a reported error
> > associated with the Port (see Section 6.2).
> > 
> > So data from that DLLP should not have made it to memory, although of
> > course the DMA may not have been completed.  But it sounds like you
> > did see corrupted data written to memory?
> 
> The storage node use RDMA to directly access remote compute node.
> And a error detected by CRC in the storage node. So I suspect yes.

When doing the CRC, can you distinguish between corrupted data and
data that was not written because a DMA was only partially completed?

> ...
> I tried to inject Data Link Protocol Error on some platform. The mechanism
> behind is that rootport controls the sequence number of the specific TLPs
> and ACK/NAK DLLPs. Data Link Protocol Error will be detected at the Rx side
> of ACK/NAK DLLPs.
> 
> In such case, NIC and NVMe recovered on fatal and non-fatal DLLP
> errors.

I'm guessing this error injection directly writes the AER status bit,
which would probably only test the reporting (sending an ERR_FATAL
message), AER interrupt generation, firmware or OS interrupt handling,
etc.

It probably would not actually generate a DLLP with a bad sequence
number, so it probably does not test the hardware behavior of
discarding the DLLP if the sequence number is bad.  Just my guess
though.

> ...
> My point is that how kernel could recover from non-fatal and fatal
> errors in firmware first without DPC? If CPER_SEV_FATAL is used to
> report fatal PCIe error, kernel will panic in APEI/GHES driver.

The platform decides whether to use CPER_SEV_FATAL, so we can't change
that.  We *could* change whether Linux panics when the platform says
an error is CPER_SEV_FATAL.  That happens in drivers/acpi, so it's
really up to Rafael.

Personally I would want to hear from vendors who use the APEI/GHES
path.  Poking around the web for logs that mention HEST and related
things, it looks like at least Dell, HP, and Lenovo use it.  And there
are drivers/acpi/apei commits from nxp.com, alibaba.com, amd.com,
arm.com huawei.com, etc., so some of them probably care, too.

Bjorn


Re: [PATCH 1/1] PCI: layerscape-ep: set 64-bit DMA mask

2023-09-21 Thread Christophe JAILLET

Le 21/09/2023 à 20:35, Frank Li a écrit :

On Thu, Sep 21, 2023 at 07:59:51PM +0200, Christophe JAILLET wrote:

Le 21/09/2023 à 17:37, Frank Li a écrit :

From: Guanhua Gao 

Set DMA mask and coherent DMA mask to enable 64-bit addressing.

Signed-off-by: Guanhua Gao 
Signed-off-by: Hou Zhiqiang 
Signed-off-by: Frank Li 
---
   drivers/pci/controller/dwc/pci-layerscape-ep.c | 5 +
   1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index de4c1758a6c33..6fd0dea38a32c 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -249,6 +249,11 @@ static int __init ls_pcie_ep_probe(struct platform_device 
*pdev)
pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
+   /* set 64-bit DMA mask and coherent DMA mask */
+   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))


As stated in [1], dma_set_mask() with a 64-bit mask will never
fail if dev->dma_mask is non-NULL.

So, if it fails, the 32 bits case will also fail for the same reason.
There is no need for the 2nd test.


See [1] for Christoph Hellwig comment about it.


I don't think it is true. the below is dma_set_mask()'s implementation


I'll try to recollect a more detailled explanation from Christoph.

I also checked all paths some times ago, and the conclusion was that if 
dma_set_mask(64) failed, dma_set_mask(32) would fail for the exact same 
reasons.


I'll try to find the corresponding mail and come back to you.

I don't thing that implementation details have changed since that times, 
so the conclusion should still be valid.


Adding Christoph in cc, if he wants to give another look at it, or if he 
beats me finding the 1 or 2 years old mails.


CJ



int dma_set_mask(struct device *dev, u64 mask)
{
/*
 * Truncate the mask to the actually supported dma_addr_t width to
 * avoid generating unsupportable addresses.
 */
mask = (dma_addr_t)mask;

if (!dev->dma_mask || !dma_supported(dev, mask))
^^^
return -EIO;

arch_dma_set_mask(dev, mask);
*dev->dma_mask = mask;
return 0;
}

dma_supported() may return failiure.

static int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

/*
 * ->dma_supported sets the bypass flag, so we must always call
 * into the method here unless the device is truly direct mapped.
 */
if (!ops)
return dma_direct_supported(dev, mask);
if (!ops->dma_supported)
return 1;
return ops->dma_supported(dev, mask);
 ^^
DMA driver or IOMMU driver may return failure.
}

  
Frank




CJ


[1]: https://lkml.org/lkml/2021/6/7/398


+   return -EIO;
+
platform_set_drvdata(pdev, pcie);
ret = dw_pcie_ep_init(&pci->ep);








Re: [RFC PATCH v12 14/33] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory

2023-09-21 Thread Sean Christopherson
On Wed, Sep 13, 2023, Sean Christopherson wrote:
>  virt/kvm/guest_mem.c   | 593 +

Getting to the really important stuff...

Anyone object to naming the new file guest_memfd.c instead of guest_mem.c?  Just
the file, i.e. still keep the gmem namespace.

Using guest_memfd.c would make it much more obvious that the file holds more 
than
generic "guest memory" APIs, and would provide a stronger conceptual connection
with memfd.c.


Re: [PATCH v1 2/8] powerpc: hugetlb: Convert set_huge_pte_at() to take vma

2023-09-21 Thread Christophe Leroy


Le 21/09/2023 à 18:20, Ryan Roberts a écrit :
> In order to fix a bug, arm64 needs access to the vma inside it's
> implementation of set_huge_pte_at(). Provide for this by converting the
> mm parameter to be a vma. Any implementations that require the mm can
> access it via vma->vm_mm.
> 
> This commit makes the required powerpc modifications. Separate commits
> update the other arches and core code, before the actual bug is fixed in
> arm64.
> 
> No behavioral changes intended.

This patch doesn't build, allthough I have also applied patch 1. Is 
something missing ?

   CALLscripts/checksyscalls.sh
   CC  arch/powerpc/kernel/setup-common.o
In file included from arch/powerpc/kernel/setup-common.c:37:
./include/linux/hugetlb.h: In function 'huge_ptep_modify_prot_commit':
./include/linux/hugetlb.h:987:28: error: passing argument 1 of 
'set_huge_pte_at' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
   987 | set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
   | ~~~^~~
   ||
   |struct mm_struct *
In file included from ./arch/powerpc/include/asm/hugetlb.h:13,
  from ./include/linux/hugetlb.h:815:
./arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h:49:45: note: expected 
'struct vm_area_struct *' but argument is of type 'struct mm_struct *'
49 | void set_huge_pte_at(struct vm_area_struct *vma, unsigned long 
addr, pte_t *ptep, pte_t pte);
   |  ~~~^~~
cc1: all warnings being treated as errors
make[4]: *** [scripts/Makefile.build:243: 
arch/powerpc/kernel/setup-common.o] Error 1
make[4]: Target 'arch/powerpc/kernel/' not remade because of errors.
make[3]: *** [scripts/Makefile.build:480: arch/powerpc/kernel] Error 2
   CC  arch/powerpc/mm/fault.o
In file included from arch/powerpc/mm/fault.c:33:
./include/linux/hugetlb.h: In function 'huge_ptep_modify_prot_commit':
./include/linux/hugetlb.h:987:28: error: passing argument 1 of 
'set_huge_pte_at' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
   987 | set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
   | ~~~^~~
   ||
   |struct mm_struct *
In file included from ./arch/powerpc/include/asm/hugetlb.h:13,
  from ./include/linux/hugetlb.h:815:
./arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h:49:45: note: expected 
'struct vm_area_struct *' but argument is of type 'struct mm_struct *'
49 | void set_huge_pte_at(struct vm_area_struct *vma, unsigned long 
addr, pte_t *ptep, pte_t pte);
   |  ~~~^~~
cc1: all warnings being treated as errors
make[4]: *** [scripts/Makefile.build:243: arch/powerpc/mm/fault.o] Error 1
   CC  arch/powerpc/mm/pgtable.o
In file included from arch/powerpc/mm/pgtable.c:25:
./include/linux/hugetlb.h: In function 'huge_ptep_modify_prot_commit':
./include/linux/hugetlb.h:987:28: error: passing argument 1 of 
'set_huge_pte_at' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
   987 | set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
   | ~~~^~~
   ||
   |struct mm_struct *
In file included from ./arch/powerpc/include/asm/hugetlb.h:13,
  from ./include/linux/hugetlb.h:815:
./arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h:49:45: note: expected 
'struct vm_area_struct *' but argument is of type 'struct mm_struct *'
49 | void set_huge_pte_at(struct vm_area_struct *vma, unsigned long 
addr, pte_t *ptep, pte_t pte);
   |  ~~~^~~
cc1: all warnings being treated as errors
make[4]: *** [scripts/Makefile.build:243: arch/powerpc/mm/pgtable.o] Error 1
   CC  arch/powerpc/mm/init_32.o
In file included from arch/powerpc/mm/init_32.c:30:
./include/linux/hugetlb.h: In function 'huge_ptep_modify_prot_commit':
./include/linux/hugetlb.h:987:28: error: passing argument 1 of 
'set_huge_pte_at' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
   987 | set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
   | ~~~^~~
   ||
   |struct mm_struct *
In file included from ./arch/powerpc/include/asm/hugetlb.h:13,
  from ./include/linux/hugetlb.h:815:
./arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h:49:45: note: expected 
'struct vm_area_struct *' but argument is of type 'struct mm_struct *'
49 | void set_huge_pte_at(struct vm_area_struct *vma, unsigned long 
addr, pte_t *ptep, pte_t pte);
   |  ~~~^~~
cc1: all warnings being treated as errors
make[4]: *** [scripts/Makefile.build:243: arch/powerpc/mm/init_32.o] Error 1
   CC  a

Re: [PATCH 1/1] PCI: layerscape-ep: set 64-bit DMA mask

2023-09-21 Thread Frank Li
On Thu, Sep 21, 2023 at 07:59:51PM +0200, Christophe JAILLET wrote:
> Le 21/09/2023 à 17:37, Frank Li a écrit :
> > From: Guanhua Gao 
> > 
> > Set DMA mask and coherent DMA mask to enable 64-bit addressing.
> > 
> > Signed-off-by: Guanhua Gao 
> > Signed-off-by: Hou Zhiqiang 
> > Signed-off-by: Frank Li 
> > ---
> >   drivers/pci/controller/dwc/pci-layerscape-ep.c | 5 +
> >   1 file changed, 5 insertions(+)
> > 
> > diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
> > b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > index de4c1758a6c33..6fd0dea38a32c 100644
> > --- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > +++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
> > @@ -249,6 +249,11 @@ static int __init ls_pcie_ep_probe(struct 
> > platform_device *pdev)
> > pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
> > +   /* set 64-bit DMA mask and coherent DMA mask */
> > +   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
> > +   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
> 
> As stated in [1], dma_set_mask() with a 64-bit mask will never
> fail if dev->dma_mask is non-NULL.
> 
> So, if it fails, the 32 bits case will also fail for the same reason.
> There is no need for the 2nd test.
> 
> 
> See [1] for Christoph Hellwig comment about it.

I don't think it is true. the below is dma_set_mask()'s implementation

int dma_set_mask(struct device *dev, u64 mask)
{
/*
 * Truncate the mask to the actually supported dma_addr_t width to
 * avoid generating unsupportable addresses.
 */
mask = (dma_addr_t)mask;

if (!dev->dma_mask || !dma_supported(dev, mask))
^^^
return -EIO;

arch_dma_set_mask(dev, mask);
*dev->dma_mask = mask;
return 0;
}

dma_supported() may return failiure.

static int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);

/*
 * ->dma_supported sets the bypass flag, so we must always call
 * into the method here unless the device is truly direct mapped.
 */
if (!ops)
return dma_direct_supported(dev, mask);
if (!ops->dma_supported)
return 1;
return ops->dma_supported(dev, mask);
^^
DMA driver or IOMMU driver may return failure.
}

 
Frank

> 
> CJ
> 
> 
> [1]: https://lkml.org/lkml/2021/6/7/398
> 
> > +   return -EIO;
> > +
> > platform_set_drvdata(pdev, pcie);
> > ret = dw_pcie_ep_init(&pci->ep);
> 


Re: [PATCH 1/1] PCI: layerscape-ep: set 64-bit DMA mask

2023-09-21 Thread Christophe JAILLET

Le 21/09/2023 à 17:37, Frank Li a écrit :

From: Guanhua Gao 

Set DMA mask and coherent DMA mask to enable 64-bit addressing.

Signed-off-by: Guanhua Gao 
Signed-off-by: Hou Zhiqiang 
Signed-off-by: Frank Li 
---
  drivers/pci/controller/dwc/pci-layerscape-ep.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index de4c1758a6c33..6fd0dea38a32c 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -249,6 +249,11 @@ static int __init ls_pcie_ep_probe(struct platform_device 
*pdev)
  
  	pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
  
+	/* set 64-bit DMA mask and coherent DMA mask */

+   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))


As stated in [1], dma_set_mask() with a 64-bit mask will never
fail if dev->dma_mask is non-NULL.

So, if it fails, the 32 bits case will also fail for the same reason.
There is no need for the 2nd test.


See [1] for Christoph Hellwig comment about it.

CJ


[1]: https://lkml.org/lkml/2021/6/7/398


+   return -EIO;
+
platform_set_drvdata(pdev, pcie);
  
  	ret = dw_pcie_ep_init(&pci->ep);




Re: [PATCH v1 0/8] Fix set_huge_pte_at() panic on arm64

2023-09-21 Thread Catalin Marinas
On Thu, Sep 21, 2023 at 05:35:54PM +0100, Ryan Roberts wrote:
> On 21/09/2023 17:30, Andrew Morton wrote:
> > On Thu, 21 Sep 2023 17:19:59 +0100 Ryan Roberts  
> > wrote:
> >> Ryan Roberts (8):
> >>   parisc: hugetlb: Convert set_huge_pte_at() to take vma
> >>   powerpc: hugetlb: Convert set_huge_pte_at() to take vma
> >>   riscv: hugetlb: Convert set_huge_pte_at() to take vma
> >>   s390: hugetlb: Convert set_huge_pte_at() to take vma
> >>   sparc: hugetlb: Convert set_huge_pte_at() to take vma
> >>   mm: hugetlb: Convert set_huge_pte_at() to take vma
> >>   arm64: hugetlb: Convert set_huge_pte_at() to take vma
> >>   arm64: hugetlb: Fix set_huge_pte_at() to work with all swap entries
> >>
> >>  arch/arm64/include/asm/hugetlb.h  |  2 +-
> >>  arch/arm64/mm/hugetlbpage.c   | 22 --
> >>  arch/parisc/include/asm/hugetlb.h |  2 +-
> >>  arch/parisc/mm/hugetlbpage.c  |  4 +--
> >>  .../include/asm/nohash/32/hugetlb-8xx.h   |  3 +-
> >>  arch/powerpc/mm/book3s64/hugetlbpage.c|  2 +-
> >>  arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  2 +-
> >>  arch/powerpc/mm/nohash/8xx.c  |  2 +-
> >>  arch/powerpc/mm/pgtable.c |  7 -
> >>  arch/riscv/include/asm/hugetlb.h  |  2 +-
> >>  arch/riscv/mm/hugetlbpage.c   |  3 +-
> >>  arch/s390/include/asm/hugetlb.h   |  8 +++--
> >>  arch/s390/mm/hugetlbpage.c|  8 -
> >>  arch/sparc/include/asm/hugetlb.h  |  8 +++--
> >>  arch/sparc/mm/hugetlbpage.c   |  8 -
> >>  include/asm-generic/hugetlb.h |  6 ++--
> >>  include/linux/hugetlb.h   |  6 ++--
> >>  mm/damon/vaddr.c  |  2 +-
> >>  mm/hugetlb.c  | 30 +--
> >>  mm/migrate.c  |  2 +-
> >>  mm/rmap.c | 10 +++
> >>  mm/vmalloc.c  |  5 +++-
> >>  22 files changed, 80 insertions(+), 64 deletions(-)
> > 
> > Looks scary but it's actually a fairly modest patchset.  It could
> > easily be all rolled into a single patch for ease of backporting. 
> > Maybe Greg has an opinion?
> 
> Yes, I thought about doing that; or perhaps 2 patches - one for the interface
> change across all arches and core code, and one for the actual bug fix?

I think this would make more sense, especially if we want to backport
it. The first patch would have no functional change, only an interface
change, followed by the arm64 fix.

-- 
Catalin


Re: [PATCH v1 0/8] Fix set_huge_pte_at() panic on arm64

2023-09-21 Thread Andrew Morton
On Thu, 21 Sep 2023 17:19:59 +0100 Ryan Roberts  wrote:

> Hi All,
> 
> This series fixes a bug in arm64's implementation of set_huge_pte_at(), which
> can result in an unprivileged user causing a kernel panic. The problem was
> triggered when running the new uffd poison mm selftest for HUGETLB memory. 
> This
> test (and the uffd poison feature) was merged for v6.6-rc1. However, upon
> inspection there are multiple other pre-existing paths that can trigger this
> bug.
> 
> Ideally, I'd like to get this fix in for v6.6 if possible? And I guess it 
> should
> be backported too, given there are call sites where this can theoretically
> happen that pre-date v6.6-rc1 (I've cc'ed sta...@vger.kernel.org).

This gets you a naggygram from Greg.  The way to request a backport is
to add cc:stable to all the changelogs.  I'll make that change to my copy.


> Ryan Roberts (8):
>   parisc: hugetlb: Convert set_huge_pte_at() to take vma
>   powerpc: hugetlb: Convert set_huge_pte_at() to take vma
>   riscv: hugetlb: Convert set_huge_pte_at() to take vma
>   s390: hugetlb: Convert set_huge_pte_at() to take vma
>   sparc: hugetlb: Convert set_huge_pte_at() to take vma
>   mm: hugetlb: Convert set_huge_pte_at() to take vma
>   arm64: hugetlb: Convert set_huge_pte_at() to take vma
>   arm64: hugetlb: Fix set_huge_pte_at() to work with all swap entries
> 
>  arch/arm64/include/asm/hugetlb.h  |  2 +-
>  arch/arm64/mm/hugetlbpage.c   | 22 --
>  arch/parisc/include/asm/hugetlb.h |  2 +-
>  arch/parisc/mm/hugetlbpage.c  |  4 +--
>  .../include/asm/nohash/32/hugetlb-8xx.h   |  3 +-
>  arch/powerpc/mm/book3s64/hugetlbpage.c|  2 +-
>  arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  2 +-
>  arch/powerpc/mm/nohash/8xx.c  |  2 +-
>  arch/powerpc/mm/pgtable.c |  7 -
>  arch/riscv/include/asm/hugetlb.h  |  2 +-
>  arch/riscv/mm/hugetlbpage.c   |  3 +-
>  arch/s390/include/asm/hugetlb.h   |  8 +++--
>  arch/s390/mm/hugetlbpage.c|  8 -
>  arch/sparc/include/asm/hugetlb.h  |  8 +++--
>  arch/sparc/mm/hugetlbpage.c   |  8 -
>  include/asm-generic/hugetlb.h |  6 ++--
>  include/linux/hugetlb.h   |  6 ++--
>  mm/damon/vaddr.c  |  2 +-
>  mm/hugetlb.c  | 30 +--
>  mm/migrate.c  |  2 +-
>  mm/rmap.c | 10 +++
>  mm/vmalloc.c  |  5 +++-
>  22 files changed, 80 insertions(+), 64 deletions(-)

Looks scary but it's actually a fairly modest patchset.  It could
easily be all rolled into a single patch for ease of backporting. 
Maybe Greg has an opinion?



Re: [GIT PULL] Please pull powerpc/linux.git powerpc-6.6-2 tag

2023-09-21 Thread pr-tracker-bot
The pull request you sent on Thu, 21 Sep 2023 19:45:21 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> tags/powerpc-6.6-2

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/7bdfc1af0a5af34b3c9620a2023d2ea00fd77b57

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


[PATCH 1/1] PCI: layerscape-ep: set 64-bit DMA mask

2023-09-21 Thread Frank Li
From: Guanhua Gao 

Set DMA mask and coherent DMA mask to enable 64-bit addressing.

Signed-off-by: Guanhua Gao 
Signed-off-by: Hou Zhiqiang 
Signed-off-by: Frank Li 
---
 drivers/pci/controller/dwc/pci-layerscape-ep.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index de4c1758a6c33..6fd0dea38a32c 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -249,6 +249,11 @@ static int __init ls_pcie_ep_probe(struct platform_device 
*pdev)
 
pcie->big_endian = of_property_read_bool(dev->of_node, "big-endian");
 
+   /* set 64-bit DMA mask and coherent DMA mask */
+   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
+   if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)))
+   return -EIO;
+
platform_set_drvdata(pdev, pcie);
 
ret = dw_pcie_ep_init(&pci->ep);
-- 
2.34.1



Re: [RFC PATCH v12 18/33] KVM: x86/mmu: Handle page fault for private memory

2023-09-21 Thread Sean Christopherson
On Mon, Sep 18, 2023, Yan Zhao wrote:
> On Fri, Sep 15, 2023 at 07:26:16AM -0700, Sean Christopherson wrote:
> > On Fri, Sep 15, 2023, Yan Zhao wrote:
> > > >  static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct 
> > > > kvm_page_fault *fault)
> > > >  {
> > > > struct kvm_memory_slot *slot = fault->slot;
> > > > @@ -4293,6 +4356,14 @@ static int __kvm_faultin_pfn(struct kvm_vcpu 
> > > > *vcpu, struct kvm_page_fault *fault
> > > > return RET_PF_EMULATE;
> > > > }
> > > >  
> > > > +   if (fault->is_private != kvm_mem_is_private(vcpu->kvm, 
> > > > fault->gfn)) {
> > > In patch 21,
> > > fault->is_private is set as:
> > >   ".is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT)",
> > > then, the inequality here means memory attribute has been updated after
> > > last check.
> > > So, why an exit to user space for converting is required instead of a 
> > > mere retry?
> > > 
> > > Or, is it because how .is_private is assigned in patch 21 is subjected to 
> > > change
> > > in future? 
> > 
> > This.  Retrying on SNP or TDX would hang the guest.  I suppose we could 
> > special
> Is this because if the guest access a page in private way (e.g. via
> private key in TDX), the returned page must be a private page?

Yes, the returned page must be private, because the GHCI (TDX) and GHCB (SNP)
require that the host allow implicit conversions.  I.e. if the guest accesses
memory as private (or shared), then the host must map memory as private (or 
shared).
Simply resuming the guest will not change the guest access, nor will it change 
KVM's
memory attributes.

Ideally (IMO), implicit conversions would be disallowed, but even if implicit
conversions weren't a thing, retrying would still be wrong as KVM would either
inject an exception into the guest or exit to userspace to let userspace handle
the illegal access.

> > case VMs where .is_private is derived from the memory attributes, but the
> > SW_PROTECTED_VM type is primary a development vehicle at this point.  I'd 
> > like to
> > have it mimic SNP/TDX as much as possible; performance is a secondary 
> > concern.
> Ok. But this mimic is somewhat confusing as it may be problematic in below 
> scenario,
> though sane guest should ensure no one is accessing a page before doing memory
> conversion.
> 
> 
> CPU 0   CPU 1
> access GFN A in private way
> fault->is_private=true
> convert GFN A to shared
>   set memory attribute of A to shared
> 
> faultin, mismatch and exit
> set memory attribute of A
> to private
> 
> vCPU access GFN A in shared way
> fault->is_private = true
> faultin, match and map a private PFN B
> 
> vCPU accesses private PFN B in shared way

If this is a TDX or SNP VM, then the private vs. shared information comes from
the guest itself, e.g. this sequence

   vCPU access GFN A in shared way
   fault->is_private = true

cannot happen because is_private will be false based on the error code (SNP) or
the GPA (TDX).

And when hardware doesn't generate page faults based on private vs. shared, i.e.
for non-TDX/SNP VMs, from a fault handling perspective there is no concept of 
the
guest accessing a GFN in a "private way" or a "shared way".  I.e. there are no
implicit conversions.

For SEV and SEV-ES, the guest can access memory as private vs. shared, but the
and the host VMM absolutely must be in agreement and synchronized with respect 
to
the state of a page, otherwise guest memory will be corrupted.  But that has
nothing to do with the fault handling, e.g. creating aliases in the guest to 
access
a single GFN as shared and private from two CPUs will create incoherent cache
entries and/or corrupt data without any involvement from KVM.

In other words, the above isn't possible for TDX/SNP, and for all other types,
the conflict between CPU0 and CPU1 is unequivocally a guest bug.


Re: [RFC PATCH v12 02/33] KVM: Use gfn instead of hva for mmu_notifier_retry

2023-09-21 Thread Sean Christopherson
On Thu, Sep 21, 2023, Xu Yilun wrote:
> When the invalidation acrosses multiple slots, I'm not sure if the
> contiguous HVA range must correspond to contiguous GFN range. If not,
> are we producing a larger range than required?

Multiple invalidations are all but guaranteed to yield a range that covers 
addresses
that aren't actually being invalidated.  This is true today.

> And when the invalidation acrosses multiple address space, I'm almost
> sure it is wrong to merge GFN ranges from different address spaces. 

It's not "wrong" in the sense that false positives do not cause functional
problems, at worst a false positive can unnecessarily stall a vCPU until the
unrelated invalidations complete.

Multiple concurrent invalidations are not common, and if they do happen, they 
are
likely related and will have spacial locality in both host virtual address space
and guest physical address space.  Given that, we chose for the simple (and 
fast!)
approach of maintaining a single all-encompassing range.


Re: [RFC PATCH v4 09/11] media: uapi: Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control

2023-09-21 Thread Hans Verkuil
On 21/09/2023 13:13, Shengjiu Wang wrote:
> On Thu, Sep 21, 2023 at 3:11 PM Hans Verkuil  wrote:
>>
>> On 21/09/2023 08:55, Shengjiu Wang wrote:
>>> On Wed, Sep 20, 2023 at 6:19 PM Hans Verkuil  wrote:

 On 20/09/2023 11:32, Shengjiu Wang wrote:
> The input clock and output clock may not be the accurate
> rate as the sample rate, there is some drift, so the convert
> ratio of i.MX ASRC module need to be changed according to
> actual clock rate.
>
> Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control for user to
> adjust the ratio.
>
> Signed-off-by: Shengjiu Wang 
> ---
>  Documentation/userspace-api/media/v4l/control.rst | 5 +
>  drivers/media/v4l2-core/v4l2-ctrls-defs.c | 1 +
>  include/uapi/linux/v4l2-controls.h| 1 +
>  3 files changed, 7 insertions(+)
>
> diff --git a/Documentation/userspace-api/media/v4l/control.rst 
> b/Documentation/userspace-api/media/v4l/control.rst
> index 4463fce694b0..2bc175900a34 100644
> --- a/Documentation/userspace-api/media/v4l/control.rst
> +++ b/Documentation/userspace-api/media/v4l/control.rst
> @@ -318,6 +318,11 @@ Control IDs
>  depending on particular custom controls should check the driver name
>  and version, see :ref:`querycap`.
>
> +.. _v4l2-audio-imx:
> +
> +``V4L2_CID_USER_IMX_ASRC_RATIO_MOD``
> +sets the rasampler ratio modifier of i.MX asrc module.

 rasampler -> resampler (I think?)

 This doesn't document at all what the type of the control is or how to 
 interpret it.

> +
>  Applications can enumerate the available controls with the
>  :ref:`VIDIOC_QUERYCTRL` and
>  :ref:`VIDIOC_QUERYMENU ` ioctls, get and set a
> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c 
> b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> index 8696eb1cdd61..16f66f66198c 100644
> --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> @@ -1242,6 +1242,7 @@ const char *v4l2_ctrl_get_name(u32 id)
>   case V4L2_CID_COLORIMETRY_CLASS:return "Colorimetry 
> Controls";
>   case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:   return 
> "HDR10 Content Light Info";
>   case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:  return 
> "HDR10 Mastering Display";
> + case V4L2_CID_USER_IMX_ASRC_RATIO_MOD:  return 
> "ASRC RATIO MOD";

 Let's stay consistent with the other control names:

 "ASRC Ratio Modifier"

 But if this is a driver specific control, then this doesn't belong here.

 Driver specific controls are defined in the driver itself, including this
 description.

 Same for the control documentation: if it is driver specific, then that
 typically is documented either in a driver-specific public header, or
 possibly in driver-specific documentation 
 (Documentation/admin-guide/media/).

 But is this imx specific? Wouldn't other similar devices need this?
>>>
>>> It is imx specific.
>>
>> Why? I'm not opposed to this, but I wonder if you looked at datasheets of
>> similar devices from other vendors: would they use something similar?
> 
> I tried to find some datasheets for other vendors, but failed to find them.
> So I don't know how they implement this part.
> 
> Ratio modification on i.MX is to modify the configured ratio.
> For example, the input rate is 44.1kHz,  output rate is 48kHz,
> configured ratio = 441/480,   the ratio modification is to modify
> the fractional part of (441/480) with small steps.  because the
> input clock or output clock has drift in the real hardware.
> The ratio modification is signed value, it is added to configured
> ratio.
> 
> In our case, we have some sysfs interface for user to get the
> clock from input audio device and output audio device, user
> need to calculate the ratio dynamically , then configure the
> modification to driver

So this ratio modifier comes into play when either the audio input
or audio output (or both) are realtime audio inputs/outputs where
the sample rate is not a perfect 44.1 or 48 kHz, but slightly different?

If you would use this resampler to do offline resampling (i.e. resample
a 44.1 kHz wav file to a 48 kHz wav file), then this wouldn't be needed,
correct?

When dealing with realtime audio, userspace will know how to get the
precise sample rate, but that is out-of-scope of this driver. Here
you just need a knob to slightly tweak the resampling ratio.

If my understanding is correct, then I wonder if it is such a good
idea to put the rate into the v4l2_audio_format: it really has nothing
to do with the audio format as it is stored in memory.

What if you would drop that 'rate' field and instead create just a single
control for the resampling ratio. This can use struct v4l2_fract to represent
a fraction. It would be more wor

Re: [PATCH] ASoC: fsl_sai: sw reset consumer on pause/stop

2023-09-21 Thread Fabio Estevam
On Thu, Sep 21, 2023 at 5:48 AM Emil Abildgaard Svendsen
 wrote:
>
> When in consumer mode with BCLK disabled (FSL_SAI_CSR_BCE = 0) the
> FIFO's can still contain data when resumed. It might also be possible
> with BCLK enabled but just less likely.
>
> When the FIFO's still contain data on resume it can cause channel
> shifting on e.g. XRUNS. A Software Reset will reset the FIFO's and make
> sure channels are aligned.
>
> Fixes: 269f399dc19f ("ASoC: fsl_sai: Disable bit clock with transmitter")
> Signed-off-by: Emil Svendsen 

Reviewed-by: Fabio Estevam 


Re: Recent Power changes and stack_trace_save_tsk_reliable?

2023-09-21 Thread Joe Lawrence
On 9/21/23 08:26, Michael Ellerman wrote:
> Petr Mladek  writes:
>> On Wed 2023-08-30 17:47:35, Joe Lawrence wrote:
>>> On 8/30/23 02:37, Michael Ellerman wrote:
 Michael Ellerman  writes:
> Joe Lawrence  writes:
>> We noticed that our kpatch integration tests started failing on ppc64le
>> when targeting the upstream v6.4 kernel, and then confirmed that the
>> in-tree livepatching kselftests similarly fail, too.  From the kselftest
>> results, it appears that livepatch transitions are no longer completing.
> ...

 The diff below fixes it for me, can you test that on your setup?

>>>
>>> Thanks for the fast triage of this one.  The proposed fix works well on
>>> our setup.  I have yet to try the kpatch integration tests with this,
>>> but I can verify that all of the kernel livepatching kselftests now
>>> happily run.
>>
>> Have this been somehow handled, please? I do not see the proposed
>> change in linux-next as of now.
> 
> I thought I was waiting for Joe to run the kpatch integration tests, but
> in hindsight maybe he was hinting that someone else should run them (ie. me) 
> ;)
> 
> Patch incoming.

Ah sorry for the confusion.  kpatch integration tests - that's on me.
If kernel stack unwinding is fixed, I'm pretty confident they will
execute.  I will kick them off today, but don't let that hold up the
kernel patches.

Thanks,
-- 
Joe



RE: Questions: Should kernel panic when PCIe fatal error occurs?

2023-09-21 Thread David Laight
...
I've got a target to generate AER errors by generating read cycles
that are inside the address range that the bridge forwards but
outside of any BAR because there are 2 different sized BARs.
(Pretty easy to setup.)
On the system I was using they didn't get propagated all the way
to the root bridge - but were visible in the lower bridge.
It would be nice for a driver to be able to detect/clear such
a flag if it gets an unexpected ~0u read value.
(I'm not sure an error callback helps.)

OTOH a 'nebs compliant' server routed any kind of PCIe link error
through to some 'system management' logic that then raised an NMI.
I'm not sure who thought an NMI was a good idea - they are pretty
impossible to handle in the kernel and too late to be of use to
the code performing the access.

In any case we were getting one after 'echo 1 >xxx/remove' and
then taking the PCIe link down by reprogramming the fpga.
So the link going down was entirely expected, but there seemed
to be nothing we could do to stop the kernel crashing.

I'm sure 'nebs compliant' ought to contain some requirements for
resilience to hardware failures!

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)


Re: [PATCH v2 1/2] powerpc: add `cur_cpu_spec` symbol to vmcoreinfo

2023-09-21 Thread Michael Ellerman
"Aneesh Kumar K.V"  writes:
> Aditya Gupta  writes:
>
>> Since below commit, address mapping for vmemmap has changed for Radix
>> MMU, where address mapping is stored in kernel page table itself,
>> instead of earlier used 'vmemmap_list'.
>>
>> commit 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use
>> a different vmemmap handling function")
>>
>> Hence with upstream kernel, in case of Radix MMU, makedumpfile fails to do
>> address translation for vmemmap addresses, as it depended on vmemmap_list,
>> which can now be empty.
>>
>> While fixing the address translation in makedumpfile, it was identified
>> that currently makedumpfile cannot distinguish between Hash MMU and
>> Radix MMU, unless VMLINUX is passed with -x flag to makedumpfile.
>> And hence fails to assign offsets and shifts correctly (such as in L4 to
>> PGDIR offset calculation in makedumpfile).
>>
>> For getting the MMU, makedumpfile uses `cur_cpu_spec.mmu_features`.
>>
>> Add `cur_cpu_spec` symbol and offset of `mmu_features` in the
>> `cpu_spec` struct, to VMCOREINFO, so that makedumpfile can assign the
>> offsets correctly, without needing a VMLINUX.
>>
>> Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a 
>> different vmemmap handling function")
>> Reported-by: Sachin Sant 
>> Tested-by: Sachin Sant 
>> Signed-off-by: Aditya Gupta 
>>
>> ---
>> Corresponding makedumpfile patches to fix address translation, in Radix
>> MMU case:
>>
>> Link: 
>> https://lore.kernel.org/kexec/b5f0f00e-f2b1-47d7-a143-5683d10dc...@linux.ibm.com/T/#t
>> ---
>> ---
>>  arch/powerpc/kexec/core.c | 2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
>> index de64c7962991..369b8334a4f0 100644
>> --- a/arch/powerpc/kexec/core.c
>> +++ b/arch/powerpc/kexec/core.c
>> @@ -63,6 +63,8 @@ void arch_crash_save_vmcoreinfo(void)
>>  #ifndef CONFIG_NUMA
>>  VMCOREINFO_SYMBOL(contig_page_data);
>>  #endif
>> +VMCOREINFO_SYMBOL(cur_cpu_spec);
>> +VMCOREINFO_OFFSET(cpu_spec, mmu_features);
>>  #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
>>  VMCOREINFO_SYMBOL(vmemmap_list);
>>  VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
>>
>
> That implies we now have to be careful when updating MMU_FTR_* #defines.
 
Yeah, that's a good point.

> It is not bad considering other hacks we do in crash to identify kernel
> changes tied to version number. But i am wondering if there another way
> to identify radix vs hash?

Ultimately crash just has to accept that the kernel will change over
time, and some times crash will have to adapt.

We can try not to change values unnecessarily, but given the knowledge
of kernel internals crash has there will be breakage from time to time.

The only other alternative I think is to create a well defined data
structure that is explicitly provided by the kernel for crash with
various information in a set format. With a committment that the data
structure will be maintained in a forward-compatible manner. Which
sounds like a bunch of work :)

cheers


Re: [PATCH v5 3/4] arch/*/io.h: remove ioremap_uc in some architectures

2023-09-21 Thread Arnd Bergmann
On Thu, Sep 21, 2023, at 07:04, Baoquan He wrote:
> ioremap_uc() is only meaningful on old x86-32 systems with the PAT
> extension, and on ia64 with its slightly unconventional ioremap()
> behavior. So remove the ioremap_uc() definition in architecutures
> other than x86 and ia64. These architectures all have asm-generic/io.h
> included and will have the default ioremap_uc() definition which
> returns NULL.
>
> This changes the existing behaviour, while no need to worry about
> any breakage because in the only callsite of ioremap_uc(), code
> has been adjusted to eliminate the impact. Please see
> atyfb_setup_generic() of drivers/video/fbdev/aty/atyfb_base.c.
>
> If any new invocation of ioremap_uc() need be added, please consider
> using ioremap() intead or adding a ARCH specific version if necessary.
>
> Signed-off-by: Baoquan He 
> Acked-by: Geert Uytterhoeven 
> Acked-by: Michael Ellerman  (powerpc)
> Acked-by: Helge Deller   # parisc
> Cc: linux-al...@vger.kernel.org
> Cc: linux-hexa...@vger.kernel.org
> Cc: linux-m...@lists.linux-m68k.org
> Cc: linux-m...@vger.kernel.org
> Cc: linux-par...@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux...@vger.kernel.org
> Cc: sparcli...@vger.kernel.org

Acked-by: Arnd Bergmann 


Re: Recent Power changes and stack_trace_save_tsk_reliable?

2023-09-21 Thread Michael Ellerman
Petr Mladek  writes:
> On Wed 2023-08-30 17:47:35, Joe Lawrence wrote:
>> On 8/30/23 02:37, Michael Ellerman wrote:
>> > Michael Ellerman  writes:
>> >> Joe Lawrence  writes:
>> >>> We noticed that our kpatch integration tests started failing on ppc64le
>> >>> when targeting the upstream v6.4 kernel, and then confirmed that the
>> >>> in-tree livepatching kselftests similarly fail, too.  From the kselftest
>> >>> results, it appears that livepatch transitions are no longer completing.
...
>> > 
>> > The diff below fixes it for me, can you test that on your setup?
>> > 
>> 
>> Thanks for the fast triage of this one.  The proposed fix works well on
>> our setup.  I have yet to try the kpatch integration tests with this,
>> but I can verify that all of the kernel livepatching kselftests now
>> happily run.
>
> Have this been somehow handled, please? I do not see the proposed
> change in linux-next as of now.

I thought I was waiting for Joe to run the kpatch integration tests, but
in hindsight maybe he was hinting that someone else should run them (ie. me) ;)

Patch incoming.

cheers


Re: Questions: Should kernel panic when PCIe fatal error occurs?

2023-09-21 Thread Shuai Xue


On 2023/9/21 07:02, Bjorn Helgaas wrote:
> On Mon, Sep 18, 2023 at 05:39:58PM +0800, Shuai Xue wrote:
>> Hi, all folks,
>>
>> Error reporting and recovery are one of the important features of PCIe, and
>> the kernel has been supporting them since version 2.6, 17 years ago.
>> I am very curious about the expected behavior of the software.
>> I first recap the error classification and then list my questions bellow it.
>>
>> ## Recap: Error classification
>>
>> - Fatal Errors
>>
>> Fatal errors are uncorrectable error conditions which render the particular
>> Link and related hardware unreliable. For Fatal errors, a reset of the
>> components on the Link may be required to return to reliable operation.
>> Platform handling of Fatal errors, and any efforts to limit the effects of
>> these errors, is platform implementation specific. (PCIe 6.0.1, sec
>> 6.2.2.2.1 Fatal Errors).
>>
>> - Non-Fatal Errors
>>
>> Non-fatal errors are uncorrectable errors which cause a particular
>> transaction to be unreliable but the Link is otherwise fully functional.
>> Isolating Non-fatal from Fatal errors provides Requester/Receiver logic in
>> a device or system management software the opportunity to recover from the
>> error without resetting the components on the Link and disturbing other
>> transactions in progress. Devices not associated with the transaction in
>> error are not impacted by the error.  (PCIe 6.0.1, sec 6.2.2.2.1 Non-Fatal
>> Errors).
>>
>> ## What the kernel do?
>>
>> The Linux kernel supports both the OS native and firmware first modes in
>> AER and DPC drivers. The error recovery API is defined in `struct
>> pci_error_handlers`, and the recovery process is performed in several
>> stages in pcie_do_recovery(). One main difference in handling PCIe errors
>> is that the kernel only resets the link when a fatal error is detected.
>>
>> ## Questions
>>
>> 1. Should kernel panic when fatal errors occur without AER recovery?
>>
>> IMHO, the answer is NO. The AER driver handles both fatal and
>> non-fatal errors, and I have not found any panic changes in the
>> recovery path in OS native mode.
>>
>> As far as I know, on many X86 platforms, struct
>> `acpi_hest_generic_status::error_severity` is set as CPER_SEV_FATAL
>> in firmware first mode. As a result, kernel will panic immediately
>> in ghes_proc() when fatal AER errors occur, and there is no chance
>> to handle the error and perform recovery in AER driver.
> 
> UEFI r2.10, sec N.2.1,, defines CPER_SEV_FATAL, and platform firmware
> decides which Error Severity to put in the error record.  I don't see
> anything in UEFI about how the OS should handle fatal errors.
> 
> ACPI r6.5, sec 18.1, says on fatal uncorrected error, the system
> should be restarted to prevent propagation of the error.  For
> CPER_SEV_FATAL errors, it looks like ghes_proc() panics even before
> trying AER recovery.
> 
> I guess your point is that for CPER_SEV_FATAL errors, the APEI/GHES
> path always panics but the native path never does, and that maybe both
> paths should work the same way?

Yes, exactly. Both OS native and APEI/GHES firmware first are notifications
used to handles PCIe AER errors, and IMHO, they should ideally work in the
same way.

> 
> It would be nice if they worked the same, but I suspect that vendors
> may rely on the fact that CPER_SEV_FATAL forces a restart/panic as
> part of their system integrity story.


As far I I know, vendor use CPER_SEV_FATAL to report default fatal PCIe error

- Data Link Protocol Error
- Surprise Down Error
- Flow Control Protocol Error Severity
- Receiver Overflow Severity
- Malformed TLP Severity
- Uncorrectable Internal Error Severity
- IDE Check Failed Severity
(per PCIe r6.0 7.8.4.4 Uncorrectable Error Severity Register (Offset 0Ch))

which forces a panic.

As a result, AER driver only does recovery for non-fatal PCIe error.

> 
> It doesn't seem like the native path should always panic.  If we can
> tell that data was corrupted, we may want to panic, but otherwise I
> don't think we should crash the entire system even if some device is
> permanently broken.

Got it. But how can we tell if the data is corrupted with OS native?

> 
>> For fatal and non-fatal errors, struct
>> `acpi_hest_generic_status::error_severity` should as
>> CPER_SEV_RECOVERABLE, and struct
>> `acpi_hest_generic_data::error_severity` should reflect its real
>> severity. Then, the kernel is equivalent to handling PCIe errors in
>> Firmware first mode as it does in OS native mode.  Please correct me
>> if I am wrong.
> 
> I don't know enough to comment on how Error Severity should be used in
> the Generic Error Status Block vs the Generic Error Data Entry.

The APEI/UEFI standardized a means for a computer platform to convey error
information to OSPM. I think the problem here is that CPER_SEV_FATAL is 
ambiguous.
We can not tell that the data is corrupted or it just a PCIe fatal error?

> 
>> However, I have changed my mind on this issue as I encounter a case 

Re: [RFC PATCH v4 09/11] media: uapi: Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control

2023-09-21 Thread Mark Brown
On Thu, Sep 21, 2023 at 07:13:14PM +0800, Shengjiu Wang wrote:

> Ratio modification on i.MX is to modify the configured ratio.
> For example, the input rate is 44.1kHz,  output rate is 48kHz,
> configured ratio = 441/480,   the ratio modification is to modify
> the fractional part of (441/480) with small steps.  because the
> input clock or output clock has drift in the real hardware.
> The ratio modification is signed value, it is added to configured
> ratio.

It does sound like something other vendors are likely to have to provide
a mechanism to compensate for clock inaccuracies.


signature.asc
Description: PGP signature


Re: [RFC PATCH v4 09/11] media: uapi: Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control

2023-09-21 Thread Shengjiu Wang
On Thu, Sep 21, 2023 at 3:11 PM Hans Verkuil  wrote:
>
> On 21/09/2023 08:55, Shengjiu Wang wrote:
> > On Wed, Sep 20, 2023 at 6:19 PM Hans Verkuil  wrote:
> >>
> >> On 20/09/2023 11:32, Shengjiu Wang wrote:
> >>> The input clock and output clock may not be the accurate
> >>> rate as the sample rate, there is some drift, so the convert
> >>> ratio of i.MX ASRC module need to be changed according to
> >>> actual clock rate.
> >>>
> >>> Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control for user to
> >>> adjust the ratio.
> >>>
> >>> Signed-off-by: Shengjiu Wang 
> >>> ---
> >>>  Documentation/userspace-api/media/v4l/control.rst | 5 +
> >>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c | 1 +
> >>>  include/uapi/linux/v4l2-controls.h| 1 +
> >>>  3 files changed, 7 insertions(+)
> >>>
> >>> diff --git a/Documentation/userspace-api/media/v4l/control.rst 
> >>> b/Documentation/userspace-api/media/v4l/control.rst
> >>> index 4463fce694b0..2bc175900a34 100644
> >>> --- a/Documentation/userspace-api/media/v4l/control.rst
> >>> +++ b/Documentation/userspace-api/media/v4l/control.rst
> >>> @@ -318,6 +318,11 @@ Control IDs
> >>>  depending on particular custom controls should check the driver name
> >>>  and version, see :ref:`querycap`.
> >>>
> >>> +.. _v4l2-audio-imx:
> >>> +
> >>> +``V4L2_CID_USER_IMX_ASRC_RATIO_MOD``
> >>> +sets the rasampler ratio modifier of i.MX asrc module.
> >>
> >> rasampler -> resampler (I think?)
> >>
> >> This doesn't document at all what the type of the control is or how to 
> >> interpret it.
> >>
> >>> +
> >>>  Applications can enumerate the available controls with the
> >>>  :ref:`VIDIOC_QUERYCTRL` and
> >>>  :ref:`VIDIOC_QUERYMENU ` ioctls, get and set a
> >>> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c 
> >>> b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> >>> index 8696eb1cdd61..16f66f66198c 100644
> >>> --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> >>> +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> >>> @@ -1242,6 +1242,7 @@ const char *v4l2_ctrl_get_name(u32 id)
> >>>   case V4L2_CID_COLORIMETRY_CLASS:return "Colorimetry 
> >>> Controls";
> >>>   case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:   return 
> >>> "HDR10 Content Light Info";
> >>>   case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:  return 
> >>> "HDR10 Mastering Display";
> >>> + case V4L2_CID_USER_IMX_ASRC_RATIO_MOD:  return 
> >>> "ASRC RATIO MOD";
> >>
> >> Let's stay consistent with the other control names:
> >>
> >> "ASRC Ratio Modifier"
> >>
> >> But if this is a driver specific control, then this doesn't belong here.
> >>
> >> Driver specific controls are defined in the driver itself, including this
> >> description.
> >>
> >> Same for the control documentation: if it is driver specific, then that
> >> typically is documented either in a driver-specific public header, or
> >> possibly in driver-specific documentation 
> >> (Documentation/admin-guide/media/).
> >>
> >> But is this imx specific? Wouldn't other similar devices need this?
> >
> > It is imx specific.
>
> Why? I'm not opposed to this, but I wonder if you looked at datasheets of
> similar devices from other vendors: would they use something similar?

I tried to find some datasheets for other vendors, but failed to find them.
So I don't know how they implement this part.

Ratio modification on i.MX is to modify the configured ratio.
For example, the input rate is 44.1kHz,  output rate is 48kHz,
configured ratio = 441/480,   the ratio modification is to modify
the fractional part of (441/480) with small steps.  because the
input clock or output clock has drift in the real hardware.
The ratio modification is signed value, it is added to configured
ratio.

In our case, we have some sysfs interface for user to get the
clock from input audio device and output audio device, user
need to calculate the ratio dynamically , then configure the
modification to driver

May be other vendors has similar implementation. or make
the definition be generic is an option.

best regards
wang shengjiu

>
> And the very short description you gave in the commit log refers to input
> and output clock: how would userspace know those clock frequencies? In
> other words, what information does userspace need in order to set this
> control correctly? And is that information actually available? How would
> you use this control?
>
> I don't really understand how this is supposed to be used.
>
> >
> > Does this mean that I need to create a header file in include/uapi/linux
> > folder to put this definition?  I just hesitate if this is necessary.
>
> Yes, put it there. There are some examples of this already:
>
> include/uapi/linux/aspeed-video.h
> include/uapi/linux/max2175.h
>
> >
> > There is folder Documentation/userspace-api/media/drivers/ for drivers
> > Should this document in this folder, not in the
> > Documentation/admin-guide/media/?
>
> Yes, you are correct. For the headers abov

[PATCH v5 3/4] arch/*/io.h: remove ioremap_uc in some architectures

2023-09-21 Thread Baoquan He
ioremap_uc() is only meaningful on old x86-32 systems with the PAT
extension, and on ia64 with its slightly unconventional ioremap()
behavior. So remove the ioremap_uc() definition in architecutures
other than x86 and ia64. These architectures all have asm-generic/io.h
included and will have the default ioremap_uc() definition which
returns NULL.

This changes the existing behaviour, while no need to worry about
any breakage because in the only callsite of ioremap_uc(), code
has been adjusted to eliminate the impact. Please see
atyfb_setup_generic() of drivers/video/fbdev/aty/atyfb_base.c.

If any new invocation of ioremap_uc() need be added, please consider
using ioremap() intead or adding a ARCH specific version if necessary.

Signed-off-by: Baoquan He 
Acked-by: Geert Uytterhoeven 
Acked-by: Michael Ellerman  (powerpc)
Acked-by: Helge Deller   # parisc
Cc: linux-al...@vger.kernel.org
Cc: linux-hexa...@vger.kernel.org
Cc: linux-m...@lists.linux-m68k.org
Cc: linux-m...@vger.kernel.org
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
---
 Documentation/driver-api/device-io.rst | 9 +
 arch/alpha/include/asm/io.h| 1 -
 arch/hexagon/include/asm/io.h  | 3 ---
 arch/m68k/include/asm/kmap.h   | 1 -
 arch/mips/include/asm/io.h | 1 -
 arch/parisc/include/asm/io.h   | 2 --
 arch/powerpc/include/asm/io.h  | 1 -
 arch/sh/include/asm/io.h   | 2 --
 arch/sparc/include/asm/io_64.h | 1 -
 9 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/Documentation/driver-api/device-io.rst 
b/Documentation/driver-api/device-io.rst
index 2c7abd234f4e..d55384b106bd 100644
--- a/Documentation/driver-api/device-io.rst
+++ b/Documentation/driver-api/device-io.rst
@@ -408,11 +408,12 @@ functions for details on the CPU side of things.
 ioremap_uc()
 
 
-ioremap_uc() behaves like ioremap() except that on the x86 architecture without
-'PAT' mode, it marks memory as uncached even when the MTRR has designated
-it as cacheable, see Documentation/arch/x86/pat.rst.
+ioremap_uc() is only meaningful on old x86-32 systems with the PAT extension,
+and on ia64 with its slightly unconventional ioremap() behavior, everywhere
+elss ioremap_uc() defaults to return NULL.
 
-Portable drivers should avoid the use of ioremap_uc().
+
+Portable drivers should avoid the use of ioremap_uc(), use ioremap() instead.
 
 ioremap_cache()
 ---
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index 7aeaf7c30a6f..076f0e4e7f1e 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -308,7 +308,6 @@ static inline void __iomem *ioremap(unsigned long port, 
unsigned long size)
 }
 
 #define ioremap_wc ioremap
-#define ioremap_uc ioremap
 
 static inline void iounmap(volatile void __iomem *addr)
 {
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index e2b308e32a37..b7bc246dbcb1 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -174,9 +174,6 @@ static inline void writel(u32 data, volatile void __iomem 
*addr)
 #define _PAGE_IOREMAP (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
   (__HEXAGON_C_DEV << 6))
 
-#define ioremap_uc(addr, size) ioremap((addr), (size))
-
-
 #define __raw_writel writel
 
 static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h
index 4efb3efa593a..b778f015c917 100644
--- a/arch/m68k/include/asm/kmap.h
+++ b/arch/m68k/include/asm/kmap.h
@@ -25,7 +25,6 @@ static inline void __iomem *ioremap(unsigned long physaddr, 
unsigned long size)
return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
 }
 
-#define ioremap_uc ioremap
 #define ioremap_wt ioremap_wt
 static inline void __iomem *ioremap_wt(unsigned long physaddr,
   unsigned long size)
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 41d8bd5adef8..1ecf255efb40 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -170,7 +170,6 @@ void iounmap(const volatile void __iomem *addr);
  */
 #define ioremap(offset, size)  \
ioremap_prot((offset), (size), _CACHE_UNCACHED)
-#define ioremap_uc ioremap
 
 /*
  * ioremap_cache - map bus memory into CPU space
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 366537042465..48630c78714a 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -132,8 +132,6 @@ static inline void gsc_writeq(unsigned long long val, 
unsigned long addr)
 
 #define ioremap_wc(addr, size)  \
ioremap_prot((addr), (size), _PAGE_IOREMAP)
-#define ioremap_uc(addr, size)  \
-   ioremap_prot((addr), (size), _PAGE_IOREMAP)
 
 #define pci_iounmappci_iounmap
 

Re: [RFC v3 1/2] powerpc/cpuidle: cpuidle wakeup latency based on IPI and timer events

2023-09-21 Thread Aboorva Devarajan
On Wed, 2023-09-13 at 08:54 +1000, Michael Ellerman wrote:
> Aboorva Devarajan  writes:
> > From: Pratik R. Sampat 
> > 
> > Introduce a mechanism to fire directed IPIs from a source CPU to a
> > specified target CPU and measure the time incurred on waking up the
> > target CPU in response.
> > 
> > Also, introduce a mechanism to queue a hrtimer on a specified CPU
> > and
> > subsequently measure the time taken to wakeup the CPU.
> > 
> > Define a simple debugfs interface that allows for adjusting the
> > settings to trigger IPI and timer events on a designated CPU, and
> > to
> > observe the resulting cpuidle wakeup latencies.
> > 
> > Reviewed-by: Srikar Dronamraju 
> > Signed-off-by: Pratik R. Sampat 
> > Signed-off-by: Aboorva Devarajan 
> > ---
> >  arch/powerpc/Kconfig.debug |  10 ++
> >  arch/powerpc/kernel/Makefile   |   1 +
> >  arch/powerpc/kernel/test_cpuidle_latency.c | 154
> > +
>   
> I don't see anything here that's powerpc specific?
> 
> Which makes me wonder 1) could this be done with some existing
> generic
> mechanism?, and 2) if not can this test code be made generic.
> 
> At the very least this should be Cc'ed to the cpuidle lists &
> maintainers given it's a test for cpuidle latency :)
> 
> cheers

Hi Michael,

Thanks a lot for taking a look at this.

Yes, this test-case can be used as a generic benchmark for evaluating
CPU idle latencies across different architectures, as it has thus far
been exclusively tested and used on PowerPC, so we thought it would be
more beneficial to incorporate it into a PowerPC specific self-test
suite. But I will work on making it a generic self-test and send across
a v4.



[GIT PULL] Please pull powerpc/linux.git powerpc-6.6-2 tag

2023-09-21 Thread Michael Ellerman
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA256

Hi Linus,

Please pull some powerpc fixes for 6.6:

The following changes since commit ce9ecca0238b140b88f43859b211c9fdfd8e5b70:

  Linux 6.6-rc2 (2023-09-17 14:40:24 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-6.6-2

for you to fetch changes up to c3f4309693758b13fbb34b3741c2e2801ad28769:

  powerpc/dexcr: Move HASHCHK trap handler (2023-09-18 12:23:48 +1000)

- --
powerpc fixes for 6.6 #2

 - A fix for breakpoint handling which was using get_user() while atomic.

 - Fix the Power10 HASHCHK handler which was using get_user() while atomic.

 - A few build fixes for issues caused by recent changes.

Thanks to: Benjamin Gray, Christophe Leroy, Kajol Jain, Naveen N Rao.

- --
Benjamin Gray (4):
  powerpc/watchpoints: Disable preemption in thread_change_pc()
  powerpc/watchpoint: Disable pagefaults when getting user instruction
  powerpc/watchpoints: Annotate atomic context in more places
  powerpc/dexcr: Move HASHCHK trap handler

Christophe Leroy (1):
  powerpc/82xx: Select FSL_SOC

Kajol Jain (1):
  powerpc/perf/hv-24x7: Update domain value check

Naveen N Rao (1):
  powerpc: Fix build issue with LD_DEAD_CODE_DATA_ELIMINATION and 
FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY


 arch/powerpc/Kconfig|  2 +-
 arch/powerpc/kernel/hw_breakpoint.c | 16 +-
 arch/powerpc/kernel/hw_breakpoint_constraints.c |  7 ++-
 arch/powerpc/kernel/traps.c | 56 +---
 arch/powerpc/perf/hv-24x7.c |  2 +-
 arch/powerpc/platforms/82xx/Kconfig |  3 +-
 6 files changed, 60 insertions(+), 26 deletions(-)
-BEGIN PGP SIGNATURE-

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmUMEHgACgkQUevqPMjh
pYDDQw/+IuTozHtltaJEigsvcf9htmKoTBDqZqX3io+b8tg4dVnHIcfFlwAzr5XE
u7y0ktoqzldKBs2cOJh6hboafhazE/EdJlXEJP/79RQHf7Qyn2qFrH+627Az0dzi
ApuKpJtWqDyx934U+Xoys8dE8vDssGMD5kdjg0QMS6bp5pIXeJVLvxQUp/S+CQg3
MNcPGyFOXPd23FaXLCXuOL1rO4eyJCpiH5JOlBsod3YY85eyVD6Gpl4Uoi488C5j
fFpqOIKI+FmdA2NkbWxXPhC4j95y65l2qPhjANEfr2GnnKTW37RFmXS8OECleCHU
g8XB5quhNMeBkrRB5ZsmJ88M7IeYRJyYnsV+SKiR/NesiUrlWZKaZUdhiTnjDcVo
xzvLq9ZZjOx1opH23luC6HaXs/XiGD7n190OVw/8oBfklUkkqjhSN1pZXRWLsbL3
duGOFS+VuTI6UO4w8mqzQSrUjptR9UaQng/z5iloxzrbvwlnUrbccKFqf7seuv0c
HLyAbl5tMoGIhHWANn+1U0sMrgJy5L2b4es0cEsN0IxhCpNno+YnrtqVBdhDFfJZ
7TLxw6J8rVz5LN0bjXBwAiknGgi9xobJcIqP/nbG/jMjnCJ8w8XVnnOfZhJFSMEZ
nsQii1wmAuUTzFKJ/73Y+X61P1MYfs8sQOfqgqvLkhRNUI7TrFc=
=NfRO
-END PGP SIGNATURE-


Re: (subset) [PATCH v3 0/3] Kconfig: Add dependencies of POWER_RESET for pmac32

2023-09-21 Thread Michael Ellerman
On Thu, 14 Sep 2023 18:09:55 +0800, Yuan Tan wrote:
> These patches are to add dependencies of POWER_RESET for pmac32.
> 
> As I have to use "savedefconfig" on the latest branch of different
> architectures, I am sending separate patches for each architecture in v3.
> 
> To simplify the enablement of the poweroff support, selecting the
> required options for CONFIG_POWER_RESET=y may make many people happy
> especially when they are using a customized config (maybe tinyconfig
> based) for a target qemu board. Without normal poweroff support from the
> kernel side, qemu will simply hang[1] there after a 'poweroff' command,
> which is a very bad experience for the automatical tests.
> 
> [...]

Patches 2 and 3 applied to powerpc/next.

[2/3] Kconfig: Add dependencies of POWER_RESET for pmac32
  https://git.kernel.org/powerpc/c/a3ef2fef198c25c1d9ac6ff89fd50230e9507207
[3/3] powerpc/config: Simplify pmac32_defconfig
  https://git.kernel.org/powerpc/c/f84b727d132c39c70208503e60149af6dd5a217f

cheers


Re: [PATCH 00/11] add missing of_node_put

2023-09-21 Thread Michael Ellerman
On Thu, 07 Sep 2023 11:55:10 +0200, Julia Lawall wrote:
> Add of_node_put on a break out of an of_node loop.
> 

Patches 3 and 6 applied to powerpc/next.

[03/11] powerpc/powermac: add missing of_node_put

https://git.kernel.org/powerpc/c/a59e9eb25216eb1dc99e14fc31b76aa648d79540
[06/11] powerpc/kexec_file: add missing of_node_put

https://git.kernel.org/powerpc/c/06b627c1236216ac1239c5e1afcc75359af3fb72

cheers


Re: [PATCH v3 1/2] vmcore: remove dependency with is_kdump_kernel() for exporting vmcore

2023-09-21 Thread Michael Ellerman
On Tue, 12 Sep 2023 13:59:49 +0530, Hari Bathini wrote:
> Currently, is_kdump_kernel() returns true when elfcorehdr_addr is set.
> While elfcorehdr_addr is set for kexec based kernel dump mechanism,
> alternate dump capturing methods like fadump [1] also set it to export
> the vmcore. Since, is_kdump_kernel() is used to restrict resources in
> crash dump capture kernel and such restrictions may not be desirable
> for fadump, allow is_kdump_kernel() to be defined differently for such
> scenarios. With this, is_kdump_kernel() could be false while vmcore is
> usable. So, remove unnecessary dependency with is_kdump_kernel(), for
> exporting vmcore.
> 
> [...]

Applied to powerpc/next.

[1/2] vmcore: remove dependency with is_kdump_kernel() for exporting vmcore
  https://git.kernel.org/powerpc/c/86328b338c3996b814417dd68e3f899a1a649059
[2/2] powerpc/fadump: make is_kdump_kernel() return false when fadump is active
  https://git.kernel.org/powerpc/c/b098f1c32365304633077d73e4ae21c72d4241b3

cheers


Re: [PATCH] powerpc/configs: Set more PPC debug configs

2023-09-21 Thread Michael Ellerman
On Wed, 30 Aug 2023 14:42:38 +1000, Benjamin Gray wrote:
> Add more config options that wouldn't be done by the generic debug
> config in kernel/configs/debug.config
> 
> CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
>   Adds an initialized check on each (cpu|mmu)_has_feature()
> 
> CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
>   Adds some extra checks around IRQ mask manipulation
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/configs: Set more PPC debug configs
  https://git.kernel.org/powerpc/c/ff25ad0aa280012eda5699713a9f0b468a1d6e4e

cheers


Re: [PATCH] powerpc: add `cur_cpu_spec` symbol to vmcoreinfo

2023-09-21 Thread Michael Ellerman
On Mon, 11 Sep 2023 14:44:09 +0530, Aditya Gupta wrote:
> Presently, while reading a vmcore, makedumpfile uses
> `cur_cpu_spec.mmu_features` to decide whether the crashed system had
> RADIX MMU or not.
> 
> Currently, makedumpfile fails to get the `cur_cpu_spec` symbol (unless
> a vmlinux is passed with the `-x` flag to makedumpfile), and hence
> assigns offsets and shifts (such as pgd_offset_l4) incorrecly considering
> MMU to be hash MMU.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: add `cur_cpu_spec` symbol to vmcoreinfo
  https://git.kernel.org/powerpc/c/7135b921b32966d7602ede396b7286d372aee63f

cheers


Re: [PATCH v5 00/11] KVM: PPC: Nested APIv2 guest support

2023-09-21 Thread Michael Ellerman
On Thu, 14 Sep 2023 13:05:49 +1000, Jordan Niethe wrote:
> A nested-HV API for PAPR has been developed based on the KVM-specific
> nested-HV API that is upstream in Linux/KVM and QEMU. The PAPR API had
> to break compatibility to accommodate implementation in other
> hypervisors and partitioning firmware. The existing KVM-specific API
> will be known as the Nested APIv1 and the PAPR API will be known as the
> Nested APIv2.
> 
> [...]

Applied to powerpc/topic/ppc-kvm.

[01/11] KVM: PPC: Always use the GPR accessors

https://git.kernel.org/powerpc/c/0e85b7df9cb0c65f840109159ef6754c783e07a0
[02/11] KVM: PPC: Introduce FPR/VR accessor functions

https://git.kernel.org/powerpc/c/52425a3b3c11cec58cf66e4c897fc1504f3911a9
[03/11] KVM: PPC: Rename accessor generator macros

https://git.kernel.org/powerpc/c/2a64bc673133346a7a3bd163f2e6048bd1788727
[04/11] KVM: PPC: Use accessors for VCPU registers

https://git.kernel.org/powerpc/c/7028ac8d174f28220f0e2de0cb3346cd3c31976d
[05/11] KVM: PPC: Use accessors for VCORE registers

https://git.kernel.org/powerpc/c/c8ae9b3c6e7f22a4b71e42edb0fc3942aa7a7c42
[06/11] KVM: PPC: Book3S HV: Use accessors for VCPU registers

https://git.kernel.org/powerpc/c/ebc88ea7a6ad0ea349df9c765357d3aa4e662aa9
[07/11] KVM: PPC: Book3S HV: Introduce low level MSR accessor

https://git.kernel.org/powerpc/c/6de2e837babb411cfb3cdb570581c3a65576ddaf
[08/11] KVM: PPC: Add helper library for Guest State Buffers

https://git.kernel.org/powerpc/c/6ccbbc33f06adaf79acde18571c6543ad1cb4be6
[09/11] KVM: PPC: Book3s HV: Hold LPIDs in an unsigned long

https://git.kernel.org/powerpc/c/dfcaacc8f970c6b4ea4e32d2186f2bea4a1d5255
[10/11] KVM: PPC: Add support for nestedv2 guests

https://git.kernel.org/powerpc/c/19d31c5f115754c369c0995df47479c384757f82
[11/11] docs: powerpc: Document nested KVM on POWER

https://git.kernel.org/powerpc/c/476652297f94a2e5e5ef29e734b0da37ade94110

cheers


Re: [PATCH 0/3] Fix preemption errors in watchpoints

2023-09-21 Thread Michael Ellerman
On Tue, 29 Aug 2023 16:34:54 +1000, Benjamin Gray wrote:
> When enabling debug config options relating to preemption, several bugs
> appear in the kernel log. With this series applied, the breakpoint code
> no longer prints bugs when running the powerpc/ptrace selftests.
> 
> Benjamin Gray (3):
>   powerpc/watchpoints: Disable preemption in thread_change_pc()
>   powerpc/watchpoint: Disable pagefaults when getting user instruction
>   powerpc/watchpoints: Annotate atomic context in more places
> 
> [...]

Applied to powerpc/fixes.

[1/3] powerpc/watchpoints: Disable preemption in thread_change_pc()
  https://git.kernel.org/powerpc/c/cc879ab3ce39bc39f9b1d238b283f43a5f6f957d
[2/3] powerpc/watchpoint: Disable pagefaults when getting user instruction
  https://git.kernel.org/powerpc/c/3241f260eb830d27d09cc604690ec24533fdb433
[3/3] powerpc/watchpoints: Annotate atomic context in more places
  https://git.kernel.org/powerpc/c/27646b2e02b096a6936b3e3b6ba334ae20763eab

cheers


Re: [PATCH] powerpc: Fix build issue with LD_DEAD_CODE_DATA_ELIMINATION and FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY

2023-09-21 Thread Michael Ellerman
On Wed, 13 Sep 2023 19:11:29 +0530, Naveen N Rao wrote:
> We recently added support for -fpatchable-function-entry and it is
> enabled by default on ppc32 (ppc64 needs gcc v13.1.0). When building the
> kernel for ppc32 and also enabling CONFIG_LD_DEAD_CODE_DATA_ELIMINATION,
> we see the below build error with older gcc versions:
>   powerpc-linux-gnu-ld: init/main.o(__patchable_function_entries): error: 
> need linked-to section for --gc-sections
> 
> This error is thrown since __patchable_function_entries section would be
> garbage collected with --gc-sections since it does not reference any
> other kept sections. This has subsequently been fixed with:
>   
> https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=b7d072167715829eed0622616f6ae0182900de3e
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc: Fix build issue with LD_DEAD_CODE_DATA_ELIMINATION and 
FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
  https://git.kernel.org/powerpc/c/60d77ed24bb3068c0837fe45b8921b0a6598829d

cheers


Re: [PATCH] powerpc/perf/hv-24x7: Update domain value check

2023-09-21 Thread Michael Ellerman
On Fri, 25 Aug 2023 11:26:01 +0530, Kajol Jain wrote:
> Valid domain value is in range 1 to HV_PERF_DOMAIN_MAX.
> Current code has check for domain value greater than or
> equal to HV_PERF_DOMAIN_MAX. But the check for domain value 0
> is missing.
> Fix this issue by adding check for domain value 0.
> 
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/perf/hv-24x7: Update domain value check
  https://git.kernel.org/powerpc/c/4ff3ba4db5943cac1045e3e4a3c0463ea10f6930

cheers


Re: [PATCH] powerpc/82xx: Select FSL_SOC

2023-09-21 Thread Michael Ellerman
On Thu, 14 Sep 2023 17:23:45 +0200, Christophe Leroy wrote:
> It used to be impossible to select CONFIG_CPM2 without selecting
> CONFIG_FSL_SOC at the same time because CONFIG_CPM2 was dependent
> on CONFIG_8260 and CONFIG_8260 was selecting CONFIG_FSL_SOC.
> 
> But after commit eb5aa2137275 ("powerpc/82xx: Remove CONFIG_8260
> and CONFIG_8272") CONFIG_CPM2 depends on CONFIG_MPC82xx instead
> but CONFIG_MPC82xx doesn't directly selects CONFIG_FSL_SOC.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/82xx: Select FSL_SOC
  https://git.kernel.org/powerpc/c/6901a9f9ef156283a0d8c8d1cea634d089ef

cheers


Re: [PATCH] powerpc/dexcr: Move HASHCHK trap handler

2023-09-21 Thread Michael Ellerman
On Fri, 25 Aug 2023 11:49:10 +1000, Benjamin Gray wrote:
> To determine if a trap was caused by a HASHCHK instruction, we inspect
> the user instruction that triggered the trap. However this may sleep
> if the page needs to be faulted in.
> 
> Move the HASHCHK handler logic to after we allow IRQs, which is fine
> because we are only interested in HASHCHK if it's a user space trap.
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/dexcr: Move HASHCHK trap handler
  https://git.kernel.org/powerpc/c/c3f4309693758b13fbb34b3741c2e2801ad28769

cheers


Re: [PATCH v2] powerpc/dexcr: Move HASHCHK trap handler

2023-09-21 Thread Michael Ellerman
On Fri, 15 Sep 2023 13:46:04 +1000, Benjamin Gray wrote:
> Syzkaller reported a sleep in atomic context bug relating to the HASHCHK
> handler logic
> 
>   BUG: sleeping function called from invalid context at 
> arch/powerpc/kernel/traps.c:1518
>   in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 25040, name: 
> syz-executor
>   preempt_count: 0, expected: 0
>   RCU nest depth: 0, expected: 0
>   no locks held by syz-executor/25040.
>   irq event stamp: 34
>   hardirqs last  enabled at (33): [] 
> prep_irq_for_enabled_exit arch/powerpc/kernel/interrupt.c:56 [inline]
>   hardirqs last  enabled at (33): [] 
> interrupt_exit_user_prepare_main+0x148/0x600 
> arch/powerpc/kernel/interrupt.c:230
>   hardirqs last disabled at (34): [] 
> interrupt_enter_prepare+0x144/0x4f0 arch/powerpc/include/asm/interrupt.h:176
>   softirqs last  enabled at (0): [] 
> copy_process+0x16e4/0x4750 kernel/fork.c:2436
>   softirqs last disabled at (0): [<>] 0x0
>   CPU: 15 PID: 25040 Comm: syz-executor Not tainted 
> 6.5.0-rc5-1-g3ccdff6bb06d #3
>   Hardware name: IBM,9105-22A POWER10 (raw) 0x800200 0xf06 
> of:IBM,FW1040.00 (NL1040_021) hv:phyp pSeries
>   Call Trace:
>   [c000a8247ce0] [c032b0e4] __might_resched+0x3b4/0x400 
> kernel/sched/core.c:10189
>   [c000a8247d80] [c08c7dc8] __might_fault+0xa8/0x170 
> mm/memory.c:5853
>   [c000a8247dc0] [c004160c] do_program_check+0x32c/0xb20 
> arch/powerpc/kernel/traps.c:1518
>   [c000a8247e50] [c0009b2c] program_check_common_virt+0x3bc/0x3c0
> 
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/dexcr: Move HASHCHK trap handler
  https://git.kernel.org/powerpc/c/c3f4309693758b13fbb34b3741c2e2801ad28769

cheers


[PATCH] ASoC: fsl_sai: sw reset consumer on pause/stop

2023-09-21 Thread Emil Abildgaard Svendsen
When in consumer mode with BCLK disabled (FSL_SAI_CSR_BCE = 0) the
FIFO's can still contain data when resumed. It might also be possible
with BCLK enabled but just less likely.

When the FIFO's still contain data on resume it can cause channel
shifting on e.g. XRUNS. A Software Reset will reset the FIFO's and make
sure channels are aligned.

Fixes: 269f399dc19f ("ASoC: fsl_sai: Disable bit clock with transmitter")
Signed-off-by: Emil Svendsen 
---
 sound/soc/fsl/fsl_sai.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 1e4020fae05a..1da5c17cd329 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -730,13 +730,18 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, 
int dir)
 * anymore. Add software reset to fix this issue.
 * This is a hardware bug, and will be fix in the
 * next sai version.
+*
+* When in consumer mode with BCLK disabled
+* (FSL_SAI_CSR_BCE = 0) the FIFO's can still contain
+* data when resumed. This can cause channel shifting
+* on e.g. XRUNS. A Software Reset will reset the
+* FIFO's and make sure the channels aren't shifted.
 */
-   if (!sai->is_consumer_mode) {
-   /* Software Reset */
-   regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 
FSL_SAI_CSR_SR);
-   /* Clear SR bit to finish the reset */
-   regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0);
-   }
+
+   /* Software Reset */
+   regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR);
+   /* Clear SR bit to finish the reset */
+   regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0);
 }
 
 static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd,
-- 
2.34.1


Re: [PATCH v5 25/31] dt-bindings: net: Add the Lantiq PEF2256 E1/T1/J1 framer

2023-09-21 Thread Christophe Leroy
Hi Hervé,

Le 18/09/2023 à 09:49, Herve Codina a écrit :
> Hi Christophe,
> 
> On Tue, 12 Sep 2023 18:49:26 +
> Christophe Leroy  wrote:
> 
>> Le 12/09/2023 à 20:13, Conor Dooley a écrit :
>>> Yo,
>>>
>>> I'm not au fait enough with this to leave particularly meaningful
>>> comments, so just some minor ones for you.
>>>
>>> On Tue, Sep 12, 2023 at 12:14:44PM +0200, Herve Codina wrote:
 The Lantiq PEF2256 is a framer and line interface component designed to
 fulfill all required interfacing between an analog E1/T1/J1 line and the
 digital PCM system highway/H.100 bus.

 Signed-off-by: Herve Codina 
 Signed-off-by: Christophe Leroy 
>>>
>>> Missing a co-developed-by?
>>
>> No, I guess it's a left-over of version v4 that I sent-out while Hervé
>> was AFK.
>>
>> If a v6 is sent I think this line can be removed.
> 
> May I move to reviewed-by ?

Yes you can do that.

Best regards
Christophe


Re: [PATCH v2 1/2] powerpc: add `cur_cpu_spec` symbol to vmcoreinfo

2023-09-21 Thread Aneesh Kumar K.V
Aditya Gupta  writes:

> On Wed, Sep 20, 2023 at 05:45:36PM +0530, Aneesh Kumar K.V wrote:
>> Aditya Gupta  writes:
>> 
>> > Since below commit, address mapping for vmemmap has changed for Radix
>> > MMU, where address mapping is stored in kernel page table itself,
>> > instead of earlier used 'vmemmap_list'.
>> >
>> > commit 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use
>> > a different vmemmap handling function")
>> >
>> > Hence with upstream kernel, in case of Radix MMU, makedumpfile fails to do
>> > address translation for vmemmap addresses, as it depended on vmemmap_list,
>> > which can now be empty.
>> >
>> > While fixing the address translation in makedumpfile, it was identified
>> > that currently makedumpfile cannot distinguish between Hash MMU and
>> > Radix MMU, unless VMLINUX is passed with -x flag to makedumpfile.
>> > And hence fails to assign offsets and shifts correctly (such as in L4 to
>> > PGDIR offset calculation in makedumpfile).
>> >
>> > For getting the MMU, makedumpfile uses `cur_cpu_spec.mmu_features`.
>> >
>> > Add `cur_cpu_spec` symbol and offset of `mmu_features` in the
>> > `cpu_spec` struct, to VMCOREINFO, so that makedumpfile can assign the
>> > offsets correctly, without needing a VMLINUX.
>> >
>> > Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a 
>> > different vmemmap handling function")
>> > Reported-by: Sachin Sant 
>> > Tested-by: Sachin Sant 
>> > Signed-off-by: Aditya Gupta 
>> >
>> > ---
>> > Corresponding makedumpfile patches to fix address translation, in Radix
>> > MMU case:
>> >
>> > Link: 
>> > https://lore.kernel.org/kexec/b5f0f00e-f2b1-47d7-a143-5683d10dc...@linux.ibm.com/T/#t
>> > ---
>> > ---
>> >  arch/powerpc/kexec/core.c | 2 ++
>> >  1 file changed, 2 insertions(+)
>> >
>> > diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
>> > index de64c7962991..369b8334a4f0 100644
>> > --- a/arch/powerpc/kexec/core.c
>> > +++ b/arch/powerpc/kexec/core.c
>> > @@ -63,6 +63,8 @@ void arch_crash_save_vmcoreinfo(void)
>> >  #ifndef CONFIG_NUMA
>> >VMCOREINFO_SYMBOL(contig_page_data);
>> >  #endif
>> > +  VMCOREINFO_SYMBOL(cur_cpu_spec);
>> > +  VMCOREINFO_OFFSET(cpu_spec, mmu_features);
>> >  #if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
>> >VMCOREINFO_SYMBOL(vmemmap_list);
>> >VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
>> >
>> 
>> That implies we now have to be careful when updating MMU_FTR_* #defines.
>> It is not bad considering other hacks we do in crash to identify kernel
>> changes tied to version number. But i am wondering if there another way
>> to identify radix vs hash?
>> 
>
> I could not find another way to get any other flag for RADIX vs HASH in
> makedumpfile. And currently I don't know of any other way.
>
> Both makedumpfile and crash look for '0x40' flag set in
> 'cur_cpu_spec.mmu_features', so only requirement for 'MMU_FTR_TYPE_RADIX' is 
> to
> be '0x40', or we will need to change the value accordingly in the tools.
>

Instead of exporting cur_cpu_spec.mmu_feature, you could do
coreinfo_mmu_features that does

if (radix_enabled())
   coreinfo_mmu_feature = VMCORE_INFO_RADIX_TRANSLATION;

-aneesh


[PATCH] selftests/powerpc: Fix emit_tests to work with run_kselftest.sh

2023-09-21 Thread Michael Ellerman
In order to use run_kselftest.sh the list of tests must be emitted to
populate kselftest-list.txt.

The powerpc Makefile is written to use EMIT_TESTS. But support for
EMIT_TESTS was dropped in commit d4e59a536f50 ("selftests: Use runner.sh
for emit targets"). Although prior to that commit a548de0fe8e1
("selftests: lib.mk: add test execute bit check to EMIT_TESTS") had
already broken run_kselftest.sh for powerpc due to the executable check
using the wrong path.

It can be fixed by replacing the EMIT_TESTS definitions with actual
emit_tests rules in the powerpc Makefiles. This makes run_kselftest.sh
able to run powerpc tests:

  $ cd linux
  $ export ARCH=powerpc
  $ export CROSS_COMPILE=powerpc64le-linux-gnu-
  $ make headers
  $ make -j -C tools/testing/selftests install
  $ grep -c "^powerpc" 
tools/testing/selftests/kselftest_install/kselftest-list.txt
  182

Fixes: d4e59a536f50 ("selftests: Use runner.sh for emit targets")
Signed-off-by: Michael Ellerman 
---
 tools/testing/selftests/powerpc/Makefile |  7 +++
 tools/testing/selftests/powerpc/pmu/Makefile | 11 ++-
 2 files changed, 9 insertions(+), 9 deletions(-)

I'll plan to merge this via the powerpc tree.

cheers

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 49f2ad1793fd..7ea42fa02eab 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -59,12 +59,11 @@ override define INSTALL_RULE
done;
 endef
 
-override define EMIT_TESTS
+emit_tests:
+@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET;\
-   $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
+   $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\
done;
-endef
 
 override define CLEAN
+@for TARGET in $(SUB_DIRS); do \
@@ -77,4 +76,4 @@ endef
 tags:
find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: tags $(SUB_DIRS)
+.PHONY: tags $(SUB_DIRS) emit_tests
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile 
b/tools/testing/selftests/powerpc/pmu/Makefile
index 2b95e44d20ff..a284fa874a9f 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -30,13 +30,14 @@ override define RUN_TESTS
+TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) 
OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
 endef
 
-DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
-override define EMIT_TESTS
-   $(DEFAULT_EMIT_TESTS)
+emit_tests:
+   for TEST in $(TEST_GEN_PROGS); do \
+   BASENAME_TEST=`basename $$TEST`;\
+   echo "$(COLLECTION):$$BASENAME_TEST";   \
+   done
+TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) 
OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) 
OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) 
OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
-endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
@@ -64,4 +65,4 @@ sampling_tests:
 event_code_tests:
TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; 
$(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
 
-.PHONY: all run_tests ebb sampling_tests event_code_tests
+.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests
-- 
2.41.0



Re: [RFC PATCH v4 09/11] media: uapi: Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control

2023-09-21 Thread Hans Verkuil
On 21/09/2023 08:55, Shengjiu Wang wrote:
> On Wed, Sep 20, 2023 at 6:19 PM Hans Verkuil  wrote:
>>
>> On 20/09/2023 11:32, Shengjiu Wang wrote:
>>> The input clock and output clock may not be the accurate
>>> rate as the sample rate, there is some drift, so the convert
>>> ratio of i.MX ASRC module need to be changed according to
>>> actual clock rate.
>>>
>>> Add V4L2_CID_USER_IMX_ASRC_RATIO_MOD control for user to
>>> adjust the ratio.
>>>
>>> Signed-off-by: Shengjiu Wang 
>>> ---
>>>  Documentation/userspace-api/media/v4l/control.rst | 5 +
>>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c | 1 +
>>>  include/uapi/linux/v4l2-controls.h| 1 +
>>>  3 files changed, 7 insertions(+)
>>>
>>> diff --git a/Documentation/userspace-api/media/v4l/control.rst 
>>> b/Documentation/userspace-api/media/v4l/control.rst
>>> index 4463fce694b0..2bc175900a34 100644
>>> --- a/Documentation/userspace-api/media/v4l/control.rst
>>> +++ b/Documentation/userspace-api/media/v4l/control.rst
>>> @@ -318,6 +318,11 @@ Control IDs
>>>  depending on particular custom controls should check the driver name
>>>  and version, see :ref:`querycap`.
>>>
>>> +.. _v4l2-audio-imx:
>>> +
>>> +``V4L2_CID_USER_IMX_ASRC_RATIO_MOD``
>>> +sets the rasampler ratio modifier of i.MX asrc module.
>>
>> rasampler -> resampler (I think?)
>>
>> This doesn't document at all what the type of the control is or how to 
>> interpret it.
>>
>>> +
>>>  Applications can enumerate the available controls with the
>>>  :ref:`VIDIOC_QUERYCTRL` and
>>>  :ref:`VIDIOC_QUERYMENU ` ioctls, get and set a
>>> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c 
>>> b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> index 8696eb1cdd61..16f66f66198c 100644
>>> --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> @@ -1242,6 +1242,7 @@ const char *v4l2_ctrl_get_name(u32 id)
>>>   case V4L2_CID_COLORIMETRY_CLASS:return "Colorimetry Controls";
>>>   case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:   return "HDR10 
>>> Content Light Info";
>>>   case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:  return "HDR10 
>>> Mastering Display";
>>> + case V4L2_CID_USER_IMX_ASRC_RATIO_MOD:  return "ASRC 
>>> RATIO MOD";
>>
>> Let's stay consistent with the other control names:
>>
>> "ASRC Ratio Modifier"
>>
>> But if this is a driver specific control, then this doesn't belong here.
>>
>> Driver specific controls are defined in the driver itself, including this
>> description.
>>
>> Same for the control documentation: if it is driver specific, then that
>> typically is documented either in a driver-specific public header, or
>> possibly in driver-specific documentation (Documentation/admin-guide/media/).
>>
>> But is this imx specific? Wouldn't other similar devices need this?
> 
> It is imx specific.

Why? I'm not opposed to this, but I wonder if you looked at datasheets of
similar devices from other vendors: would they use something similar?

And the very short description you gave in the commit log refers to input
and output clock: how would userspace know those clock frequencies? In
other words, what information does userspace need in order to set this
control correctly? And is that information actually available? How would
you use this control?

I don't really understand how this is supposed to be used.

> 
> Does this mean that I need to create a header file in include/uapi/linux
> folder to put this definition?  I just hesitate if this is necessary.

Yes, put it there. There are some examples of this already:

include/uapi/linux/aspeed-video.h
include/uapi/linux/max2175.h

> 
> There is folder Documentation/userspace-api/media/drivers/ for drivers
> Should this document in this folder, not in the
> Documentation/admin-guide/media/?

Yes, you are correct. For the headers above, the corresponding documentation
is in:

Documentation/userspace-api/media/drivers/aspeed-video.rst
Documentation/userspace-api/media/drivers/max2175.rst

So you have some examples as reference.

Frankly, what is in admin-guide and in userspace-api is a bit random, it
probably could use a cleanup.

Regards,

Hans

> 
> Best regards
> Wang shengjiu
>>
>>>   default:
>>>   return NULL;
>>>   }
>>> diff --git a/include/uapi/linux/v4l2-controls.h 
>>> b/include/uapi/linux/v4l2-controls.h
>>> index c3604a0a3e30..b1c319906d12 100644
>>> --- a/include/uapi/linux/v4l2-controls.h
>>> +++ b/include/uapi/linux/v4l2-controls.h
>>> @@ -162,6 +162,7 @@ enum v4l2_colorfx {
>>>  /* The base for the imx driver controls.
>>>   * We reserve 16 controls for this driver. */
>>>  #define V4L2_CID_USER_IMX_BASE   (V4L2_CID_USER_BASE + 
>>> 0x10b0)
>>> +#define V4L2_CID_USER_IMX_ASRC_RATIO_MOD (V4L2_CID_USER_IMX_BASE + 0)
>>>
>>>  /*
>>>   * The base for the atmel isc driver controls.
>>
>> Regards,
>>
>> Hans