Re: [PATCH] powerpc/64s: Add support for ASB_Notify on POWER9
On Fri, 2017-08-04 at 16:56 +0200, Christophe Lombard wrote: > The POWER9 core supports a new feature: ASB_Notify which requires the > support of the Special Purpose Register: TIDR. > > The ASB_Notify command, generated by the AFU, will attempt to > wake-up the host thread identified by the particular LPID:PID:TID. > > The special register TIDR has to be updated with the same value > present in the process element. > > If the length of the register TIDR is 64bits, the CAPI Translation > Service Layer core (XSL9) for Power9 systems limits the size (16bits) of > the Thread ID when it generates the ASB_Notify message adding > PID:LPID:TID information from the context. > > The content of the internal kernel Thread ID (32bits) can not therefore > be used to fulfill the register TIDR. > > This patch allows to avoid this limitation by adding a new interface > for the user. The instructions mfspr/mtspr SPRN_TIDR are emulated, > save/restore SPRs (context switch) are updated and a new feature > (CPU_FTR_TIDR) is added to POWER9 system. Those CPU_FTR_* are internal to the kernel. You probably also need a feature in AT_HWCAP2 to indicate to userspace that this is supported. Also you put the onus of allocating the TIDs onto userspace which is a bit tricky. What happens if there are duplicate TIDs, for example? (i.e., userspace doesn't allocate it or uses a library that spawns a thread) Ben. 
> > Signed-off-by: Christophe Lombard> --- > arch/powerpc/include/asm/cputable.h | 4 +++- > arch/powerpc/include/asm/emulated_ops.h | 2 ++ > arch/powerpc/include/asm/ppc-opcode.h | 4 > arch/powerpc/include/asm/processor.h| 1 + > arch/powerpc/kernel/process.c | 8 > arch/powerpc/kernel/traps.c | 21 + > 6 files changed, 39 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/cputable.h > b/arch/powerpc/include/asm/cputable.h > index d02ad93..706f668 100644 > --- a/arch/powerpc/include/asm/cputable.h > +++ b/arch/powerpc/include/asm/cputable.h > @@ -215,6 +215,7 @@ enum { > #define CPU_FTR_DABRX > LONG_ASM_CONST(0x0800) > #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000) > #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000) > +#define CPU_FTR_TIDR LONG_ASM_CONST(0x8000) > > #ifndef __ASSEMBLY__ > > @@ -474,7 +475,8 @@ enum { > CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ > CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ > CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ > - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) > + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ > + CPU_FTR_TIDR) > #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ >(~CPU_FTR_SAO)) > #define CPU_FTRS_CELL(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ > diff --git a/arch/powerpc/include/asm/emulated_ops.h > b/arch/powerpc/include/asm/emulated_ops.h > index f00e10e..e83ad42 100644 > --- a/arch/powerpc/include/asm/emulated_ops.h > +++ b/arch/powerpc/include/asm/emulated_ops.h > @@ -54,6 +54,8 @@ extern struct ppc_emulated { > #ifdef CONFIG_PPC64 > struct ppc_emulated_entry mfdscr; > struct ppc_emulated_entry mtdscr; > + struct ppc_emulated_entry mftidr; > + struct ppc_emulated_entry mttidr; > struct ppc_emulated_entry lq_stq; > #endif > } ppc_emulated; > diff --git a/arch/powerpc/include/asm/ppc-opcode.h > b/arch/powerpc/include/asm/ppc-opcode.h > index fa9ebae..3ebc446 100644 > --- a/arch/powerpc/include/asm/ppc-opcode.h 
> +++ b/arch/powerpc/include/asm/ppc-opcode.h > @@ -241,6 +241,10 @@ > #define PPC_INST_MFSPR_DSCR_USER_MASK0xfc1e > #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 > #define PPC_INST_MTSPR_DSCR_USER_MASK0xfc1e > +#define PPC_INST_MFSPR_TIDR 0x7d2452a6 > +#define PPC_INST_MFSPR_TIDR_MASK 0xfd2e > +#define PPC_INST_MTSPR_TIDR 0x7d2453a6 > +#define PPC_INST_MTSPR_TIDR_MASK 0xfd2e > #define PPC_INST_MFVSRD 0x7c66 > #define PPC_INST_MTVSRD 0x7c000166 > #define PPC_INST_SLBFEE 0x7c0007a7 > diff --git a/arch/powerpc/include/asm/processor.h > b/arch/powerpc/include/asm/processor.h > index fab7ff8..58cc212 100644 > --- a/arch/powerpc/include/asm/processor.h > +++ b/arch/powerpc/include/asm/processor.h > @@ -329,6 +329,7 @@ struct thread_struct { >*/ > int dscr_inherit; > unsigned long ppr;/* used to save/restore SMT priority */ > + unsigned long tidr; > #endif > #ifdef CONFIG_PPC_BOOK3S_64 > unsigned long tar; > diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c > index 9f3e2c9..f06ea10 100644 > ---
Re: [PATCH] mtd: nand: Rename nand.h into rawnand.h
Hi Boris, On 04.08.2017 18:29, Boris Brezillon wrote: > We are planning to share more code between different NAND based > devices (SPI NAND, OneNAND and raw NANDs), but before doing that > we need to move the existing include/linux/mtd/nand.h file into > include/linux/mtd/rawnand.h so we can later create a nand.h header > containing all common structure and function prototypes. > > Signed-off-by: Boris Brezillon> Signed-off-by: Peter Pan > Cc: Jonathan Corbet > Cc: Sekhar Nori > Cc: Kevin Hilman > Cc: Jason Cooper > Cc: Andrew Lunn > Cc: Sebastian Hesselbarth > Cc: Gregory Clement > Cc: Hartley Sweeten > Cc: Alexander Sverdlin > Cc: Shawn Guo > Cc: Sascha Hauer > Cc: Fabio Estevam > Cc: Imre Kaloz > Cc: Krzysztof Halasa > Cc: Eric Miao > Cc: Haojian Zhuang > Cc: Aaro Koskinen > Cc: Tony Lindgren > Cc: Alexander Clouter > Cc: Daniel Mack > Cc: Robert Jarzmik > Cc: Marek Vasut > Cc: Kukjin Kim > Cc: Krzysztof Kozlowski > Cc: Simtec Linux Team > Cc: Steven Miao > Cc: Mikael Starvik > Cc: Jesper Nilsson > Cc: Ralf Baechle > Cc: Yoshinori Sato > Cc: Rich Felker > Cc: Wenyou Yang > Cc: Josh Wu > Cc: Kamal Dasu > Cc: Masahiro Yamada > Cc: Han Xu > Cc: Harvey Hunt > Cc: Vladimir Zapolskiy > Cc: Sylvain Lemieux > Cc: Matthias Brugger > Cc: Wan ZongShun > Cc: Neil Armstrong > Cc: Ezequiel Garcia > Cc: Maxim Levitsky > Cc: Marc Gonzalez > Cc: Stefan Agner > Cc: Greg Kroah-Hartman > Cc: Mauro Carvalho Chehab > Cc: linux-...@vger.kernel.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-o...@vger.kernel.org > Cc: linux-samsung-...@vger.kernel.org > Cc: adi-buildroot-de...@lists.sourceforge.net > Cc: linux-cris-ker...@axis.com > Cc: linux-m...@linux-mips.org > Cc: linux...@vger.kernel.org > Cc: bcm-kernel-feedback-l...@broadcom.com > Cc: linux-media...@lists.infradead.org > Cc: linux-ox...@lists.tuxfamily.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: de...@driverdev.osuosl.org > --- > Hi All, > > Sorry for the huge Cc list, but I'd like to collect as much acks as > 
possible for this patch which is actually part of a bigger series [1]. > > Note that there's nothing complicated here, it's just a mechanical > s/nand\.h/rawnand\.h/ replacement, but it impacts several architectures, > the doc and staging directories. > > Regards, > > Boris > > [1]https://lwn.net/Articles/723694/ > --- [snip] > drivers/mtd/nand/lpc32xx_mlc.c | 2 +- > drivers/mtd/nand/lpc32xx_slc.c | 2 +- For LPC32xx drivers Acked-by: Vladimir Zapolskiy -- With best wishes, Vladimir
Re: [PATCH v3] powerpc/mm: Implemented default_hugepagesz verification for powerpc
On 07/24/2017 04:52 PM, Victor Aoqui wrote: > Implemented default hugepage size verification (default_hugepagesz=) > in order to allow allocation of defined number of pages (hugepages=) > only for supported hugepage sizes. > > Signed-off-by: Victor Aoqui> --- > v2: > > - Renamed default_hugepage_setup_sz function to hugetlb_default_size_setup; > - Added powerpc string to error message. > > v3: > > - Renamed hugetlb_default_size_setup() to hugepage_default_setup_sz(); > - Implemented hugetlb_bad_default_size(); > - Reimplemented hugepage_setup_sz() to just parse default_hugepagesz= and > check if it's a supported size; > - Added verification of default_hugepagesz= value on hugetlb_nrpages_setup() > before allocating hugepages. > > arch/powerpc/mm/hugetlbpage.c | 15 +++ > include/linux/hugetlb.h | 1 + > mm/hugetlb.c | 17 +++-- > 3 files changed, 31 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c > index e1bf5ca..5990381 100644 > --- a/arch/powerpc/mm/hugetlbpage.c > +++ b/arch/powerpc/mm/hugetlbpage.c > @@ -780,6 +780,21 @@ static int __init hugepage_setup_sz(char *str) > } > __setup("hugepagesz=", hugepage_setup_sz); > > +static int __init hugepage_default_setup_sz(char *str) > +{ > + unsigned long long size; > + > + size = memparse(str, ); > + > + if (add_huge_page_size(size) != 0) { > + hugetlb_bad_default_size(); > + pr_err("Invalid ppc default huge page size specified(%llu)\n", > size); > + } > + > + return 1; > +} > +__setup("default_hugepagesz=", hugepage_default_setup_sz); > + > struct kmem_cache *hugepte_cache; > static int __init hugetlbpage_init(void) > { > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h > index 0ed8e41..2927200 100644 > --- a/include/linux/hugetlb.h > +++ b/include/linux/hugetlb.h > @@ -361,6 +361,7 @@ int huge_add_to_page_cache(struct page *page, struct > address_space *mapping, > int __init alloc_bootmem_huge_page(struct hstate *h); > > void __init 
hugetlb_bad_size(void); > +void __init hugetlb_bad_default_size(void); > void __init hugetlb_add_hstate(unsigned order); > struct hstate *size_to_hstate(unsigned long size); > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index bc48ee7..3c24266 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -54,6 +54,7 @@ > static unsigned long __initdata default_hstate_max_huge_pages; > static unsigned long __initdata default_hstate_size; > static bool __initdata parsed_valid_hugepagesz = true; > +static bool __initdata parsed_valid_default_hugepagesz = true; > > /* > * Protects updates to hugepage_freelists, hugepage_activelist, > nr_huge_pages, > @@ -2804,6 +2805,12 @@ void __init hugetlb_bad_size(void) > parsed_valid_hugepagesz = false; > } > > +/* Should be called on processing a default_hugepagesz=... option */ > +void __init hugetlb_bad_default_size(void) > +{ > + parsed_valid_default_hugepagesz = false; > +} > + > void __init hugetlb_add_hstate(unsigned int order) > { > struct hstate *h; > @@ -2846,8 +2853,14 @@ static int __init hugetlb_nrpages_setup(char *s) >* !hugetlb_max_hstate means we haven't parsed a hugepagesz= parameter > yet, >* so this hugepages= parameter goes to the "default hstate". >*/ > - else if (!hugetlb_max_hstate) > - mhp = _hstate_max_huge_pages; > + else if (!hugetlb_max_hstate) { > + if (!parsed_valid_default_hugepagesz) { > + pr_warn("hugepages = %s cannot be allocated for " > + "unsupported default_hugepagesz, ignoring\n", > s); > + parsed_valid_default_hugepagesz = true; > + } else > + mhp = _hstate_max_huge_pages; > + } > else > mhp = _hstate->max_huge_pages; > > My compiler tells me, mm/hugetlb.c: In function ‘hugetlb_nrpages_setup’: mm/hugetlb.c:2873:8: warning: ‘mhp’ may be used uninitialized in this function [-Wmaybe-uninitialized] You have added a way of getting out of that big if/else if statement without setting mhp. mhp will be examined later in the code, so this is indeed a bug. 
Like Aneesh, I am not sure if there is great benefit in this patch. You added this change in functionality only for powerpc. IMO, it would be best if behavior was consistent in all architectures. So, if we change it for powerpc we may want to change everywhere. -- Mike Kravetz
Re: [PATCH] mtd: nand: Rename nand.h into rawnand.h
Hello Boris, you could easily split this patch per architecture/subsystem if, in a first patch, you move the content of nand.h to rawnand.h and make nand.h just #include rawnand.h. Then you can switch one user at a time and, when all are converted to use rawnand.h, you can drop the #include. Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König| Industrial Linux Solutions | http://www.pengutronix.de/ |
Re: [PATCH] mtd: nand: Rename nand.h into rawnand.h
On 08/04/2017 05:29 PM, Boris Brezillon wrote: > We are planning to share more code between different NAND based > devices (SPI NAND, OneNAND and raw NANDs), but before doing that > we need to move the existing include/linux/mtd/nand.h file into > include/linux/mtd/rawnand.h so we can later create a nand.h header > containing all common structure and function prototypes. > > Signed-off-by: Boris Brezillon> Signed-off-by: Peter Pan > Cc: Jonathan Corbet > Cc: Sekhar Nori > Cc: Kevin Hilman > Cc: Jason Cooper > Cc: Andrew Lunn > Cc: Sebastian Hesselbarth > Cc: Gregory Clement > Cc: Hartley Sweeten > Cc: Alexander Sverdlin > Cc: Shawn Guo > Cc: Sascha Hauer > Cc: Fabio Estevam > Cc: Imre Kaloz > Cc: Krzysztof Halasa > Cc: Eric Miao > Cc: Haojian Zhuang > Cc: Aaro Koskinen > Cc: Tony Lindgren > Cc: Alexander Clouter > Cc: Daniel Mack > Cc: Robert Jarzmik > Cc: Marek Vasut > Cc: Kukjin Kim > Cc: Krzysztof Kozlowski > Cc: Simtec Linux Team > Cc: Steven Miao > Cc: Mikael Starvik > Cc: Jesper Nilsson > Cc: Ralf Baechle > Cc: Yoshinori Sato > Cc: Rich Felker > Cc: Wenyou Yang > Cc: Josh Wu > Cc: Kamal Dasu > Cc: Masahiro Yamada > Cc: Han Xu > Cc: Harvey Hunt > Cc: Vladimir Zapolskiy > Cc: Sylvain Lemieux > Cc: Matthias Brugger > Cc: Wan ZongShun > Cc: Neil Armstrong > Cc: Ezequiel Garcia > Cc: Maxim Levitsky > Cc: Marc Gonzalez > Cc: Stefan Agner > Cc: Greg Kroah-Hartman > Cc: Mauro Carvalho Chehab > Cc: linux-...@vger.kernel.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-o...@vger.kernel.org > Cc: linux-samsung-...@vger.kernel.org > Cc: adi-buildroot-de...@lists.sourceforge.net > Cc: linux-cris-ker...@axis.com > Cc: linux-m...@linux-mips.org > Cc: linux...@vger.kernel.org > Cc: bcm-kernel-feedback-l...@broadcom.com > Cc: linux-media...@lists.infradead.org > Cc: linux-ox...@lists.tuxfamily.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: de...@driverdev.osuosl.org > --- > Hi All, > > Sorry for the huge Cc list, but I'd like to collect as much acks as > possible for 
this patch which is actually part of a bigger series [1]. > > Note that there's nothing complicated here, it's just a mechanical > s/nand\.h/rawnand\.h/ replacement, but it impacts several architectures, > the doc and staging directories. > > Regards, > > Boris > > [1]https://lwn.net/Articles/723694/ > --- > Documentation/driver-api/mtdnand.rst| 8 > MAINTAINERS | 2 +- > arch/arm/mach-davinci/board-da850-evm.c | 2 +- > arch/arm/mach-davinci/board-dm355-evm.c | 2 +- > arch/arm/mach-davinci/board-dm355-leopard.c | 2 +- > arch/arm/mach-davinci/board-dm365-evm.c | 2 +- > arch/arm/mach-davinci/board-dm644x-evm.c| 2 +- > arch/arm/mach-davinci/board-dm646x-evm.c| 2 +- > arch/arm/mach-davinci/board-sffsdr.c| 2 +- > arch/arm/mach-dove/dove-db-setup.c | 2 +- > arch/arm/mach-ep93xx/snappercl15.c | 2 +- > arch/arm/mach-ep93xx/ts72xx.c | 2 +- > arch/arm/mach-imx/mach-qong.c | 2 +- > arch/arm/mach-ixp4xx/ixdp425-setup.c| 2 +- > arch/arm/mach-mmp/aspenite.c| 2 +- > arch/arm/mach-omap1/board-fsample.c | 2 +- > arch/arm/mach-omap1/board-h2.c | 2 +- > arch/arm/mach-omap1/board-h3.c | 2 +- > arch/arm/mach-omap1/board-nand.c| 2 +- > arch/arm/mach-omap1/board-perseus2.c| 2 +- > arch/arm/mach-orion5x/db88f5281-setup.c | 2 +- > arch/arm/mach-orion5x/kurobox_pro-setup.c | 2 +- > arch/arm/mach-orion5x/ts209-setup.c | 2 +- > arch/arm/mach-orion5x/ts78xx-setup.c| 2 +- > arch/arm/mach-pxa/balloon3.c| 2 +- >
Re: [PATCH] mtd: nand: Rename nand.h into rawnand.h
Hi Boris, On ven., août 04 2017, Boris Brezillonwrote: > We are planning to share more code between different NAND based > devices (SPI NAND, OneNAND and raw NANDs), but before doing that > we need to move the existing include/linux/mtd/nand.h file into > include/linux/mtd/rawnand.h so we can later create a nand.h header > containing all common structure and function prototypes. > > Signed-off-by: Boris Brezillon > Signed-off-by: Peter Pan > Cc: Jonathan Corbet > Cc: Sekhar Nori > Cc: Kevin Hilman > Cc: Jason Cooper > Cc: Andrew Lunn > Cc: Sebastian Hesselbarth > Cc: Gregory Clement > Cc: Hartley Sweeten > Cc: Alexander Sverdlin > Cc: Shawn Guo > Cc: Sascha Hauer > Cc: Fabio Estevam > Cc: Imre Kaloz > Cc: Krzysztof Halasa > Cc: Eric Miao > Cc: Haojian Zhuang > Cc: Aaro Koskinen > Cc: Tony Lindgren > Cc: Alexander Clouter > Cc: Daniel Mack > Cc: Robert Jarzmik > Cc: Marek Vasut > Cc: Kukjin Kim > Cc: Krzysztof Kozlowski > Cc: Simtec Linux Team > Cc: Steven Miao > Cc: Mikael Starvik > Cc: Jesper Nilsson > Cc: Ralf Baechle > Cc: Yoshinori Sato > Cc: Rich Felker > Cc: Wenyou Yang > Cc: Josh Wu > Cc: Kamal Dasu > Cc: Masahiro Yamada > Cc: Han Xu > Cc: Harvey Hunt > Cc: Vladimir Zapolskiy > Cc: Sylvain Lemieux > Cc: Matthias Brugger > Cc: Wan ZongShun > Cc: Neil Armstrong > Cc: Ezequiel Garcia > Cc: Maxim Levitsky > Cc: Marc Gonzalez > Cc: Stefan Agner > Cc: Greg Kroah-Hartman > Cc: Mauro Carvalho Chehab > Cc: linux-...@vger.kernel.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-o...@vger.kernel.org > Cc: linux-samsung-...@vger.kernel.org > Cc: adi-buildroot-de...@lists.sourceforge.net > Cc: linux-cris-ker...@axis.com > Cc: linux-m...@linux-mips.org > Cc: linux...@vger.kernel.org > Cc: bcm-kernel-feedback-l...@broadcom.com > Cc: linux-media...@lists.infradead.org > Cc: linux-ox...@lists.tuxfamily.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: de...@driverdev.osuosl.org > --- > Hi All, > > Sorry for the huge Cc list, but I'd like to collect as much acks as > 
possible for this patch which is actually part of a bigger series [1]. > > Note that there's nothing complicated here, it's just a mechanical > s/nand\.h/rawnand\.h/ replacement, but it impacts several architectures, > the doc and staging directories. > > Regards, > > Boris > > [1]https://lwn.net/Articles/723694/ > --- [...] > arch/arm/mach-orion5x/db88f5281-setup.c | 2 +- > arch/arm/mach-orion5x/kurobox_pro-setup.c | 2 +- > arch/arm/mach-orion5x/ts209-setup.c | 2 +- > arch/arm/mach-orion5x/ts78xx-setup.c| 2 +- For the orion5x files you have my Acked-by: Gregory CLEMENT Indeed there isn't anything controversial, and I don't expect any change on these files for the next release so no merge conflict to fear! Thanks, Gregory -- Gregory Clement, Free Electrons Kernel, drivers, real-time and embedded Linux development, consulting, training and support. http://free-electrons.com
[PATCH] mtd: nand: Rename nand.h into rawnand.h
We are planning to share more code between different NAND based devices (SPI NAND, OneNAND and raw NANDs), but before doing that we need to move the existing include/linux/mtd/nand.h file into include/linux/mtd/rawnand.h so we can later create a nand.h header containing all common structure and function prototypes. Signed-off-by: Boris BrezillonSigned-off-by: Peter Pan Cc: Jonathan Corbet Cc: Sekhar Nori Cc: Kevin Hilman Cc: Jason Cooper Cc: Andrew Lunn Cc: Sebastian Hesselbarth Cc: Gregory Clement Cc: Hartley Sweeten Cc: Alexander Sverdlin Cc: Shawn Guo Cc: Sascha Hauer Cc: Fabio Estevam Cc: Imre Kaloz Cc: Krzysztof Halasa Cc: Eric Miao Cc: Haojian Zhuang Cc: Aaro Koskinen Cc: Tony Lindgren Cc: Alexander Clouter Cc: Daniel Mack Cc: Robert Jarzmik Cc: Marek Vasut Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: Simtec Linux Team Cc: Steven Miao Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Ralf Baechle Cc: Yoshinori Sato Cc: Rich Felker Cc: Wenyou Yang Cc: Josh Wu Cc: Kamal Dasu Cc: Masahiro Yamada Cc: Han Xu Cc: Harvey Hunt Cc: Vladimir Zapolskiy Cc: Sylvain Lemieux Cc: Matthias Brugger Cc: Wan ZongShun Cc: Neil Armstrong Cc: Ezequiel Garcia Cc: Maxim Levitsky Cc: Marc Gonzalez Cc: Stefan Agner Cc: Greg Kroah-Hartman Cc: Mauro Carvalho Chehab Cc: linux-...@vger.kernel.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-o...@vger.kernel.org Cc: linux-samsung-...@vger.kernel.org Cc: adi-buildroot-de...@lists.sourceforge.net Cc: linux-cris-ker...@axis.com Cc: linux-m...@linux-mips.org Cc: linux...@vger.kernel.org Cc: bcm-kernel-feedback-l...@broadcom.com Cc: linux-media...@lists.infradead.org Cc: linux-ox...@lists.tuxfamily.org Cc: linuxppc-dev@lists.ozlabs.org Cc: de...@driverdev.osuosl.org --- Hi All, Sorry for the huge Cc list, but I'd like to collect as much acks as possible for this patch which is actually part of a bigger series [1]. 
Note that there's nothing complicated here, it's just a mechanical s/nand\.h/rawnand\.h/ replacement, but it impacts several architectures, the doc and staging directories. Regards, Boris [1]https://lwn.net/Articles/723694/ --- Documentation/driver-api/mtdnand.rst| 8 MAINTAINERS | 2 +- arch/arm/mach-davinci/board-da850-evm.c | 2 +- arch/arm/mach-davinci/board-dm355-evm.c | 2 +- arch/arm/mach-davinci/board-dm355-leopard.c | 2 +- arch/arm/mach-davinci/board-dm365-evm.c | 2 +- arch/arm/mach-davinci/board-dm644x-evm.c| 2 +- arch/arm/mach-davinci/board-dm646x-evm.c| 2 +- arch/arm/mach-davinci/board-sffsdr.c| 2 +- arch/arm/mach-dove/dove-db-setup.c | 2 +- arch/arm/mach-ep93xx/snappercl15.c | 2 +- arch/arm/mach-ep93xx/ts72xx.c | 2 +- arch/arm/mach-imx/mach-qong.c | 2 +- arch/arm/mach-ixp4xx/ixdp425-setup.c| 2 +- arch/arm/mach-mmp/aspenite.c| 2 +- arch/arm/mach-omap1/board-fsample.c | 2 +- arch/arm/mach-omap1/board-h2.c | 2 +- arch/arm/mach-omap1/board-h3.c | 2 +- arch/arm/mach-omap1/board-nand.c| 2 +- arch/arm/mach-omap1/board-perseus2.c| 2 +- arch/arm/mach-orion5x/db88f5281-setup.c | 2 +- arch/arm/mach-orion5x/kurobox_pro-setup.c | 2 +- arch/arm/mach-orion5x/ts209-setup.c | 2 +- arch/arm/mach-orion5x/ts78xx-setup.c| 2 +- arch/arm/mach-pxa/balloon3.c| 2 +- arch/arm/mach-pxa/em-x270.c | 2 +- arch/arm/mach-pxa/eseries.c | 2 +- arch/arm/mach-pxa/palmtx.c | 2 +- arch/arm/mach-pxa/tosa.c| 2 +- arch/arm/mach-s3c24xx/common-smdk.c | 2 +-
[PATCH] mtd: nand: Rename nand.h into rawnand.h
We are planning to share more code between different NAND based devices (SPI NAND, OneNAND and raw NANDs), but before doing that we need to move the existing include/linux/mtd/nand.h file into include/linux/mtd/rawnand.h so we can later create a nand.h header containing all common structure and function prototypes. Signed-off-by: Boris BrezillonSigned-off-by: Peter Pan Cc: Jonathan Corbet Cc: Sekhar Nori Cc: Kevin Hilman Cc: Jason Cooper Cc: Andrew Lunn Cc: Sebastian Hesselbarth Cc: Gregory Clement Cc: Hartley Sweeten Cc: Alexander Sverdlin Cc: Shawn Guo Cc: Sascha Hauer Cc: Fabio Estevam Cc: Imre Kaloz Cc: Krzysztof Halasa Cc: Eric Miao Cc: Haojian Zhuang Cc: Aaro Koskinen Cc: Tony Lindgren Cc: Alexander Clouter Cc: Daniel Mack Cc: Robert Jarzmik Cc: Marek Vasut Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: Simtec Linux Team Cc: Steven Miao Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Ralf Baechle Cc: Yoshinori Sato Cc: Rich Felker Cc: Wenyou Yang Cc: Josh Wu Cc: Kamal Dasu Cc: Masahiro Yamada Cc: Han Xu Cc: Harvey Hunt Cc: Vladimir Zapolskiy Cc: Sylvain Lemieux Cc: Matthias Brugger Cc: Wan ZongShun Cc: Neil Armstrong Cc: Ezequiel Garcia Cc: Maxim Levitsky Cc: Marc Gonzalez Cc: Stefan Agner Cc: Greg Kroah-Hartman Cc: Mauro Carvalho Chehab Cc: linux-...@vger.kernel.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-o...@vger.kernel.org Cc: linux-samsung-...@vger.kernel.org Cc: adi-buildroot-de...@lists.sourceforge.net Cc: linux-cris-ker...@axis.com Cc: linux-m...@linux-mips.org Cc: linux...@vger.kernel.org Cc: bcm-kernel-feedback-l...@broadcom.com Cc: linux-media...@lists.infradead.org Cc: linux-ox...@lists.tuxfamily.org Cc: linuxppc-dev@lists.ozlabs.org Cc: de...@driverdev.osuosl.org --- Hi All, Sorry for the huge Cc list, but I'd like to collect as much acks as possible for this patch which is actually part of a bigger series [1]. 
Note that there's nothing complicated here, it's just a mechanical s/nand\.h/rawnand\.h/ replacement, but it impacts several architectures, the doc and staging directories. Regards, Boris [1]https://lwn.net/Articles/723694/ --- Documentation/driver-api/mtdnand.rst| 8 MAINTAINERS | 2 +- arch/arm/mach-davinci/board-da850-evm.c | 2 +- arch/arm/mach-davinci/board-dm355-evm.c | 2 +- arch/arm/mach-davinci/board-dm355-leopard.c | 2 +- arch/arm/mach-davinci/board-dm365-evm.c | 2 +- arch/arm/mach-davinci/board-dm644x-evm.c| 2 +- arch/arm/mach-davinci/board-dm646x-evm.c| 2 +- arch/arm/mach-davinci/board-sffsdr.c| 2 +- arch/arm/mach-dove/dove-db-setup.c | 2 +- arch/arm/mach-ep93xx/snappercl15.c | 2 +- arch/arm/mach-ep93xx/ts72xx.c | 2 +- arch/arm/mach-imx/mach-qong.c | 2 +- arch/arm/mach-ixp4xx/ixdp425-setup.c| 2 +- arch/arm/mach-mmp/aspenite.c| 2 +- arch/arm/mach-omap1/board-fsample.c | 2 +- arch/arm/mach-omap1/board-h2.c | 2 +- arch/arm/mach-omap1/board-h3.c | 2 +- arch/arm/mach-omap1/board-nand.c| 2 +- arch/arm/mach-omap1/board-perseus2.c| 2 +- arch/arm/mach-orion5x/db88f5281-setup.c | 2 +- arch/arm/mach-orion5x/kurobox_pro-setup.c | 2 +- arch/arm/mach-orion5x/ts209-setup.c | 2 +- arch/arm/mach-orion5x/ts78xx-setup.c| 2 +- arch/arm/mach-pxa/balloon3.c| 2 +- arch/arm/mach-pxa/em-x270.c | 2 +- arch/arm/mach-pxa/eseries.c | 2 +- arch/arm/mach-pxa/palmtx.c | 2 +- arch/arm/mach-pxa/tosa.c| 2 +- arch/arm/mach-s3c24xx/common-smdk.c | 2 +-
[PATCH v4 7/7] ima: Support module-style appended signatures for appraisal
This patch introduces the modsig keyword to the IMA policy syntax to specify that a given hook should expect the file to have the IMA signature appended to it. Here is how it can be used in a rule: appraise func=KEXEC_KERNEL_CHECK appraise_type=modsig|imasig With this rule, IMA will accept either an appended signature or a signature stored in the extended attribute. In that case, it will first check whether there is an appended signature, and if not it will read it from the extended attribute. The format of the appended signature is the same used for signed kernel modules. This means that the file can be signed with the scripts/sign-file tool, with a command line such as this: $ sign-file sha256 privkey_ima.pem x509_ima.der vmlinux This code only works for files that are hashed from a memory buffer, not for files that are read from disk at the time of hash calculation. In other words, only hooks that use kernel_read_file can support appended signatures. This means that only FIRMWARE_CHECK, KEXEC_KERNEL_CHECK, KEXEC_INITRAMFS_CHECK and POLICY_CHECK can be supported. This feature warrants a separate config option because enabling it brings in many other config options. Signed-off-by: Thiago Jung Bauermann--- security/integrity/ima/Kconfig| 13 +++ security/integrity/ima/Makefile | 1 + security/integrity/ima/ima.h | 70 +++- security/integrity/ima/ima_appraise.c | 178 +- security/integrity/ima/ima_main.c | 7 +- security/integrity/ima/ima_modsig.c | 178 ++ security/integrity/ima/ima_policy.c | 26 +++-- security/integrity/ima/ima_template_lib.c | 14 ++- security/integrity/integrity.h| 4 +- 9 files changed, 443 insertions(+), 48 deletions(-) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 35ef69312811..55f734a6124b 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -163,6 +163,19 @@ config IMA_APPRAISE_BOOTPARAM This option enables the different "ima_appraise=" modes (eg. 
fix, log) from the boot command line. +config IMA_APPRAISE_MODSIG + bool "Support module-style signatures for appraisal" + depends on IMA_APPRAISE + depends on INTEGRITY_ASYMMETRIC_KEYS + select PKCS7_MESSAGE_PARSER + select MODULE_SIG_FORMAT + default n + help + Adds support for signatures appended to files. The format of the + appended signature is the same used for signed kernel modules. + The modsig keyword can be used in the IMA policy to allow a hook + to accept such signatures. + config IMA_TRUSTED_KEYRING bool "Require all keys on the .ima keyring be signed (deprecated)" depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING diff --git a/security/integrity/ima/Makefile b/security/integrity/ima/Makefile index 29f198bde02b..c72026acecc3 100644 --- a/security/integrity/ima/Makefile +++ b/security/integrity/ima/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_IMA) += ima.o ima-y := ima_fs.o ima_queue.o ima_init.o ima_main.o ima_crypto.o ima_api.o \ ima_policy.o ima_template.o ima_template_lib.o ima-$(CONFIG_IMA_APPRAISE) += ima_appraise.o +ima-$(CONFIG_IMA_APPRAISE_MODSIG) += ima_modsig.o ima-$(CONFIG_HAVE_IMA_KEXEC) += ima_kexec.o obj-$(CONFIG_IMA_BLACKLIST_KEYRING) += ima_mok.o diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index d52b487ad259..5492af2cd7c7 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -190,6 +190,8 @@ enum ima_hooks { __ima_hooks(__ima_hook_enumify) }; +extern const char *const func_tokens[]; + /* LIM API function definitions */ int ima_get_action(struct inode *inode, int mask, enum ima_hooks func, int *pcr); @@ -236,9 +238,10 @@ int ima_policy_show(struct seq_file *m, void *v); #ifdef CONFIG_IMA_APPRAISE int ima_appraise_measurement(enum ima_hooks func, struct integrity_iint_cache *iint, -struct file *file, const unsigned char *filename, -struct evm_ima_xattr_data *xattr_value, -int xattr_len, int opened); +struct file *file, const void *buf, loff_t size, +const unsigned char *filename, +struct 
evm_ima_xattr_data **xattr_value, +int *xattr_len, int opened); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, @@ -248,13 +251,28 @@ enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int ima_read_xattr(struct dentry *dentry,
[PATCH v4 6/7] ima: Store measurement after appraisal
When module-style signatures appended at the end of files are supported for IMA appraisal, the code will fall back to the xattr signature if the appended one fails to verify. The problem is that we don't know whether we need to fall back to the xattr signature until the appraise step, and by then the measure step was already completed and would need to be done again in case the template includes the signature. To avoid this problem, do the appraisal first so that the correct signature is stored by the template in the measure step. Signed-off-by: Thiago Jung Bauermann--- security/integrity/ima/ima_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 2aebb7984437..0b4845e7248d 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -244,12 +244,12 @@ static int process_measurement(struct file *file, char *buf, loff_t size, if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); - if (action & IMA_MEASURE) - ima_store_measurement(iint, file, pathname, - xattr_value, xattr_len, pcr); if (action & IMA_APPRAISE_SUBMASK) rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, opened); + if (action & IMA_MEASURE) + ima_store_measurement(iint, file, pathname, + xattr_value, xattr_len, pcr); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); -- 2.13.0
[PATCH v4 5/7] integrity: Select CONFIG_KEYS instead of depending on it
This avoids a dependency cycle in CONFIG_IMA_APPRAISE_MODSIG (introduced by a later patch in this series): it will select CONFIG_MODULE_SIG_FORMAT which in turn selects CONFIG_KEYS. Kconfig then complains that CONFIG_INTEGRITY_SIGNATURE depends on CONFIG_KEYS. Signed-off-by: Thiago Jung Bauermann--- security/integrity/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig index da9565891738..0d642e0317c7 100644 --- a/security/integrity/Kconfig +++ b/security/integrity/Kconfig @@ -17,8 +17,8 @@ if INTEGRITY config INTEGRITY_SIGNATURE bool "Digital signature verification using multiple keyrings" - depends on KEYS default n + select KEYS select SIGNATURE help This option enables digital signature verification support -- 2.13.0
[PATCH v4 4/7] integrity: Introduce integrity_keyring_from_id
IMA will need to obtain the keyring used to verify file signatures so that it can verify the module-style signature appended to files. Signed-off-by: Thiago Jung Bauermann--- security/integrity/digsig.c| 28 +++- security/integrity/integrity.h | 1 + 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index 06554c448dce..bb5328ba2848 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -48,11 +48,10 @@ static bool init_keyring __initdata; #define restrict_link_to_ima restrict_link_by_builtin_trusted #endif -int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, - const char *digest, int digestlen) +struct key *integrity_keyring_from_id(const unsigned int id) { - if (id >= INTEGRITY_KEYRING_MAX || siglen < 2) - return -EINVAL; + if (id >= INTEGRITY_KEYRING_MAX) + return ERR_PTR(-EINVAL); if (!keyring[id]) { keyring[id] = @@ -61,18 +60,29 @@ int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, int err = PTR_ERR(keyring[id]); pr_err("no %s keyring: %d\n", keyring_name[id], err); keyring[id] = NULL; - return err; + return ERR_PTR(err); } } + return keyring[id]; +} + +int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, + const char *digest, int digestlen) +{ + struct key *keyring = integrity_keyring_from_id(id); + + if (IS_ERR(keyring) || siglen < 2) + return PTR_ERR(keyring); + switch (sig[1]) { case 1: /* v1 API expect signature without xattr type */ - return digsig_verify(keyring[id], sig + 1, siglen - 1, -digest, digestlen); + return digsig_verify(keyring, sig + 1, siglen - 1, digest, +digestlen); case 2: - return asymmetric_verify(keyring[id], sig, siglen, -digest, digestlen); + return asymmetric_verify(keyring, sig, siglen, digest, +digestlen); } return -EOPNOTSUPP; diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 9b1762076f38..1f8f1a31d487 100644 --- 
a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -134,6 +134,7 @@ int __init integrity_read_file(const char *path, char **data); #ifdef CONFIG_INTEGRITY_SIGNATURE +struct key *integrity_keyring_from_id(const unsigned int id); int integrity_digsig_verify(const unsigned int id, const char *sig, int siglen, const char *digest, int digestlen); -- 2.13.0
[PATCH v4 3/7] PKCS#7: Introduce pkcs7_get_message_sig and verify_pkcs7_message_sig
IMA will need to access the digest used in the signature so that it can verify files containing module-style appended signatures. For this purpose, add function pkcs7_get_message_sig. It will also need to verify an already parsed PKCS#7 message. For this purpose, add function verify_pkcs7_message_signature which takes a struct pkcs7_message for verification instead of the raw bytes that verify_pkcs7_signature takes. Signed-off-by: Thiago Jung Bauermann--- certs/system_keyring.c| 60 +-- crypto/asymmetric_keys/pkcs7_parser.c | 12 +++ include/crypto/pkcs7.h| 2 ++ include/linux/verification.h | 10 ++ 4 files changed, 68 insertions(+), 16 deletions(-) diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 6251d1b27f0c..6a8684959780 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -190,33 +190,26 @@ late_initcall(load_system_certificate_list); #ifdef CONFIG_SYSTEM_DATA_VERIFICATION /** - * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data. + * verify_pkcs7_message_sig - Verify a PKCS#7-based signature on system data. * @data: The data to be verified (NULL if expecting internal data). * @len: Size of @data. - * @raw_pkcs7: The PKCS#7 message that is the signature. - * @pkcs7_len: The size of @raw_pkcs7. + * @pkcs7: The PKCS#7 message that is the signature. * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only, * (void *)1UL for all trusted keys). * @usage: The use to which the key is being put. * @view_content: Callback to gain access to content. * @ctx: Context for callback. 
*/ -int verify_pkcs7_signature(const void *data, size_t len, - const void *raw_pkcs7, size_t pkcs7_len, - struct key *trusted_keys, - enum key_being_used_for usage, - int (*view_content)(void *ctx, - const void *data, size_t len, - size_t asn1hdrlen), - void *ctx) +int verify_pkcs7_message_sig(const void *data, size_t len, +struct pkcs7_message *pkcs7, +struct key *trusted_keys, +enum key_being_used_for usage, +int (*view_content)(void *ctx, const void *data, +size_t len, size_t asn1hdrlen), +void *ctx) { - struct pkcs7_message *pkcs7; int ret; - pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len); - if (IS_ERR(pkcs7)) - return PTR_ERR(pkcs7); - /* The data should be detached - so we need to supply it. */ if (data && pkcs7_supply_detached_data(pkcs7, data, len) < 0) { pr_err("PKCS#7 signature with non-detached data\n"); @@ -258,6 +251,41 @@ int verify_pkcs7_signature(const void *data, size_t len, } error: + pr_devel("<==%s() = %d\n", __func__, ret); + return ret; +} + +/** + * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data. + * @data: The data to be verified (NULL if expecting internal data). + * @len: Size of @data. + * @raw_pkcs7: The PKCS#7 message that is the signature. + * @pkcs7_len: The size of @raw_pkcs7. + * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only, + * (void *)1UL for all trusted keys). + * @usage: The use to which the key is being put. + * @view_content: Callback to gain access to content. + * @ctx: Context for callback. 
+ */ +int verify_pkcs7_signature(const void *data, size_t len, + const void *raw_pkcs7, size_t pkcs7_len, + struct key *trusted_keys, + enum key_being_used_for usage, + int (*view_content)(void *ctx, + const void *data, size_t len, + size_t asn1hdrlen), + void *ctx) +{ + struct pkcs7_message *pkcs7; + int ret; + + pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len); + if (IS_ERR(pkcs7)) + return PTR_ERR(pkcs7); + + ret = verify_pkcs7_message_sig(data, len, pkcs7, trusted_keys, usage, + view_content, ctx); + pkcs7_free_message(pkcs7); pr_devel("<==%s() = %d\n", __func__, ret); return ret; diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index af4cd8649117..e41beda297a8 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -673,3 +673,15 @@ int pkcs7_note_signed_info(void *context, size_t
[PATCH v4 2/7] MODSIGN: Export module signature definitions
IMA will use the module_signature format for append signatures, so export the relevant definitions and factor out the code which verifies that the appended signature trailer is valid. Also, create a CONFIG_MODULE_SIG_FORMAT option so that IMA can select it and be able to use validate_module_signature without having to depend on CONFIG_MODULE_SIG. Signed-off-by: Thiago Jung Bauermann--- include/linux/module.h | 3 -- include/linux/module_signature.h | 47 + init/Kconfig | 6 +++- kernel/Makefile | 2 +- kernel/module.c | 1 + kernel/module_signing.c | 74 +--- 6 files changed, 85 insertions(+), 48 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index e7bdd549e527..672ad2016262 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -23,9 +23,6 @@ #include #include -/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ -#define MODULE_SIG_STRING "~Module signature appended~\n" - /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h new file mode 100644 index ..e80728e5b86c --- /dev/null +++ b/include/linux/module_signature.h @@ -0,0 +1,47 @@ +/* Module signature handling. + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowe...@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_MODULE_SIGNATURE_H +#define _LINUX_MODULE_SIGNATURE_H + +/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. 
*/ +#define MODULE_SIG_STRING "~Module signature appended~\n" + +enum pkey_id_type { + PKEY_ID_PGP,/* OpenPGP generated key ID */ + PKEY_ID_X509, /* X.509 arbitrary subjectKeyIdentifier */ + PKEY_ID_PKCS7, /* Signature in PKCS#7 message */ +}; + +/* + * Module signature information block. + * + * The constituents of the signature section are, in order: + * + * - Signer's name + * - Key identifier + * - Signature data + * - Information block + */ +struct module_signature { + u8 algo; /* Public-key crypto algorithm [0] */ + u8 hash; /* Digest algorithm [0] */ + u8 id_type;/* Key identifier type [PKEY_ID_PKCS7] */ + u8 signer_len; /* Length of signer's name [0] */ + u8 key_id_len; /* Length of key identifier [0] */ + u8 __pad[3]; + __be32 sig_len;/* Length of signature data */ +}; + +int validate_module_sig(const struct module_signature *ms, size_t file_len); +int mod_verify_sig(const void *mod, unsigned long *_modlen); + +#endif /* _LINUX_MODULE_SIGNATURE_H */ diff --git a/init/Kconfig b/init/Kconfig index 8514b25db21c..c3ac1170b93a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1734,7 +1734,7 @@ config MODULE_SRCVERSION_ALL config MODULE_SIG bool "Module signature verification" depends on MODULES - select SYSTEM_DATA_VERIFICATION + select MODULE_SIG_FORMAT help Check modules for valid signatures upon load: the signature is simply appended to the module. For more information see @@ -1749,6 +1749,10 @@ config MODULE_SIG debuginfo strip done by some packagers (such as rpmbuild) and inclusion into an initramfs that wants the module size reduced. 
+config MODULE_SIG_FORMAT + def_bool n + select SYSTEM_DATA_VERIFICATION + config MODULE_SIG_FORCE bool "Require modules to be validly signed" depends on MODULE_SIG diff --git a/kernel/Makefile b/kernel/Makefile index 4cb8e8b23c6e..d5f9748ab19f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -56,7 +56,7 @@ obj-y += up.o endif obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_MODULE_SIG) += module_signing.o +obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signing.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_CRASH_CORE) += crash_core.o diff --git a/kernel/module.c b/kernel/module.c index 40f983cbea81..52921fccb51a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 937c844bee4a..204c60d4cc9f 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -11,36 +11,38 @@ #include #include +#include #include #include #include #include "module-internal.h" -enum pkey_id_type { - PKEY_ID_PGP,/* OpenPGP
[PATCH v4 1/7] integrity: Introduce struct evm_xattr
Even though struct evm_ima_xattr_data includes a fixed-size array to hold a SHA1 digest, most of the code ignores the array and uses the struct to mean "type indicator followed by data of unspecified size" and tracks the real size of what the struct represents in a separate length variable. The only exception to that is the EVM code, which correctly uses the definition of struct evm_ima_xattr_data. This patch makes this explicit in the code by removing the length specification from the array in struct evm_ima_xattr_data. It also changes the name of the element from digest to data, since in most places the array doesn't hold a digest. A separate struct evm_xattr is introduced, with the original definition of evm_ima_xattr_data to be used in the places that actually expect that definition. Signed-off-by: Thiago Jung Bauermann--- security/integrity/evm/evm_crypto.c | 4 ++-- security/integrity/evm/evm_main.c | 10 +- security/integrity/ima/ima_appraise.c | 7 --- security/integrity/integrity.h| 5 + 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 1d32cd20009a..6ee25d7e5141 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -252,13 +252,13 @@ int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name, const char *xattr_value, size_t xattr_value_len) { struct inode *inode = d_backing_inode(dentry); - struct evm_ima_xattr_data xattr_data; + struct evm_xattr xattr_data; int rc = 0; rc = evm_calc_hmac(dentry, xattr_name, xattr_value, xattr_value_len, xattr_data.digest); if (rc == 0) { - xattr_data.type = EVM_XATTR_HMAC; + xattr_data.data.type = EVM_XATTR_HMAC; rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_EVM, _data, sizeof(xattr_data), 0); diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 063d38aef64e..536694499515 100644 --- a/security/integrity/evm/evm_main.c +++ 
b/security/integrity/evm/evm_main.c @@ -116,7 +116,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, struct integrity_iint_cache *iint) { struct evm_ima_xattr_data *xattr_data = NULL; - struct evm_ima_xattr_data calc; + struct evm_xattr calc; enum integrity_status evm_status = INTEGRITY_PASS; int rc, xattr_len; @@ -147,7 +147,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, /* check value type */ switch (xattr_data->type) { case EVM_XATTR_HMAC: - if (xattr_len != sizeof(struct evm_ima_xattr_data)) { + if (xattr_len != sizeof(struct evm_xattr)) { evm_status = INTEGRITY_FAIL; goto out; } @@ -155,7 +155,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, xattr_value_len, calc.digest); if (rc) break; - rc = crypto_memneq(xattr_data->digest, calc.digest, + rc = crypto_memneq(xattr_data->data, calc.digest, sizeof(calc.digest)); if (rc) rc = -EINVAL; @@ -467,7 +467,7 @@ int evm_inode_init_security(struct inode *inode, const struct xattr *lsm_xattr, struct xattr *evm_xattr) { - struct evm_ima_xattr_data *xattr_data; + struct evm_xattr *xattr_data; int rc; if (!evm_initialized || !evm_protected_xattr(lsm_xattr->name)) @@ -477,7 +477,7 @@ int evm_inode_init_security(struct inode *inode, if (!xattr_data) return -ENOMEM; - xattr_data->type = EVM_XATTR_HMAC; + xattr_data->data.type = EVM_XATTR_HMAC; rc = evm_init_hmac(inode, lsm_xattr, xattr_data->digest); if (rc < 0) goto out; diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 809ba70fbbbf..87d2b601cf8e 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -156,7 +156,8 @@ enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, return sig->hash_algo; break; case IMA_XATTR_DIGEST_NG: - ret = xattr_value->digest[0]; + /* first byte contains algorithm id */ + ret = xattr_value->data[0]; if (ret < HASH_ALGO__LAST) return ret; break; @@ -164,7 +165,7 @@ enum 
hash_algo ima_get_hash_algo(struct
[PATCH v4 0/7] Appended signatures support for IMA appraisal
Hello, This version implements an approach suggested by Mimi Zohar, which is that if the modsig is valid, ima_appraise_measurement will now copy the hash calculated by the verification process into the iint cache. This ensures that the hash will appear in the measurement list and used to extend the TPM PCR. Original cover letter: On the OpenPOWER platform, secure boot and trusted boot are being implemented using IMA for taking measurements and verifying signatures. Since the kernel image on Power servers is an ELF binary, kernels are signed using the scripts/sign-file tool and thus use the same signature format as signed kernel modules. This patch series adds support in IMA for verifying those signatures. It adds flexibility to OpenPOWER secure boot, because it allows it to boot kernels with the signature appended to them as well as kernels where the signature is stored in the IMA extended attribute. Since modsig is only supported on some specific hooks which don't get called often (cf. ima_hook_supports_modsig), it's possible to always check for the presence of an appended modsig before looking for the xattr sig. In that case, the policy doesn't need to be changed to support the modsig keyword. Is that preferable than requiring the policy to explicitly allow a modsig like this code does? I tested these patches with EVM and I believe they don't break it and things work as expected, but I'm not really familiar with EVM and its use cases so this should be taken with a grain of salt. I also verified that the code correctly recalculates the file hash if the modsig verification fails and the file also has an xattr signature which uses a different hash algorithm. These patches apply on top of today's linux-security/next. Changes since v3: - Patch "integrity: Introduce struct evm_hmac_xattr" - Renamed new struct to evm_xattr. - Define struct evm_xattr using struct evm_ima_xattr_data, and moved it from evm.h to integrity.h (suggested by Mimi Zohar). 
- Patch "PKCS#7: Introduce verify_pkcs7_message_sig" - Also introduce pkcs7_get_message_sig. - Patch "ima: Support appended signatures for appraisal" - Moved check for buffer presence and size from ima_appraise_measurement to ima_read_modsig (suggested by Mimi Zohar). - Factored out handling of ima_read_xattr return value into process_xattr_error in ima_appraise_measurement so that it can be used if the modsig verification fails. - Pass NULL xattr_value to evm_verifyxattr even in the case of xattr signature in ima_appraise_measurement (suggested by Mimi Zohar). - Use switch statement provided by Mimi Zohar to check result of evm_verifyxattr. - If the modsig verification succeeds, copy the hash calculated during the verification to the iint cache (suggested by Mimi Zohar). - Substitute recursion in ima_appraise_measurement by a goto statement back to the main switch statement (suggested by Mimi Zohar). Changes since v2: - Patch "MODSIGN: Export module signature definitions." - Put change introducing function verify_pkcs7_message_signature into its own patch (suggested by Mimi Zohar). - Shortened validate_module_signature to validate_module_sig. - Patch "PKCS#7: Introduce verify_pkcs7_message_sig" - New patch in this series. - Shortened verify_pkcs7_message_signature to verify_pkcs7_message_sig. - Patch "integrity: Introduce integrity_keyring_from_id" - New patch in this series. - Patch "integrity: Select CONFIG_KEYS instead of depending on it" - New patch in this series. - Patch "ima: Store measurement after appraisal" - New patch in this series. - Instead of creating function measure_and_appraise, simply call ima_appraise_measurement before ima_store_measurement in process_measurement (suggested by Mimi Zohar). - Patch "ima: Support appended signatures for appraisal" - Put change introducing function integrity_keyring_from_id into its own patch (suggested by Mimi Zohar). - Put change to select CONFIG_KEYS in its own patch. 
- Put change in the order of measure and appraise steps into its own patch (suggested by Mimi Zohar). - Add buf and size arguments to ima_appraise_measurement. Also, pass xattr_value and xattr_len by reference so that the function can change them to point to the modsig. - Don't pass buf_len by reference in ima_read_modsig. It doesn't need to be changed anymore now that the hash calculated by the collect step covers the whole file instead of skipping the modsig at the end. - Don't add pkcs7_get_message_sig. It's not necessary anymore. Ditto for ima_get_modsig_hash_algo. - Don't change ima_collect_measurement anymore to recalculate the file hash if the algorithm is different, since now it doesn't have anything to do with the hash used by the modsig. - Don't change ima_get_hash_alog anymore to obtain the hash algo used by the modsig, since it isn't used in the collect step. - Change
Re: 4.13-rc3: Unrecoverable exception 4100
No, this is really a 4.13-rc1 regression. Andreas. -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different."
Re: 4.13-rc3: Unrecoverable exception 4100
This is actually a 4.13-rc2 regression. Andreas. -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different."
Re: [PATCH 1/4] powerpc/prom: avoid endian conversions for linux, memory-limit node
On Friday 04 August 2017 03:44 PM, Michael Ellerman wrote: Hari Bathiniwrites: On Friday 04 August 2017 09:21 AM, Michael Ellerman wrote: Hari Bathini writes: As linux,memory-limit node is set and also later used by the kernel, avoid endian conversions for this property. Fixes: 493adffcb43f ("powerpc: Make prom_init.c endian safe") Cc: sta...@vger.kernel.org # 3.12+ Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Signed-off-by: Hari Bathini --- arch/powerpc/kernel/prom_init.c |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) As Ben said, this is not OK. The flat device tree is a data structure with a specified format[1], we don't violate the spec just to avoid an endian swap. Is there an actual bug you're trying to solve? Yep. While retrieving this property in prom.c, no endian conversion is being done. It was broken for a while. Let me do the endian swap in prom.c while retrieving.. Does it actually not work though, mem=x on the command line? mem=X works fine. The problem is with the early cmdline parsing of 'mem=' in prom_init, which is treating fadump_reserve_mem=X as mem=X. So, when fadump_reserve_mem=X is passed, endian swapped version of X is set to memory_limit as early parser takes it for mem=X and linux,memory-limit read is not endian safe currently. This bug was not hit so far as prom_memory_limit is set only when X is < ram_top && > alloc_bottom which is not the case generally. I think that code in prom.c is basically dead code, it's still there because we were afraid removing it would break something. These days we parse the command line early enough that we don't need those properties. This problem is not seen with mem=X as memory_limit is overwritten with the right value as soon as parse_early_param() is called in prom. Should I just get rid of linux,memory-limit node and mem=X handling from early_cmdline_parse() in prom_init as this has been broken for a while and nobody seem to have had a problem with that? Thanks Hari
Re: [PATCH v3] powerpc/powernv: Use darn instr for random_seed on p9
On 08/03/2017 06:12 PM, Matt Brown wrote: > This adds the powernv_get_random_darn function which utilises the darn > instruction, introduced in POWER9. The powernv_get_random_darn function > is used as the ppc_md.get_random_seed on P9. > > The DARN instruction can potentially throw an error, so we attempt to > register the powernv_get_random_darn function up to 10 times before > failing. > > Signed-off-by: Matt Brown> --- > v3: > - add repeat attempts to register the ppc_md.get_random_seed > - fixed the PPC_DARN macro > - move DARN_ERR definition > - fixed commit message > v2: > - remove repeat darn attempts > - move hook to rng_init > --- > arch/powerpc/include/asm/ppc-opcode.h | 4 > arch/powerpc/platforms/powernv/rng.c | 35 > ++- > 2 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/ppc-opcode.h > b/arch/powerpc/include/asm/ppc-opcode.h > index c4ced1d..aabd150 100644 > --- a/arch/powerpc/include/asm/ppc-opcode.h > +++ b/arch/powerpc/include/asm/ppc-opcode.h > @@ -134,6 +134,7 @@ > #define PPC_INST_COPY0x7c00060c > #define PPC_INST_COPY_FIRST 0x7c20060c > #define PPC_INST_CP_ABORT0x7c00068c > +#define PPC_INST_DARN0x7c0005e6 > #define PPC_INST_DCBA0x7c0005ec > #define PPC_INST_DCBA_MASK 0xfc0007fe > #define PPC_INST_DCBAL 0x7c2005ec > @@ -325,6 +326,9 @@ > > /* Deal with instructions that older assemblers aren't aware of */ > #define PPC_CP_ABORTstringify_in_c(.long PPC_INST_CP_ABORT) > +#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ > + ___PPC_RT(t) | \ > + (((l) & 0x3) << 16)) > #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ > __PPC_RA(a) | __PPC_RB(b)) > #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ > diff --git a/arch/powerpc/platforms/powernv/rng.c > b/arch/powerpc/platforms/powernv/rng.c > index 5dcbdea..83b925c 100644 > --- a/arch/powerpc/platforms/powernv/rng.c > +++ b/arch/powerpc/platforms/powernv/rng.c > @@ -16,11 +16,13 @@ > #include > #include > #include > 
+#include > #include > #include > #include > #include > > +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul > > struct powernv_rng { > void __iomem *regs; > @@ -67,6 +69,21 @@ int powernv_get_random_real_mode(unsigned long *v) > return 1; > } > > +int powernv_get_random_darn(unsigned long *v) > +{ > + unsigned long val; > + > + /* Using DARN with L=1 - 64-bit conditioned random number */ > + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); > + > + if (val == DARN_ERR) > + return 0; > + > + *v = val; > + > + return 1; > +} > + > int powernv_get_random_long(unsigned long *v) > { > struct powernv_rng *rng; > @@ -135,8 +152,9 @@ static __init int rng_create(struct device_node *dn) > > static __init int rng_init(void) > { > + unsigned long darn_test; > struct device_node *dn; > - int rc; > + int rc, i; > > for_each_compatible_node(dn, NULL, "ibm,power-rng") { > rc = rng_create(dn); > @@ -150,6 +168,21 @@ static __init int rng_init(void) > of_platform_device_create(dn, NULL, NULL); > } > > + if (cpu_has_feature(CPU_FTR_ARCH_300)) { > + for (i = 0; i < 10; i++) { > + if (powernv_get_random_darn(&darn_test)) { > + ppc_md.get_random_seed = > + powernv_get_random_darn; > + break; If you return directly here you can avoid the (i == 9) conditional every iteration of the loop by moving the pr_warn to just outside the loop. -Tyrel > + } > + > + if (i == 9) { > + pr_warn("Failed to use powernv_get_random_darn"\ > + "as get_random_seed"); > + } > + } > + } > + > return 0; > } > machine_subsys_initcall(powernv, rng_init); >
Re: [PATCH] powernv:idle: Disable LOSE_FULL_CONTEXT states when stop-api fails.
On Fri, Aug 04, 2017 at 12:34:22PM +0530, Gautham R. Shenoy wrote: > From: "Gautham R. Shenoy"> > Currently, we use the opal call opal_slw_set_reg() to inform the > Sleep-Winkle Engine (SLW) to restore the contents of some of the > Hypervisor state on wakeup from deep idle states that lose full > hypervisor context (characterized by the flag > OPAL_PM_LOSE_FULL_CONTEXT). > > However, the current code has a bug in that if opal_slw_set_reg() > fails, we don't disable the use of these deep states (winkle on > POWER8, stop4 onwards on POWER9). > > This patch fixes this bug by ensuring that if programming the > sleep-winkle engine to restore the hypervisor states in > pnv_save_sprs_for_deep_states() fails, then we exclude such states by > excluding their flags from supported_cpuidle_states. Further, we ensure in > the initialization of the cpuidle-powernv driver to only include those > states whose flags are present in supported_cpuidle_states. > > Fixes: 1e1601b38e6 ("powerpc/powernv/idle: Restore SPRs for deep idle > states via stop API.") > Forgot to add Reported-by: Akshay Adiga > Signed-off-by: Gautham R. Shenoy -- Thanks and Regards gautham.
Re: [PATCH 4/4] axonram: Delete an unnecessary variable initialisation in axon_ram_probe()
On 08/03/2017 12:17 PM, SF Markus Elfring wrote: > From: Markus Elfring> Date: Thu, 3 Aug 2017 20:34:00 +0200 > > The local variable "rc" will eventually be set only to an error code. > Thus omit the explicit initialisation at the beginning. > > Signed-off-by: Markus Elfring > --- > arch/powerpc/sysdev/axonram.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c > index 93cc902350db..5677f3371e30 100644 > --- a/arch/powerpc/sysdev/axonram.c > +++ b/arch/powerpc/sysdev/axonram.c > @@ -184,7 +184,6 @@ static int axon_ram_probe(struct platform_device *device) > static int axon_ram_bank_id = -1; > struct axon_ram_bank *bank; > struct resource resource; > - int rc = 0; You've completely removed the declaration of "rc" instead of removing the "= 0" initialization. I would expect a compilation test to have turned up an undeclared use error for "rc". -Tyrel > > axon_ram_bank_id++; >
Re: [PATCH] PCI: Convert to using %pOF instead of full_name
On 07/18/2017 02:43 PM, Rob Herring wrote: > Now that we have a custom printf format specifier, convert users of > full_name to use %pOF instead. This is preparation to remove storing > of the full path string for each node. > > Signed-off-by: Rob Herring> Cc: Thomas Petazzoni > Cc: Jason Cooper > Cc: Bjorn Helgaas > Cc: Thierry Reding > Cc: Jonathan Hunter > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: linux-...@vger.kernel.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-te...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > --- > drivers/pci/host/pci-mvebu.c| 8 > drivers/pci/host/pci-tegra.c| 3 +-- > drivers/pci/hotplug/pnv_php.c | 4 ++-- > drivers/pci/hotplug/rpadlpar_core.c | 4 ++-- > drivers/pci/hotplug/rpaphp_core.c | 2 +- > drivers/pci/hotplug/rpaphp_pci.c| 4 ++-- > drivers/pci/hotplug/rpaphp_slot.c | 4 ++-- > drivers/pci/pci-sysfs.c | 4 ++-- > drivers/pci/pci.c | 4 ++-- > 9 files changed, 18 insertions(+), 19 deletions(-) > Reviewed-by: Tyrel Datwyler
Re: [PATCH] uapi: fix another asm/shmbuf.h userspace compilation error
On Thu, Mar 02, 2017 at 02:07:20PM +0100, Arnd Bergmann wrote: > On Thu, Mar 2, 2017 at 1:46 AM, Dmitry V. Levin wrote: > > Replace size_t with __kernel_size_t to fix asm/shmbuf.h userspace > > compilation errors like this: > > > > /usr/include/asm-generic/shmbuf.h:28:2: error: unknown type name 'size_t' > > size_t shm_segsz; /* size of segment (bytes) */ > > > > x32 is the only architecture where sizeof(size_t) is less than > > sizeof(__kernel_size_t), but as the kernel treats shm_segsz field as > > __kernel_size_t anyway, UAPI should follow. Thanks to little-endianness > > of x32 and 64-bit alignment of the field following shm_segsz, this > > change doesn't break ABI, and the difference doesn't manifest itself > > easily. > > > > Signed-off-by: Dmitry V. Levin > > Acked-by: Arnd Bergmann Out of interest, is there a plan for merging these patches from Dmitry? Cheers James signature.asc Description: Digital signature
RE: [PATCH v9 14/14] powerpc: rewrite local_t using soft_irq
From: Nicholas Piggin > Sent: 04 August 2017 10:04 > On Fri, 04 Aug 2017 11:40:43 +1000 > Benjamin Herrenschmidt wrote: > > > On Fri, 2017-08-04 at 03:50 +1000, Nicholas Piggin wrote: > > > Hey, so... why are any of these implemented in asm? We should > > > just do them all in C, right? I looked a bit harder at code gen > > > and a couple of them are still emitting larx/stcx. > > > > As long as we can guarantee that the C compiler won't play games > > moving stuff around. But yes, I tend to agree. > > > I believe so. I mean we already depend on the same pattern for any > other sequence of local_irq_disable(); c code; local_irq_enable(); > so we'd have other problems if we couldn't. I'd guess that a "memory" clobber on the irq_disable/enable would be enough. It could be restricted to the memory area being updated. David
[PATCH] powerpc/64s: Add support for ASB_Notify on POWER9
The POWER9 core supports a new feature: ASB_Notify which requires the support of the Special Purpose Register: TIDR. The ASB_Notify command, generated by the AFU, will attempt to wake-up the host thread identified by the particular LPID:PID:TID. The special register TIDR has to be updated to with the same value present in the process element. If the length of the register TIDR is 64bits, the CAPI Translation Service Layer core (XSL9) for Power9 systems limits the size (16bits) of the Thread ID when it generates the ASB_Notify message adding PID:LPID:TID information from the context. The content of the internal kernel Thread ID (32bits) can not therefore be used to fulfill the register TIDR. This patch allows to avoid this limitation by adding a new interface for the user. The instructions mfspr/mtspr SPRN_TIDR are emulated, save/restore SPRs (context switch) are updated and a new feature (CPU_FTR_TIDR) is added to POWER9 system. Signed-off-by: Christophe Lombard--- arch/powerpc/include/asm/cputable.h | 4 +++- arch/powerpc/include/asm/emulated_ops.h | 2 ++ arch/powerpc/include/asm/ppc-opcode.h | 4 arch/powerpc/include/asm/processor.h| 1 + arch/powerpc/kernel/process.c | 8 arch/powerpc/kernel/traps.c | 21 + 6 files changed, 39 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index d02ad93..706f668 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -215,6 +215,7 @@ enum { #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000) +#define CPU_FTR_TIDR LONG_ASM_CONST(0x8000) #ifndef __ASSEMBLY__ @@ -474,7 +475,8 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) + CPU_FTR_ARCH_207S | 
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ + CPU_FTR_TIDR) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index f00e10e..e83ad42 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -54,6 +54,8 @@ extern struct ppc_emulated { #ifdef CONFIG_PPC64 struct ppc_emulated_entry mfdscr; struct ppc_emulated_entry mtdscr; + struct ppc_emulated_entry mftidr; + struct ppc_emulated_entry mttidr; struct ppc_emulated_entry lq_stq; #endif } ppc_emulated; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index fa9ebae..3ebc446 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -241,6 +241,10 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1e #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1e +#define PPC_INST_MFSPR_TIDR0x7d2452a6 +#define PPC_INST_MFSPR_TIDR_MASK 0xfd2e +#define PPC_INST_MTSPR_TIDR0x7d2453a6 +#define PPC_INST_MTSPR_TIDR_MASK 0xfd2e #define PPC_INST_MFVSRD0x7c66 #define PPC_INST_MTVSRD0x7c000166 #define PPC_INST_SLBFEE0x7c0007a7 diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fab7ff8..58cc212 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -329,6 +329,7 @@ struct thread_struct { */ int dscr_inherit; unsigned long ppr;/* used to save/restore SMT priority */ + unsigned long tidr; #endif #ifdef CONFIG_PPC_BOOK3S_64 unsigned long tar; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c9..f06ea10 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1084,6 +1084,9 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_DSCR)) 
t->dscr = mfspr(SPRN_DSCR); + if (cpu_has_feature(CPU_FTR_TIDR)) + t->tidr = mfspr(SPRN_TIDR); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { t->bescr = mfspr(SPRN_BESCR); t->ebbhr = mfspr(SPRN_EBBHR); @@ -1120,6 +1123,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_DSCR, dscr); }
[PATCH] powerpc/eeh: Delete an error message for a failed memory allocation in two functions
From: Markus ElfringDate: Fri, 4 Aug 2017 16:37:56 +0200 Omit an extra message for a memory allocation failure in these functions. This issue was detected by using the Coccinelle software. Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf Signed-off-by: Markus Elfring --- arch/powerpc/kernel/eeh_dev.c | 5 + arch/powerpc/kernel/eeh_event.c | 5 ++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index d6b2ca70d14d..2742ecbadf18 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -55,11 +55,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) /* Allocate EEH device */ edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) { - pr_warn("%s: out of memory\n", - __func__); + if (!edev) return NULL; - } /* Associate EEH device with OF node */ pdn->edev = edev; diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index accbf8b5fd46..c62bd90e038f 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -132,10 +132,9 @@ int eeh_send_failure_event(struct eeh_pe *pe) struct eeh_event *event; event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (!event) { - pr_err("EEH: out of memory, event not handled\n"); + if (!event) return -ENOMEM; - } + event->pe = pe; /* We may or may not be called in an interrupt context */ -- 2.13.4
Is SPARSE_IRQ really needed for all powerpcs ?
The help associated with SPARSE_IRQ says: Sparse irq numbering is useful for distro kernels that want to define a high CONFIG_NR_CPUS value but still want to have low kernel memory footprint on smaller machines. ( Sparse irqs can also be beneficial on NUMA boxes, as they spread out the interrupt descriptors in a more NUMA-friendly way. ) If you don't know what to do here, say N. Is it really useful/necessary to select it for all powerpc platforms, as done by arch/powerpc/Kconfig? Shouldn't we at least make it conditional on SMP? Christophe
Re: [v5 11/15] arm64/kasan: explicitly zero kasan shadow memory
Hi Ard, Thank you very much for reviewing this. I will fix the bug you found in the next iteration. +zero_vemmap_populated_memory(void) Typo here: vemmap -> vmemmap Yeap, will rename here, and in Intel variant. +{ + struct memblock_region *reg; + u64 start, end; + + for_each_memblock(memory, reg) { + start = __phys_to_virt(reg->base); + end = __phys_to_virt(reg->base + reg->size); + + if (start >= end) How would this ever be true? And why is it a stop condition? Yes this is a stop condition. Also look at the way kasan allocates its shadow memory in this file kasan_init(): 187 for_each_memblock(memory, reg) { 188 void *start = (void *)__phys_to_virt(reg->base); 189 void *end = (void *)__phys_to_virt(reg->base + reg->size); 190 191 if (start >= end) 192 break; ... 200 vmemmap_populate(...) + Are you missing a couple of kasan_mem_to_shadow() calls here? I can't believe your intention is to wipe all of DRAM. True. Thank you for catching this bug. I have not really tested on arm, only compiled for sanity checking. Need to figure out how to configure qemu to run most generic arm code. I tested on x86 and sparc both real and qemu hardware. KASAN uses vmemmap_populate as a convenience: kasan has nothing to do with vmemmap, but the function already existed and happened to do what KASAN requires. Given that that will no longer be the case, it would be far better to stop using vmemmap_populate altogether, and clone it into a KASAN specific version (with an appropriate name) with the zeroing folded into it. I agree, but this would be outside of the scope of this project. Pasha
Re: [v5 09/15] sparc64: optimized struct page zeroing
Hi Sam, Thank you for looking at this. I will update patch description, and as you suggested replace memset() via static assert in next iteration. Pasha On 08/04/2017 01:37 AM, Sam Ravnborg wrote: Hi Pavel. On Thu, Aug 03, 2017 at 05:23:47PM -0400, Pavel Tatashin wrote: Add an optimized mm_zero_struct_page(), so struct page's are zeroed without calling memset(). We do eight regular stores, thus avoid cost of membar. The commit message does no longer reflect the implementation, and should be updated. Signed-off-by: Pavel TatashinReviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco --- arch/sparc/include/asm/pgtable_64.h | 32 1 file changed, 32 insertions(+) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 6fbd931f0570..be47537e84c5 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -230,6 +230,38 @@ extern unsigned long _PAGE_ALL_SZ_BITS; extern struct page *mem_map_zero; #define ZERO_PAGE(vaddr) (mem_map_zero) +/* This macro must be updated when the size of struct page grows above 80 + * or reduces below 64. + * The idea that compiler optimizes out switch() statement, and only + * leaves clrx instructions or memset() call. + */ +#definemm_zero_struct_page(pp) do { \ + unsigned long *_pp = (void *)(pp); \ + \ + /* Check that struct page is 8-byte aligned */ \ + BUILD_BUG_ON(sizeof(struct page) & 7); \ Would also be good to catch if sizeof > 80 so we do not silently migrate to the suboptimal version (silent at build time). Can you at build time catch if size is no any of: 64, 72, 80 and simplify the below a little? Sam
Re: [PATCH 4/4] axonram: Delete an unnecessary variable initialisation in axon_ram_probe()
Hi Markus, [auto build test ERROR on powerpc/next] [also build test ERROR on v4.13-rc3 next-20170804] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/SF-Markus-Elfring/axonram-Adjustments-for-axon_ram_probe/20170804-170436 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-defconfig (attached as .config) compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705 reproduce: wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=powerpc All errors (new ones prefixed by >>): arch/powerpc/sysdev/axonram.c: In function 'axon_ram_probe': >> arch/powerpc/sysdev/axonram.c:203:3: error: 'rc' undeclared (first use in >> this function) rc = -EFAULT; ^~ arch/powerpc/sysdev/axonram.c:203:3: note: each undeclared identifier is reported only once for each function it appears in arch/powerpc/sysdev/axonram.c:308:1: error: control reaches end of non-void function [-Werror=return-type] } ^ cc1: all warnings being treated as errors vim +/rc +203 arch/powerpc/sysdev/axonram.c dbdf04c40 Maxim Shchetynin 2007-07-20 177 dbdf04c40 Maxim Shchetynin 2007-07-20 178 /** dbdf04c40 Maxim Shchetynin 2007-07-20 179 * axon_ram_probe - probe() method for platform driver 61245 Grant Likely 2011-02-22 180 * @device: see platform_driver method dbdf04c40 Maxim Shchetynin 2007-07-20 181 */ 61245 Grant Likely 2011-02-22 182 static int axon_ram_probe(struct platform_device *device) dbdf04c40 Maxim Shchetynin 2007-07-20 183 { dbdf04c40 Maxim Shchetynin 2007-07-20 184static int axon_ram_bank_id = -1; dbdf04c40 Maxim Shchetynin 2007-07-20 185struct axon_ram_bank *bank; dbdf04c40 Maxim Shchetynin 2007-07-20 186struct resource resource; dbdf04c40 Maxim Shchetynin 2007-07-20 187 dbdf04c40 Maxim Shchetynin 2007-07-20 
188axon_ram_bank_id++; dbdf04c40 Maxim Shchetynin 2007-07-20 189 dbdf04c40 Maxim Shchetynin 2007-07-20 190dev_info(>dev, "Found memory controller on %s\n", 61c7a080a Grant Likely 2010-04-13 191 device->dev.of_node->full_name); dbdf04c40 Maxim Shchetynin 2007-07-20 192 12d94c464 Markus Elfring 2017-08-03 193bank = kzalloc(sizeof(*bank), GFP_KERNEL); e54eff030 Markus Elfring 2017-08-03 194if (!bank) e54eff030 Markus Elfring 2017-08-03 195return -ENOMEM; dbdf04c40 Maxim Shchetynin 2007-07-20 196 dbdf04c40 Maxim Shchetynin 2007-07-20 197device->dev.platform_data = bank; dbdf04c40 Maxim Shchetynin 2007-07-20 198 dbdf04c40 Maxim Shchetynin 2007-07-20 199bank->device = device; dbdf04c40 Maxim Shchetynin 2007-07-20 200 61c7a080a Grant Likely 2010-04-13 201if (of_address_to_resource(device->dev.of_node, 0, ) != 0) { dbdf04c40 Maxim Shchetynin 2007-07-20 202dev_err(>dev, "Cannot access device tree\n"); dbdf04c40 Maxim Shchetynin 2007-07-20 @203rc = -EFAULT; dbdf04c40 Maxim Shchetynin 2007-07-20 204goto failed; dbdf04c40 Maxim Shchetynin 2007-07-20 205} dbdf04c40 Maxim Shchetynin 2007-07-20 206 28f65c11f Joe Perches2011-06-09 207bank->size = resource_size(); dbdf04c40 Maxim Shchetynin 2007-07-20 208 dbdf04c40 Maxim Shchetynin 2007-07-20 209if (bank->size == 0) { dbdf04c40 Maxim Shchetynin 2007-07-20 210dev_err(>dev, "No DDR2 memory found for %s%d\n", dbdf04c40 Maxim Shchetynin 2007-07-20 211 AXON_RAM_DEVICE_NAME, axon_ram_bank_id); dbdf04c40 Maxim Shchetynin 2007-07-20 212rc = -ENODEV; dbdf04c40 Maxim Shchetynin 2007-07-20 213goto failed; dbdf04c40 Maxim Shchetynin 2007-07-20 214} dbdf04c40 Maxim Shchetynin 2007-07-20 215 dbdf04c40 Maxim Shchetynin 2007-07-20 216dev_info(>dev, "Register DDR2 memory device %s%d with %luMB\n", dbdf04c40 Maxim Shchetynin 2007-07-20 217 AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20); dbdf04c40 Maxim Shchetynin 2007-07-20 218 dbdf04c40 Maxim Shchetynin 2007-07-20 219bank->ph_addr = resource.start; 40f1ce7fb Anton Blanchard2011-05-08 
220bank->io_addr = (unsigned long) ioremap_prot( dbdf04c40 Maxim Shchetynin 2007-07-20 221bank->ph_addr, bank->size, _PAGE_NO_CACHE); dbdf04c40 Maxim Shchetynin 2007-07-20 222if (bank->io_addr == 0) { dbdf04c40 Maxim Shchetynin 2007-07-20 223dev_err(>dev, "ioremap() failed\n"); dbdf04c40 Maxim Shchetynin 2007-07-20 224
Re: 4.13-rc3: Unrecoverable exception 4100
On Fri, 2017-08-04 at 12:59 +0200, Andreas Schwab wrote: > I'm getting a lot of Unrecoverable exception 4100 with 4.13-rc3: Hi Andreas! Any chance you can bisect this? Thanks! Cheers, Ben.
Re: [PATCH 5/6] powerpc/mm: Optimize detection of thread local mm's
On Fri, 2017-08-04 at 14:06 +0200, Frederic Barrat wrote: > > +#ifdef CONFIG_PPC_BOOK3S_64 > > +static inline int mm_is_thread_local(struct mm_struct *mm) > > +{ > > + if (atomic_read(>context.active_cpus) > 1) > > + return false; > > + return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); > > +} > > +#else /* CONFIG_PPC_BOOK3S_64 */ > > > While working on something related (mark memory context as needing > global TLBI if used behind a NPU or PSL): > http://patchwork.ozlabs.org/patch/796775/ > > Michael raised the point that the store for the pte update cannot be > reordered with the load which decides the scope of the TLBI, and had > convinced me that a memory barrier was required. > > Couldn't we have the same problem here, where the atomic read is > reordered with the store of the invalid PTE? The store of the invalid PTE is done with a pte_update which contains a sync as far as I can tell. Cheers, Ben.
Re: [PATCH 5/6] powerpc/mm: Optimize detection of thread local mm's
Le 24/07/2017 à 06:28, Benjamin Herrenschmidt a écrit : Instead of comparing the whole CPU mask every time, let's keep a counter of how many bits are set in the mask. Thus testing for a local mm only requires testing if that counter is 1 and the current CPU bit is set in the mask. Signed-off-by: Benjamin Herrenschmidt--- arch/powerpc/include/asm/book3s/64/mmu.h | 3 +++ arch/powerpc/include/asm/mmu_context.h | 9 + arch/powerpc/include/asm/tlb.h | 11 ++- arch/powerpc/mm/mmu_context_book3s64.c | 2 ++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1a220cdff923..c3b00e8ff791 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -83,6 +83,9 @@ typedef struct { mm_context_id_t id; u16 user_psize; /* page size index */ + /* Number of bits in the mm_cpumask */ + atomic_t active_cpus; + /* NPU NMMU context */ struct npu_context *npu_context; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index ff1aeb2cd19f..cf8f50cd4030 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -96,6 +96,14 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, struct mm_struct *mm) { } #endif +#ifdef CONFIG_PPC_BOOK3S_64 +static inline void inc_mm_active_cpus(struct mm_struct *mm) +{ + atomic_inc(>context.active_cpus); +} +#else +static inline void inc_mm_active_cpus(struct mm_struct *mm) { } +#endif /* * switch_mm is the entry point called from the architecture independent @@ -110,6 +118,7 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + inc_mm_active_cpus(next); smp_mb(); new_on_cpu = true; } diff --git a/arch/powerpc/include/asm/tlb.h 
b/arch/powerpc/include/asm/tlb.h index 609557569f65..a7eabff27a0f 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -69,13 +69,22 @@ static inline int mm_is_core_local(struct mm_struct *mm) topology_sibling_cpumask(smp_processor_id())); } +#ifdef CONFIG_PPC_BOOK3S_64 +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + if (atomic_read(>context.active_cpus) > 1) + return false; + return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); +} +#else /* CONFIG_PPC_BOOK3S_64 */ While working on something related (mark memory context as needing global TLBI if used behind a NPU or PSL): http://patchwork.ozlabs.org/patch/796775/ Michael raised the point that the store for the pte update cannot be reordered with the load which decides the scope of the TLBI, and had convinced me that a memory barrier was required. Couldn't we have the same problem here, where the atomic read is reordered with the store of the invalid PTE? Thanks, Fred static inline int mm_is_thread_local(struct mm_struct *mm) { return cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())); } +#endif /* !CONFIG_PPC_BOOK3S_64 */ -#else +#else /* CONFIG_SMP */ static inline int mm_is_core_local(struct mm_struct *mm) { return 1; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 8159f5219137..de17d3e714aa 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -174,6 +174,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(mm); #endif + atomic_set(>context.active_cpus, 0); + return 0; }
4.13-rc3: Unrecoverable exception 4100
I'm getting a lot of Unrecoverable exception 4100 with 4.13-rc3: [13483.295173] Unrecoverable exception 4100 at c000a1ec [13483.295186] Oops: Unrecoverable exception, sig: 6 [#1] [13483.295190] SMP NR_CPUS=2 [13483.295191] PowerMac [13483.295197] Modules linked in: nfsd auth_rpcgss oid_registry lockd grace nfs_acl sunrpc tun af_packet ip6table_mangle nf_conntrack_ipv6 nf_defrag_ipv6 ip6t_REJECT nf_log_ipv6 ip6table_filter ip6_tables xt_TCPMSS iptable_mangle snd_aoa_fabric_layout snd_aoa_i2sbus snd_aoa_soundbus snd_pcm_oss snd_pcm sr_mod snd_aoa_codec_tas cdrom snd_aoa snd_seq snd_timer snd_seq_device xt_recent xt_nat snd_mixer_oss firewire_ohci snd sungem sungem_phy pata_macio firewire_core crc_itu_t soundcore xt_conntrack ipt_REJECT nf_log_ipv4 nf_log_common xt_LOG xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_filter ip_tables x_tables sg linear md_mod hid_generic usbhid ohci_pci ohci_hcd ehci_pci ehci_hcd usbcore usb_common dm_snapshot dm_bufio dm_mirror dm_region_hash [13483.295297] dm_log dm_mod sata_svw [13483.295306] CPU: 0 PID: 18626 Comm: rm Not tainted 4.13.0-rc3 #1 [13483.295311] task: c0018335e080 task.stack: c00139e5 [13483.295314] NIP: c000a1ec LR: c000a118 CTR: [13483.295318] REGS: c00139e53bb0 TRAP: 4100 Not tainted (4.13.0-rc3) [13483.295321] MSR: 90001030[13483.295329] CR: 2444 XER: 2000 [13483.295333] SOFTE: 1 GPR00: c00139e53e30 c0abb500 fffe GPR04: c001eb866298 c0018335e080 GPR08: 9000d032 0002 f001 GPR12: c00139e5 c000 3fffa8c0dca0 3fffa8c0dc88 GPR16: 1000 0001 3fffa8c0eaa0 GPR20: 3fffa8c27528 3fffa8c27b00 GPR24: 3fffa8c0d918 31b3efa0 3fffa8c26d68 GPR28: 3fffa8c249e8 3fffa8c263d0 3fffa8c27550 31b3ef10 [13483.295393] NIP [c000a1ec] system_call_exit+0xc0/0x21c [13483.295398] LR [c000a118] system_call+0x58/0x6c [13483.295400] Call Trace: [13483.295405] [c00139e53e30] [c000a118] system_call+0x58/0x6c (unreliable) [13483.295410] Instruction dump: [13483.295415] 64a51000 7c6300d0 f8a101a0 
4b9c 3c00 6006 780007c6 6400 [13483.295425] 6000 7c004039 4082001c e8ed0170 <88070b78> 88c70b79 7c003214 2c20 [13483.295437] ---[ end trace 79af5598e0243808 ]--- [13697.100080] Unrecoverable exception 4100 at c000a1ec [13697.100093] Oops: Unrecoverable exception, sig: 6 [#2] [13697.100096] SMP NR_CPUS=2 [13697.100098] PowerMac [13697.100104] Modules linked in: nfsd auth_rpcgss oid_registry lockd grace nfs_acl sunrpc tun af_packet ip6table_mangle nf_conntrack_ipv6 nf_defrag_ipv6 ip6t_REJECT nf_log_ipv6 ip6table_filter ip6_tables xt_TCPMSS iptable_mangle snd_aoa_fabric_layout snd_aoa_i2sbus snd_aoa_soundbus snd_pcm_oss snd_pcm sr_mod snd_aoa_codec_tas cdrom snd_aoa snd_seq snd_timer snd_seq_device xt_recent xt_nat snd_mixer_oss firewire_ohci snd sungem sungem_phy pata_macio firewire_core crc_itu_t soundcore xt_conntrack ipt_REJECT nf_log_ipv4 nf_log_common xt_LOG xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_filter ip_tables x_tables sg linear md_mod hid_generic usbhid ohci_pci ohci_hcd ehci_pci ehci_hcd usbcore usb_common dm_snapshot dm_bufio dm_mirror dm_region_hash [13697.100205] dm_log dm_mod sata_svw [13697.100214] CPU: 0 PID: 21001 Comm: sh Tainted: G D 4.13.0-rc3 #1 [13697.100219] task: c00173f9f080 task.stack: c001eb7b8000 [13697.100222] NIP: c000a1ec LR: c000a118 CTR: [13697.100226] REGS: c001eb70 TRAP: 4100 Tainted: G D (4.13.0-rc3) [13697.100229] MSR: 92003030 [13697.100239] CR: 24024482 XER: 2000 [13697.100243] SOFTE: 1 GPR00: c001eb7bbe30 c0abb500 GPR04: c0007b51 c019fa04 c00173f9fe10 c00173f9f080 GPR08: b000d032 0002 f001 GPR12: c001eb7b8000 c000 1002e380 1002e350 GPR16: 1002e318 1002e2e0 0001 GPR20: 1002bca8 1002bd30 10044c58 1002bc80 GPR24: 10044c60 1ac15fd0 10047790 1ac23490 GPR28: 1ac20d20 1ac23020 1ac20d20 3d882280 [13697.100301] NIP [c000a1ec] system_call_exit+0xc0/0x21c [13697.100306] LR [c000a118] system_call+0x58/0x6c [13697.100309] Call Trace: [13697.100314] [c001eb7bbe30] [c000a118] 
system_call+0x58/0x6c (unreliable) [13697.100319] Instruction dump:
[PATCH 3/3] powerpc: replace vga_fixup() with generic code
Currently, we do a PCI fixup to mark a default card so that Xorg autoconfiguration works. There is a new generic method to do this sort of vga fixup, and it occurs by default. Drop our old method. This method is different: - it will only mark a card as default if a driver is bound - the marking will happen at late_initcall time, or even later if a card is enabled later on (via an ENABLE hook). Currently things are enabled in a FINAL hook. This *does* change behaviour under some circumstances. For example, pseries_le_defconfig doesn't have DRM drivers for many of the qemu GPU models, including the 'standard' vga. So when a VM with that GPU boots, no driver binds the GPU, and it does *not* get marked as default. Previously, it would have been marked as default. As it turns out Xorg (at least Xorg v1.19.3) can still autoconfigure it, as Xorg is smart about OpenFirmware framebuffer devices. If the right GPU driver is available, and the OpenFirmware fb driver is removed, the device *is* marked as a boot GPU. (If the OpenFirmware driver is around, it enables the PCI device but doesn't bind to it, making it ineligible to be the default card. Then, when the right driver is loaded, the enable hook doesn't fire because the card has already been enabled. Fun!) So everything works as intended, I guess. Cc: Brian KingSigned-off-by: Daniel Axtens --- This would benefit from some tests on real hardware. 
--- arch/powerpc/kernel/pci-common.c | 16 1 file changed, 16 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 6cfaec107374..65cd5bad5ad6 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -1741,18 +1740,3 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); - -static void fixup_vga(struct pci_dev *pdev) -{ - u16 cmd; - - if (vga_default_device()) - return; - - pci_read_config_word(pdev, PCI_COMMAND, ); - if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) - vga_set_default_device(pdev); - -} -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, fixup_vga); -- 2.11.0
[PATCH 2/3] Split VGA default device handler out of VGA arbiter
A system without PCI legacy resources (e.g. ARM64, powerpc) may find that no default/boot VGA device has been marked, because the VGA arbiter checks for legacy resource decoding before marking a card as default. Split the small bit of code that does default VGA handling out from the arbiter. Add a Kconfig option to allow the kernel to be built with just the default handling, or the arbiter and default handling. (You can try with the arbiter and no default handling - I can't see why that wouldn't work but it seems a bit odd to try.) Add handling for devices that should be marked as default but aren't handled by the vga arbiter by adding a late initcall and a class enable hook. If there is no default from vgaarb then the first card that is enabled, has a driver bound, and can decode memory or I/O will be marked as default. Signed-off-by: Daniel Axtens--- I haven't tested this particularly deeply just yet as I wanted to see if anyone had Strong Feelings before I put too much time into testing. I have verified that it works on ppc with qemu TCG, both with and without the following patch. I know this adds another config option and that's a bit sad, but we can't include it unconditionally as it depends on PCI. Suggestions welcome. 
--- arch/ia64/pci/fixup.c| 2 +- arch/powerpc/kernel/pci-common.c | 2 +- arch/x86/pci/fixup.c | 2 +- arch/x86/video/fbdev.c | 2 +- drivers/gpu/vga/Kconfig | 12 +++ drivers/gpu/vga/Makefile | 1 + drivers/gpu/vga/vga_default.c| 159 +++ drivers/gpu/vga/vga_switcheroo.c | 2 +- drivers/gpu/vga/vgaarb.c | 41 +- drivers/pci/pci-sysfs.c | 2 +- include/linux/vga_default.h | 44 +++ include/linux/vgaarb.h | 14 12 files changed, 225 insertions(+), 58 deletions(-) create mode 100644 drivers/gpu/vga/vga_default.c create mode 100644 include/linux/vga_default.h diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index 41caa99add51..b35d1cf4501a 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c95fdda3a2dc..6cfaec107374 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 11e407489db0..b1254bc09a45 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 9fd24846d094..62cfa74ea86e 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include int fb_is_primary_device(struct fb_info *info) { diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig index 29437eabe095..81d4105aecf6 100644 --- a/drivers/gpu/vga/Kconfig +++ b/drivers/gpu/vga/Kconfig @@ -1,3 +1,14 @@ +config VGA_DEFAULT + bool "VGA Default Device Support" if EXPERT + default y + depends on PCI + help + Some programs find it helpful to know what VGA device is the default. 
+ On platforms like x86 this means the device used by the BIOS to show + early boot messages. On other platforms this may be an arbitrary PCI + graphics card. Select this to have a default device recorded within + the kernel and exposed to userspace through sysfs. + config VGA_ARB bool "VGA Arbitration" if EXPERT default y @@ -22,6 +33,7 @@ config VGA_SWITCHEROO depends on X86 depends on ACPI select VGA_ARB + select VGA_DEFAULT help Many laptops released in 2008/9/10 have two GPUs with a multiplexer to switch between them. This adds support for dynamic switching when diff --git a/drivers/gpu/vga/Makefile b/drivers/gpu/vga/Makefile index 14ca30b75d0a..1e30f90d40fb 100644 --- a/drivers/gpu/vga/Makefile +++ b/drivers/gpu/vga/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_VGA_ARB) += vgaarb.o +obj-$(CONFIG_VGA_DEFAULT) += vga_default.o obj-$(CONFIG_VGA_SWITCHEROO) += vga_switcheroo.o diff --git a/drivers/gpu/vga/vga_default.c b/drivers/gpu/vga/vga_default.c new file mode 100644 index ..f6fcb0eb1507 --- /dev/null +++ b/drivers/gpu/vga/vga_default.c @@ -0,0 +1,159 @@ +/* + * vga_default.c: What is the default/boot PCI VGA device? + * + * (C) Copyright 2005 Benjamin Herrenschmidt + * (C) Copyright 2007 Paulo R. Zanoni + * (C) Copyright 2007, 2009 Tiago Vignatti + * (C) Copyright 2017 Canonical Ltd. (Author: Daniel Axtens ) + *
[PATCH 1/3] powerpc: simplify and fix VGA default device behaviour
Some powerpc devices provide a PCI display that isn't picked up by the VGA arbiter, presumably because it doesn't support the PCI legacy VGA ranges. Commit c2e1d84523ad ("powerpc: Set default VGA device") introduced an arch quirk to mark these devices as default to fix X autoconfig. The commit message stated that the patch: Ensures a default VGA is always set if a graphics adapter is present, even if firmware did not initialize it. If more than one graphics adapter is present, ensure the one initialized by firmware is set as the default VGA device. The patch used the following test to decide whether or not to mark a device as default: pci_read_config_word(pdev, PCI_COMMAND, ); if ((cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) || !vga_default_device()) vga_set_default_device(pdev); This doesn't seem like it works quite as intended. Because of the logical OR, the default device will be set in 2 cases: 1) if there is no default device OR 2) if this device has normal memory/IO decoding turned on This will work as intended if there is only one device, but if there are multiple devices, we may override the device the VGA arbiter picked. Instead, set a device as default if there is no default device AND this device decodes. This will not change behaviour on single-headed systems. Cc: Brian KingSigned-off-by: Daniel Axtens --- Tested in TCG (the card provided by qemu doesn't automatically register with vgaarb, so the relevant code path has been tested) but I would appreciate any tests on real hardware. 
Informal benh ack: https://patchwork.kernel.org/patch/9850235/ --- arch/powerpc/kernel/pci-common.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 341a7469cab8..c95fdda3a2dc 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1746,8 +1746,11 @@ static void fixup_vga(struct pci_dev *pdev) { u16 cmd; + if (vga_default_device()) + return; + pci_read_config_word(pdev, PCI_COMMAND, ); - if ((cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) || !vga_default_device()) + if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) vga_set_default_device(pdev); } -- 2.11.0
[PATCH 0/3] Split VGA default selection from VGA arbiter
This is approach 3 of my patch series to sort out Xorg autoconfiguration for the Hibmc card behind a Hisilicon bridge on arm64. Approach 1 was a simple quirk for the card+bridge to mark it as default. This highlighted the fact that the default card was picked by the arbiter, which assumed legacy resources. The lack of legacy resources leads to quirks in ppc and concerns in arm land, so a more generic approach was desired. Approach 2 allowed platforms to opt in to a class enable hook that added a card as default if there was no default. This: - was possibly racy as ACPI PCI init and vgaarb are both subsys initcalls. - didn't check to see if a card had a driver. - meant that platforms for which the vga arbiter didn't make sense still needed it. This is approach 3. It pulls the default handling out of the arbiter, into its own file and behind its own Kconfig option. It adds the extra detection as a late initcall and an enable hook that only operates after the initcall, so it's not racy. It checks for drivers. It means people can turn off the vga arbiter. It works sensibly for modules too. Patch 1 cleans up the powerpc fixup, as with approach 2. Patch 2 is the big split. Patch 3 moves ppc over, as with approach 2. There is no need for an arm-specific patch this time as the Kconfig option is on by default. 
Regards, Daniel Daniel Axtens (3): powerpc: simplify and fix VGA default device behaviour Split VGA default device handler out of VGA arbiter powerpc: replace vga_fixup() with generic code arch/ia64/pci/fixup.c| 2 +- arch/powerpc/kernel/pci-common.c | 13 arch/x86/pci/fixup.c | 2 +- arch/x86/video/fbdev.c | 2 +- drivers/gpu/vga/Kconfig | 12 +++ drivers/gpu/vga/Makefile | 1 + drivers/gpu/vga/vga_default.c| 159 +++ drivers/gpu/vga/vga_switcheroo.c | 2 +- drivers/gpu/vga/vgaarb.c | 41 +- drivers/pci/pci-sysfs.c | 2 +- include/linux/vga_default.h | 44 +++ include/linux/vgaarb.h | 14 12 files changed, 224 insertions(+), 70 deletions(-) create mode 100644 drivers/gpu/vga/vga_default.c create mode 100644 include/linux/vga_default.h -- 2.11.0
Re: [PATCH 1/4] powerpc/prom: avoid endian conversions for linux, memory-limit node
Hari Bathini writes: > On Friday 04 August 2017 09:21 AM, Michael Ellerman wrote: >> Hari Bathini writes: >> >>> As linux,memory-limit node is set and also later used by the kernel, >>> avoid endian conversions for this property. >>> >>> Fixes: 493adffcb43f ("powerpc: Make prom_init.c endian safe") >>> Cc: sta...@vger.kernel.org # 3.12+ >>> Cc: Anton Blanchard >>> Cc: Benjamin Herrenschmidt >>> Signed-off-by: Hari Bathini >>> --- >>> arch/powerpc/kernel/prom_init.c |3 +-- >>> 1 file changed, 1 insertion(+), 2 deletions(-) >> As Ben said, this is not OK. The flat device tree is a data >> structure with a specified format[1], we don't violate the spec just to >> avoid an endian swap. >> >> Is there an actual bug you're trying to solve? > > Yep. While retrieving this property in prom.c, no endian conversion is > being done. > It was broken for a while. Let me do the endian swap in prom.c while > retrieving.. Does it actually not work though, mem=x on the command line? I think that code in prom.c is basically dead code, it's still there because we were afraid removing it would break something. These days we parse the command line early enough that we don't need those properties. cheers
[GIT PULL] Please pull powerpc/linux.git powerpc-4.13-5 tag
Hi Linus, Please pull some more powerpc fixes for 4.13. I had to merge rc1 into my fixes branch, so that I had the code that needed fixing. I don't think it should cause any problems, other than I had to generate the diffstat by hand. cheers The following changes since commit 0da12a7a81f1e2255e89dc783c565e84801475a2: powerpc/mm/hash: Free the subpage_prot_table correctly (2017-07-27 13:05:50 +1000) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-4.13-5 for you to fetch changes up to 3db40c312c2c1eb2187c5731102fa8ff380e6e40: powerpc/64: Fix __check_irq_replay missing decrementer interrupt (2017-08-04 12:55:49 +1000) powerpc fixes for 4.13 #5 Fixes for recently merged code: - a fix for the _PAGE_DEVMAP support, which was breaking KVM on Power9 radix - avoid a (harmless) lockdep warning in the early SMP code - return failure for some uses of dma_set_mask() rather than falling back to 32-bits - fix stack setup in watchdog soft_nmi_common() to use emergency stack - fix of_irq_to_resource() error check in of_fsl_spi_probe() Two fixes going to stable: - fix saving of Transactional Memory SPRs in core dump - fix __check_irq_replay missing decrementer interrupt And two misc: - fix 64-bit boot wrapper build with non-biarch compiler - work around a POWER9 PMU hang after state-loss idle Thanks to: Alistair Popple, Aneesh Kumar K.V, Cyril Bur, Gustavo Romero, Jose Ricardo Ziviani, Laurent Vivier, Nicholas Piggin, Oliver O'Halloran, Sergei Shtylyov, Suraj Jitindar Singh, Thomas Gleixner. 
Alistair Popple (1): powerpc/powernv/pci: Return failure for some uses of dma_set_mask() Gustavo Romero (1): powerpc/tm: Fix saving of TM SPRs in core dump Michael Ellerman (3): powerpc/smp: Call smp_ops->setup_cpu() directly on the boot CPU powerpc/boot: Fix 64-bit boot wrapper build with non-biarch compiler Merge tag 'v4.13-rc1' into fixes Nicholas Piggin (3): powerpc/64s: Fix stack setup in watchdog soft_nmi_common() powerpc/perf: POWER9 PMU stops after idle workaround powerpc/64: Fix __check_irq_replay missing decrementer interrupt Oliver O'Halloran (1): powerpc/mm: Fix pmd/pte_devmap() on non-leaf entries Sergei Shtylyov (1): powerpc/83xx/mpc832x_rdb: fix of_irq_to_resource() error check arch/powerpc/boot/Makefile | 14 +++--- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +- arch/powerpc/kernel/exceptions-64s.S | 10 +- arch/powerpc/kernel/idle_book3s.S| 8 +++- arch/powerpc/kernel/irq.c| 15 ++- arch/powerpc/kernel/ptrace.c | 13 ++--- arch/powerpc/kernel/smp.c| 12 ++-- arch/powerpc/platforms/83xx/mpc832x_rdb.c| 2 +- arch/powerpc/platforms/powernv/pci-ioda.c| 8 9 files changed, 71 insertions(+), 21 deletions(-) signature.asc Description: PGP signature
[PATCH v4] powerpc/powernv: Enable PCI peer-to-peer
P9 has support for PCI peer-to-peer, enabling a device to write in the mmio space of another device directly, without interrupting the CPU. This patch adds support for it on powernv, by adding a new API to be called by drivers. The pnv_pci_set_p2p(...) call configures an 'initiator', i.e the device which will issue the mmio operation, and a 'target', i.e. the device on the receiving side. P9 really only supports mmio stores for the time being but that's expected to change in the future, so the API allows to define both load and store operations. /* PCI p2p descriptor */ #define OPAL_PCI_P2P_ENABLE 0x1 #define OPAL_PCI_P2P_LOAD 0x2 #define OPAL_PCI_P2P_STORE 0x4 int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, uint64_t desc) It uses a new OPAL call, as the configuration magic is done on the PHBs by skiboot. Signed-off-by: Frederic Barrat--- Requires skiboot patch: 700611a48025c5a556bb0aa011ac81bb5d1bcbc1 Changelog: v4: - resubmit with correct opal call IDs, now that the skiboot portion is merged v3: - move target reference count from skiboot to linux v2: - change of API - allow disabling of p2p setting arch/powerpc/include/asm/opal-api.h| 13 - arch/powerpc/include/asm/opal.h| 2 + arch/powerpc/include/asm/pnv-pci.h | 2 + arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/pci-ioda.c | 3 +- arch/powerpc/platforms/powernv/pci.c | 71 ++ arch/powerpc/platforms/powernv/pci.h | 5 ++ 7 files changed, 94 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 7df005965634..2f52182a4c63 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -193,7 +193,13 @@ #define OPAL_IMC_COUNTERS_INIT 149 #define OPAL_IMC_COUNTERS_START150 #define OPAL_IMC_COUNTERS_STOP 151 -#define OPAL_LAST 151 +#define OPAL_GET_POWERCAP 152 +#define OPAL_SET_POWERCAP 153 +#define OPAL_GET_POWER_SHIFT_RATIO 154 +#define OPAL_SET_POWER_SHIFT_RATIO 
155 +#define OPAL_SENSOR_GROUP_CLEAR 156 +#define OPAL_PCI_SET_P2P157 +#define OPAL_LAST 157 /* Device tree flags */ @@ -1094,6 +1100,11 @@ enum { }; +/* PCI p2p descriptor */ +#define OPAL_PCI_P2P_ENABLE0x1 +#define OPAL_PCI_P2P_LOAD 0x2 +#define OPAL_PCI_P2P_STORE 0x4 + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6b8513c3ad40..5a715e66f910 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -267,6 +267,8 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, + uint64_t desc, uint16_t pe_number); int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu_pir); diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index de9681034353..59a548909d0b 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -26,6 +26,8 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state); extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); +extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, + uint64_t desc); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index b77f52ee8263..3369a6f2b2f1 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -313,3 +313,4 @@ OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); 
OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); +OPAL_CALL(opal_pci_set_p2p,OPAL_PCI_SET_P2P); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index
Re: [PATCH v9 14/14] powerpc: rewrite local_t using soft_irq
On Fri, 04 Aug 2017 11:40:43 +1000 Benjamin Herrenschmidt wrote: > On Fri, 2017-08-04 at 03:50 +1000, Nicholas Piggin wrote: > > Hey, so... why are any of these implemented in asm? We should > > just do them all in C, right? I looked a bit harder at code gen > > and a couple of them are still emitting larx/stcx. > > As long as we can guarantee that the C compiler won't play games > moving stuff around. But yes, I tend to agree. I believe so. I mean we already depend on the same pattern for any other sequence of local_irq_disable(); c code; local_irq_enable(); so we'd have other problems if we couldn't. I can easily believe there have been bugs with the fixed r13 handling in gcc in the past, but it looks like it does the right thing now AFAIKS. Thanks, Nick
Re: [PATCH v9 4/4] irqchip/qeic: remove PPCisms for QEIC
[Please add all the irqchip maintainers when posting irqchip patches...] On 03/08/17 04:38, Zhao Qiang wrote: > QEIC was supported on PowerPC, and dependent on PPC, > Now it is supported on other platforms, so remove PPCisms. > > Signed-off-by: Zhao Qiang> --- > arch/powerpc/platforms/83xx/km83xx.c | 1 - > arch/powerpc/platforms/83xx/misc.c| 1 - > arch/powerpc/platforms/83xx/mpc832x_mds.c | 1 - > arch/powerpc/platforms/83xx/mpc832x_rdb.c | 1 - > arch/powerpc/platforms/83xx/mpc836x_mds.c | 1 - > arch/powerpc/platforms/83xx/mpc836x_rdk.c | 1 - > arch/powerpc/platforms/85xx/corenet_generic.c | 1 - > arch/powerpc/platforms/85xx/mpc85xx_mds.c | 1 - > arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 1 - > arch/powerpc/platforms/85xx/twr_p102x.c | 1 - > drivers/irqchip/irq-qeic.c| 219 > +- > include/soc/fsl/qe/qe_ic.h| 132 > 12 files changed, 111 insertions(+), 250 deletions(-) > delete mode 100644 include/soc/fsl/qe/qe_ic.h > [...] > diff --git a/drivers/irqchip/irq-qeic.c b/drivers/irqchip/irq-qeic.c > index a2d8084..21e3b43 100644 > --- a/drivers/irqchip/irq-qeic.c > +++ b/drivers/irqchip/irq-qeic.c > @@ -18,8 +18,11 @@ > #include > #include > #include > +#include > #include > #include > +#include > +#include > #include > #include > #include > @@ -27,9 +30,8 @@ > #include > #include > #include > -#include > +#include > #include > -#include > > #define NR_QE_IC_INTS64 > > @@ -87,6 +89,43 @@ > #define SIGNAL_HIGH 2 > #define SIGNAL_LOW 0 > > +#define NUM_OF_QE_IC_GROUPS 6 > + > +/* Flags when we init the QE IC */ > +#define QE_IC_SPREADMODE_GRP_W 0x0001 > +#define QE_IC_SPREADMODE_GRP_X 0x0002 > +#define QE_IC_SPREADMODE_GRP_Y 0x0004 > +#define QE_IC_SPREADMODE_GRP_Z 0x0008 > +#define QE_IC_SPREADMODE_GRP_RISCA 0x0010 > +#define QE_IC_SPREADMODE_GRP_RISCB 0x0020 > + > +#define QE_IC_LOW_SIGNAL 0x0100 > +#define QE_IC_HIGH_SIGNAL0x0200 > + > +#define QE_IC_GRP_W_PRI0_DEST_SIGNAL_HIGH0x1000 > +#define QE_IC_GRP_W_PRI1_DEST_SIGNAL_HIGH0x2000 > +#define 
QE_IC_GRP_X_PRI0_DEST_SIGNAL_HIGH0x4000 > +#define QE_IC_GRP_X_PRI1_DEST_SIGNAL_HIGH0x8000 > +#define QE_IC_GRP_Y_PRI0_DEST_SIGNAL_HIGH0x0001 > +#define QE_IC_GRP_Y_PRI1_DEST_SIGNAL_HIGH0x0002 > +#define QE_IC_GRP_Z_PRI0_DEST_SIGNAL_HIGH0x0004 > +#define QE_IC_GRP_Z_PRI1_DEST_SIGNAL_HIGH0x0008 > +#define QE_IC_GRP_RISCA_PRI0_DEST_SIGNAL_HIGH0x0010 > +#define QE_IC_GRP_RISCA_PRI1_DEST_SIGNAL_HIGH0x0020 > +#define QE_IC_GRP_RISCB_PRI0_DEST_SIGNAL_HIGH0x0040 > +#define QE_IC_GRP_RISCB_PRI1_DEST_SIGNAL_HIGH0x0080 > +#define QE_IC_GRP_W_DEST_SIGNAL_SHIFT(12) > + > +/* QE interrupt sources groups */ > +enum qe_ic_grp_id { > + QE_IC_GRP_W = 0,/* QE interrupt controller group W */ > + QE_IC_GRP_X,/* QE interrupt controller group X */ > + QE_IC_GRP_Y,/* QE interrupt controller group Y */ > + QE_IC_GRP_Z,/* QE interrupt controller group Z */ > + QE_IC_GRP_RISCA,/* QE interrupt controller RISC group A */ > + QE_IC_GRP_RISCB /* QE interrupt controller RISC group B */ > +}; > + > struct qe_ic { > /* Control registers offset */ > u32 __iomem *regs; > @@ -265,15 +304,15 @@ static struct qe_ic_info qe_ic_info[] = { > }, > }; > > -static inline u32 qe_ic_read(volatile __be32 __iomem * base, unsigned int > reg) > +static inline u32 qe_ic_read(__be32 __iomem *base, unsigned int reg) Why are these tagged "inline"? In general, the compiler does a pretty good job at inlining what makes sense to be inlined without having to be told so. 
> { > - return in_be32(base + (reg >> 2)); > + return ioread32be(base + (reg >> 2)); > } > > -static inline void qe_ic_write(volatile __be32 __iomem * base, unsigned int > reg, > +static inline void qe_ic_write(__be32 __iomem *base, unsigned int reg, > u32 value) > { > - out_be32(base + (reg >> 2), value); > + iowrite32be(value, base + (reg >> 2)); > } > > static inline struct qe_ic *qe_ic_from_irq(unsigned int virq) > @@ -375,8 +414,8 @@ static const struct irq_domain_ops qe_ic_host_ops = { > .xlate = irq_domain_xlate_onetwocell, > }; > > -/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ > -unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) > +/* Return an interrupt vector or 0 if no interrupt is pending. */ > +static unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) > { > int irq;
[PATCH] powernv:idle: Disable LOSE_FULL_CONTEXT states when stop-api fails.
From: "Gautham R. Shenoy"Currently, we use the opal call opal_slw_set_reg() to inform the Sleep-Winkle Engine (SLW) to restore the contents of some of the Hypervisor state on wakeup from deep idle states that lose full hypervisor context (characterized by the flag OPAL_PM_LOSE_FULL_CONTEXT). However, the current code has a bug in that if opal_slw_set_reg() fails, we don't disable the use of these deep states (winkle on POWER8, stop4 onwards on POWER9). This patch fixes this bug by ensuring that if programing the sleep-winkle engine to restore the hypervisor states in pnv_save_sprs_for_deep_states() fails, then we exclude such states by excluding their flags supported_cpuidle_states. Further, we ensure in the initialization of the cpuidle-powernv driver to only include those states whose flags are present in supported_cpuidle_states. Fixes: 1e1601b38e6 ("powerpc/powernv/idle: Restore SPRs for deep idle states via stop API.") Signed-off-by: Gautham R. Shenoy --- arch/powerpc/platforms/powernv/idle.c | 126 +++--- drivers/cpuidle/cpuidle-powernv.c | 9 +++ 2 files changed, 110 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee07..5f4c206 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -184,9 +184,6 @@ static void pnv_alloc_idle_core_states(void) } update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) - pnv_save_sprs_for_deep_states(); } u32 pnv_get_supported_cpuidle_states(void) @@ -467,8 +464,39 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) return err; } +static void __init pnv_power8_idle_init(struct device_node *np, u32 *flags, + int dt_idle_states) +{ + bool disable_full_context_loss = false; + bool sprs_for_lose_full_context_saved = false; + + int rc = 0, i; + + for (i = 0; i < dt_idle_states; i++) { + if (flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) { + if 
(sprs_for_lose_full_context_saved) + goto add_flags; + + if (disable_full_context_loss) + continue; + + rc = pnv_save_sprs_for_deep_states(); + + if (unlikely(rc)) { + pr_warn("cpuidle-powernv: Disabling full context loss idle states.\n"); + pr_warn("cpuidle-powernv: Offlined CPUs will be put to shallow idle state.\n"); + disable_full_context_loss = true; + continue; + } + + sprs_for_lose_full_context_saved = true; + } +add_flags: + supported_cpuidle_states |= flags[i]; + } +} /* - * pnv_arch300_idle_init: Initializes the default idle state, first + * pnv_power9_idle_init: Initializes the default idle state, first *deep idle state and deepest idle state on *ISA 3.0 CPUs. * @@ -485,6 +513,9 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, u32 *residency_ns = NULL; u64 max_residency_ns = 0; int rc = 0, i; + bool save_sprs_for_full_context_loss = false; + bool disable_full_context_loss = false; + unsigned long invalid_states_mask = 0; psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL); psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL); @@ -521,35 +552,83 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, } /* +* States that have OPAL_PM_LOSE_FULL_CONTEXT flag set require +* the assistance of the slw engine to restore certain SPRs on +* wakeup from these states. The function to program the slw +* engine via stop-api expects pnv_deep_stop_psscr_val to be +* set before it is called. +* +* Hence, we first set the pnv_deepest_stop_psscr_{val,mask} +* to the value corresponding to deepest state. +*/ + for (i = 0; i < dt_idle_states; i++) { + int err; + + err = validate_psscr_val_mask(_val[i], _mask[i], + flags[i]); + if (err) { + report_invalid_psscr_val(psscr_val[i], err); + set_bit(i, _states_mask); + continue; + } + + if (flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) + save_sprs_for_full_context_loss = true; + + if (max_residency_ns < residency_ns[i]) { + max_residency_ns =