Re: [PATCH v6 1/5] x86/paravirt: introduce ALT_NOT_XEN

2023-12-10 Thread H. Peter Anvin
On December 9, 2023 10:21:34 PM PST, Juergen Gross  wrote:
>Introduce the macro ALT_NOT_XEN as a short form of
>ALT_NOT(X86_FEATURE_XENPV).
>
>Suggested-by: Peter Zijlstra (Intel) 
>Signed-off-by: Juergen Gross 
>---
>V3:
>- split off from next patch
>V5:
>- move patch to the start of the series (Boris Petkov)
>---
> arch/x86/include/asm/paravirt.h   | 42 ---
> arch/x86/include/asm/paravirt_types.h |  3 ++
> 2 files changed, 21 insertions(+), 24 deletions(-)
>
>diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
>index 693c61dbdd9c..aa76ac7c806c 100644
>--- a/arch/x86/include/asm/paravirt.h
>+++ b/arch/x86/include/asm/paravirt.h
>@@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x)
> static __always_inline unsigned long read_cr2(void)
> {
>   return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
>-  "mov %%cr2, %%rax;",
>-  ALT_NOT(X86_FEATURE_XENPV));
>+  "mov %%cr2, %%rax;", ALT_NOT_XEN);
> }
> 
> static __always_inline void write_cr2(unsigned long x)
>@@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x)
> static inline unsigned long __read_cr3(void)
> {
>   return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
>-"mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
>+"mov %%cr3, %%rax;", ALT_NOT_XEN);
> }
> 
> static inline void write_cr3(unsigned long x)
> {
>-  PVOP_ALT_VCALL1(mmu.write_cr3, x,
>-  "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
>+  PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN);
> }
> 
> static inline void __write_cr4(unsigned long x)
>@@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void);
> 
> static __always_inline void wbinvd(void)
> {
>-  PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
>+  PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
> }
> 
> static inline u64 paravirt_read_msr(unsigned msr)
>@@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long 
>pfn)
> static inline pte_t __pte(pteval_t val)
> {
>   return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
>-"mov %%rdi, %%rax",
>-ALT_NOT(X86_FEATURE_XENPV)) };
>+"mov %%rdi, %%rax", ALT_NOT_XEN) };
> }
> 
> static inline pteval_t pte_val(pte_t pte)
> {
>   return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
>-  "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
>+  "mov %%rdi, %%rax", ALT_NOT_XEN);
> }
> 
> static inline pgd_t __pgd(pgdval_t val)
> {
>   return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
>-"mov %%rdi, %%rax",
>-ALT_NOT(X86_FEATURE_XENPV)) };
>+"mov %%rdi, %%rax", ALT_NOT_XEN) };
> }
> 
> static inline pgdval_t pgd_val(pgd_t pgd)
> {
>   return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
>-  "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
>+  "mov %%rdi, %%rax", ALT_NOT_XEN);
> }
> 
> #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
>@@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
> static inline pmd_t __pmd(pmdval_t val)
> {
>   return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
>-"mov %%rdi, %%rax",
>-ALT_NOT(X86_FEATURE_XENPV)) };
>+"mov %%rdi, %%rax", ALT_NOT_XEN) };
> }
> 
> static inline pmdval_t pmd_val(pmd_t pmd)
> {
>   return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
>-  "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
>+  "mov %%rdi, %%rax", ALT_NOT_XEN);
> }
> 
> static inline void set_pud(pud_t *pudp, pud_t pud)
>@@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val)
>   pudval_t ret;
> 
>   ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
>- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
>+ "mov %%rdi, %%rax", ALT_NOT_XEN);
> 
>   return (pud_t) { ret };
> }
>@@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val)
> static inline pudval_t pud_val(pud_t pud)
> {
>   return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
>-  "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
>+  "mov %%rdi, %%rax", ALT_NOT_XEN);
> }
> 
> static inline void pud_clear(pud_t *pudp)
>@@ -492,8 +487,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
> static inline p4d_t __p4d(p4dval_t val)
> {
>   p4dval_t ret = 

RE: [PATCH 0/3] x86 disk image and modules initramfs generation

2021-04-20 Thread H. Peter Anvin
Ok... not really sure how this relates to my patch. You are mentioned three 
separate things: modules, headers, and enough of the kernel machinery to build 
out of tree modules. The latter it normally done with a tree that corresponds 
to the state after build + "make clean"; which I *believe* is the same as after 
"make prepare". The former two are "make modules_install" and "make 
headers_install", respectively. Note you can direct them to directory 
hierarchies other than the local system ones for archiving.

It is simply not possible to provide a *general* solution that fits all 
distributions × all boot scenarios.

On April 20, 2021 1:30:07 AM PDT, David Laight  wrote:
>From: H. Peter Anvin
>> Sent: 20 April 2021 00:03
>> 
>> When compiling on a different machine than the runtime target,
>> including but not limited to simulators, it is rather handy to be
>able
>> to produce a bootable image. The scripts for that in x86 are
>> relatively old, and assume a BIOS system.
>
>I've given up and copied the kernel tree onto all my test systems.
>
>I needed something like 'make modules_install' and 'make install'
>that would generated a directory tree that could be copied (scp -r)
>onto the target system.
>
>But the script to run 'update-grub' is all intwined in the commands.
>
>You also don't get a copy of the headers.
>Even for the local system (as root) you just get a symlink into
>the source tree.
>This causes a problem trying to build 'out of tree' modules
>after updating the kernel source tree (but not rebulding).
>
>I can (and do) write 'horrid' makefiles (gmake and nmake)
>but this seemed to need more refactoring that I wanted to do.
>
>   David
>
>-
>Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes,
>MK1 1PT, UK
>Registration No: 1397386 (Wales)

-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


[PATCH v2 1/3] x86/boot: Modernize genimage script; hdimage support; remove bzlilo

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin (Intel)" 

The image generation scripts in arch/x86/boot are pretty out of date,
except for the isoimage target. Update and clean up the
genimage.sh script, and make it support an arbitrary number of
initramfs files in the image.

Add a "hdimage" target, which can be booted by either BIOS or
EFI (if the kernel is compiled with the EFI stub.) For EFI to be able
to pass the command line to the kernel, we need the EFI shell, but the
firmware builtin EFI shell, if it even exists, is pretty much always
the last resort boot option, so search for OVMF or EDK2 and explicitly
include a copy of the EFI shell.

To make this all work, use bash features in the script.  Furthermore,
this version of the script makes use of some mtools features,
especially mpartition, that might not exist in very old version of
mtools, but given all the other dependencies on this script this
doesn't seem such a big deal.

Signed-off-by: H. Peter Anvin (Intel) 
---
 arch/x86/Makefile|   5 +-
 arch/x86/boot/.gitignore |   1 +
 arch/x86/boot/Makefile   |  44 +++---
 arch/x86/boot/genimage.sh| 294 ++-
 arch/x86/boot/mtools.conf.in |   3 +
 5 files changed, 247 insertions(+), 100 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9a85eae37b17..943f26a32834 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -247,7 +247,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/
 
 boot := arch/x86/boot
 
-BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage
+BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
 
 PHONY += bzImage $(BOOT_TARGETS)
 
@@ -305,8 +305,9 @@ define archhelp
   echo  '  fdimage - Create 1.4MB boot floppy image 
(arch/x86/boot/fdimage)'
   echo  '  fdimage144  - Create 1.4MB boot floppy image 
(arch/x86/boot/fdimage)'
   echo  '  fdimage288  - Create 2.8MB boot floppy image 
(arch/x86/boot/fdimage)'
+  echo  '  hdimage - Create a BIOS/EFI hard disk image 
(arch/x86/boot/hdimage)'
   echo  '  isoimage- Create a boot CD-ROM image 
(arch/x86/boot/image.iso)'
-  echo  'bzdisk/fdimage*/isoimage also accept:'
+  echo  'bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  'FDARGS="..."  arguments for the booted kernel'
   echo  'FDINITRD=file initrd for the booted kernel'
   echo  ''
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 9cc7f1357b9b..1189be057ebd 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -11,3 +11,4 @@ setup.elf
 fdimage
 mtools.conf
 image.iso
+hdimage
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index fe605205b4ce..dfbc26a8e924 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -29,7 +29,7 @@ KCOV_INSTRUMENT   := n
 SVGA_MODE  := -DSVGA_MODE=NORMAL_VGA
 
 targets:= vmlinux.bin setup.bin setup.elf bzImage
-targets+= fdimage fdimage144 fdimage288 image.iso mtools.conf
+targets+= fdimage fdimage144 fdimage288 image.iso hdimage
 subdir-:= compressed
 
 setup-y+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o 
cpucheck.o
@@ -115,47 +115,49 @@ $(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
 # Set this if you want to pass append arguments to the
-# bzdisk/fdimage/isoimage kernel
+# bzdisk/fdimage/hdimage/isoimage kernel
 FDARGS =
-# Set this if you want an initrd included with the
-# bzdisk/fdimage/isoimage kernel
+# Set this if you want one or more initrds included in the image
 FDINITRD =
 
-image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
+imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
 
 $(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
 
+targets += mtools.conf
+
+# genimage.sh requires bash, but it also has a bunch of other
+# external dependencies.
 quiet_cmd_genimage = GENIMAGE $3
-cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
-   $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD)
+cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
+   $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
 
-PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install
+PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install
 
 # This requires write access to /dev/fd0
-bzdisk: $(obj)/bzImage $(obj)/mtools.conf
+# All images require syslinux to be installed; hdimage also requires
+# EDK2/OVMF if the kernel is compiled with the EFI stub.
+bzdisk: $(imgdeps)
$(call cmd,genimage,bzdisk,/dev/fd0)
 
-# These require being root or having syslinux 2.02 or higher installed
-fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
+fdimage fd

[PATCH v2 3/3] x86/boot: Add option to add modules.img to {fd,hd,iso}image

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin (Intel)" 

Make it easy to generate a disk image which includes the all-modules
initramfs image.

Signed-off-by: H. Peter Anvin (Intel) 
---
 arch/x86/Makefile  |  3 ++-
 arch/x86/boot/Makefile | 20 
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 943f26a32834..74f4e66568d7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -309,7 +309,8 @@ define archhelp
   echo  '  isoimage- Create a boot CD-ROM image 
(arch/x86/boot/image.iso)'
   echo  'bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  'FDARGS="..."  arguments for the booted kernel'
-  echo  'FDINITRD=file initrd for the booted kernel'
+  echo  'FDINITRD=file initrd for the booted kernel'
+  echo  'FDMODS=1 to include all modules as an initrd'
   echo  ''
   echo  '  kvm_guest.config- Enable Kconfig items for running this kernel 
as a KVM guest'
   echo  '  xen.config  - Enable Kconfig items for running this kernel 
as a Xen guest'
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dfbc26a8e924..a4f8c66a63d0 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -116,11 +116,23 @@ $(obj)/compressed/vmlinux: FORCE
 
 # Set this if you want to pass append arguments to the
 # bzdisk/fdimage/hdimage/isoimage kernel
-FDARGS =
+FDARGS   :=
 # Set this if you want one or more initrds included in the image
-FDINITRD =
+FDINITRD :=
+# Set this to 1 if you want usr/modules.img included in the image
+FDMODS   :=
 
-imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
+imgdeps   := $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
+fdinitrds := $(FDINITRD)
+
+ifneq ($(FDMODS),)
+imgdeps   += $(objtree)/usr/modules.img
+fdinitrds += $(objtree)/usr/modules.img
+
+$(objtree)/usr/modules.img:
+   $(Q)$(MAKE) -f $(srctree)/Makefile usr/modules.img
+KBUILD_MODULES := 1
+endif
 
 $(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
@@ -131,7 +143,7 @@ targets += mtools.conf
 # external dependencies.
 quiet_cmd_genimage = GENIMAGE $3
 cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
-   $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
+   $(obj)/mtools.conf '$(FDARGS)' $(fdinitrds)
 
 PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install
 
-- 
2.30.2



[PATCH v2 2/3] usr, modules: Add build target for creating a modules initramfs image

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin (Intel)" 

Some distributions really cannot be booted without modules
anymore. To allow an externally built kernel to be run, it is handy to
be able to create an initramfs image with all the modules, that can
appended to an existing initramfs image (preferrably without any
modules, but not necessarily, since the /lib/modules path includes the
kernel version.)

This image is generated in usr/modules.img.

Since this image may end up being rather large, change the default for
CONFIG_INITRAMFS_COMPRESSION_*. We already have a special case in the
Makefile to not re-compress an existing initramfs image; add the very
small default builtin initramfs as another case to suppress the
compression. To avoid losing the default compression setting, add a
separate variable for the possibly overridden builtin initramfs
compression method.

Signed-off-by: H. Peter Anvin (Intel) 
---
 Makefile   | 17 -
 usr/.gitignore |  3 +++
 usr/Kconfig| 31 +--
 usr/Makefile   | 39 +++
 4 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/Makefile b/Makefile
index 4730cf156f6b..89e40760384d 100644
--- a/Makefile
+++ b/Makefile
@@ -622,7 +622,7 @@ KBUILD_MODULES :=
 KBUILD_BUILTIN := 1
 
 # If we have only "make modules", don't compile built-in objects.
-ifeq ($(MAKECMDGOALS),modules)
+ifeq (modules,$(MAKECMDGOALS))
   KBUILD_BUILTIN :=
 endif
 
@@ -630,7 +630,8 @@ endif
 # in addition to whatever we do anyway.
 # Just "make" or "make all" shall build modules as well
 
-ifneq ($(filter all modules nsdeps %compile_commands.json 
clang-%,$(MAKECMDGOALS)),)
+ifneq ($(filter all modules nsdeps %compile_commands.json clang-% \
+   modules.img usr/modules.img,$(MAKECMDGOALS)),)
   KBUILD_MODULES := 1
 endif
 
@@ -1461,15 +1462,17 @@ modules_prepare: prepare
 PHONY += modules_install
 modules_install: _modinst_ _modinst_post
 
+INSTALL_MOD_LN ?= ln -s
+
 PHONY += _modinst_
 _modinst_:
@rm -rf $(MODLIB)/kernel
@rm -f $(MODLIB)/source
@mkdir -p $(MODLIB)/kernel
-   @ln -s $(abspath $(srctree)) $(MODLIB)/source
+   @$(INSTALL_MOD_LN) $(abspath $(srctree)) $(MODLIB)/source
@if [ ! $(objtree) -ef  $(MODLIB)/build ]; then \
-   rm -f $(MODLIB)/build ; \
-   ln -s $(CURDIR) $(MODLIB)/build ; \
+   rm -rf $(MODLIB)/build ; \
+   $(INSTALL_MOD_LN) $(CURDIR) $(MODLIB)/build ; \
fi
@sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
@cp -f modules.builtin $(MODLIB)/
@@ -1489,6 +1492,10 @@ modules_sign:
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modsign
 endif
 
+# initramfs image containing all modules
+modules.img usr/modules.img: modules
+   $(Q)$(MAKE) $(build)=usr usr/modules.img
+
 else # CONFIG_MODULES
 
 # Modules not configured
diff --git a/usr/.gitignore b/usr/.gitignore
index 935442ed1eb2..14e2fae88d8a 100644
--- a/usr/.gitignore
+++ b/usr/.gitignore
@@ -2,3 +2,6 @@
 gen_init_cpio
 initramfs_data.cpio
 /initramfs_inc_data
+/modules
+modules.cpio
+modules.img
diff --git a/usr/Kconfig b/usr/Kconfig
index 8bbcf699fe3b..625372682b5e 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -109,18 +109,23 @@ config RD_ZSTD
  If unsure, say N.
 
 choice
-   prompt "Built-in initramfs compression mode"
-   depends on INITRAMFS_SOURCE != ""
-   help
- This option allows you to decide by which algorithm the builtin
- initramfs will be compressed.  Several compression algorithms are
- available, which differ in efficiency, compression and
- decompression speed.  Compression speed is only relevant
- when building a kernel.  Decompression speed is relevant at
- each boot. Also the memory usage during decompression may become
- relevant on memory constrained systems. This is usually based on the
- dictionary size of the algorithm with algorithms like XZ and LZMA
- featuring large dictionary sizes.
+   prompt "Preferred initramfs compression mode"
+   default INITRAMFS_COMPRESSION_XZ   if RD_XZ
+   default INITRAMFS_COMPRESSION_GZIP if RD_GZIP
+   default INITRAMFS_COMPRESSION_LZO  if RD_LZO
+   default INITRAMFS_COMPRESSION_LZ4  if RD_LZ4
+   default INITRAMFS_COMPRESSION_NONE
+   help
+ This option allows you to decide by which algorithm the
+ builtin initramfs and usr/modules.img will be compressed.
+ Several compression algorithms are available, which differ
+ in efficiency, compression and decompression speed.
+ Compression speed is only relevant when building a kernel.
+ Decompression speed is relevant at each boot. Also the
+ memory usage during decompression may become relevant on
+ memory constrained systems. This is usually based on the
+ di

[PATCH 0/3] x86 disk image and modules initramfs generation

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin" (Intel) 

When compiling on a different machine than the runtime target,
including but not limited to simulators, it is rather handy to be able
to produce a bootable image. The scripts for that in x86 are
relatively old, and assume a BIOS system.

This adds a build target to generate a hdimage which can be booted
either from BIOS or EFI, and modernizes the genimage.sh script
including adding the ability to add an arbitrary number of initramfs
files (limited only by the length of the command line.)

Possibly more controversial, at least from a Kbuild design perspective
(as usual I'm the guy who wants to do something with Kbuild which it
seems it was never really designed to do), is add the ability to
create an initramfs image which includes all the built modules. Some
distributions cannot be easily booted without modules in initramfs,
and this creates an image which can be added to initramfs to provide
the kernel modules, as finalized by "make modules_install".

The final patch put these two together, and allows the modules
initramfs to be included in the x86 boot image.

---

Fixes in v2:

1/3: correct command line generation for multiple initrds
2/3: no change
3/3: append, don't overwrite FDINITRD when adding modules.img

 Makefile |  17 ++-
 arch/x86/Makefile|   8 +-
 arch/x86/boot/.gitignore |   1 +
 arch/x86/boot/Makefile   |  62 +
 arch/x86/boot/genimage.sh| 294 +++
 arch/x86/boot/mtools.conf.in |   3 +
 usr/.gitignore   |   3 +
 usr/Kconfig  |  31 ++---
 usr/Makefile |  39 +-
 9 files changed, 331 insertions(+), 127 deletions(-)


[PATCH 2/3] usr, modules: Add build target for creating a modules initramfs image

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin" (Intel) 

Some distributions really cannot be booted without modules
anymore. To allow an externally built kernel to be run, it is handy to
be able to create an initramfs image with all the modules, that can
appended to an existing initramfs image (preferrably without any
modules, but not necessarily, since the /lib/modules path includes the
kernel version.)

This image is generated in usr/modules.img.

Since this image may end up being rather large, change the default for
CONFIG_INITRAMFS_COMPRESSION_*. We already have a special case in the
Makefile to not re-compress an existing initramfs image; add the very
small default builtin initramfs as another case to suppress the
compression. To avoid losing the default compression setting, add a
separate variable for the possibly overridden builtin initramfs
compression method.

Signed-off-by: H. Peter Anvin (Intel) 
---
 Makefile   | 17 -
 usr/.gitignore |  3 +++
 usr/Kconfig| 31 +--
 usr/Makefile   | 39 +++
 4 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/Makefile b/Makefile
index 4730cf156f6b..89e40760384d 100644
--- a/Makefile
+++ b/Makefile
@@ -622,7 +622,7 @@ KBUILD_MODULES :=
 KBUILD_BUILTIN := 1
 
 # If we have only "make modules", don't compile built-in objects.
-ifeq ($(MAKECMDGOALS),modules)
+ifeq (modules,$(MAKECMDGOALS))
   KBUILD_BUILTIN :=
 endif
 
@@ -630,7 +630,8 @@ endif
 # in addition to whatever we do anyway.
 # Just "make" or "make all" shall build modules as well
 
-ifneq ($(filter all modules nsdeps %compile_commands.json 
clang-%,$(MAKECMDGOALS)),)
+ifneq ($(filter all modules nsdeps %compile_commands.json clang-% \
+   modules.img usr/modules.img,$(MAKECMDGOALS)),)
   KBUILD_MODULES := 1
 endif
 
@@ -1461,15 +1462,17 @@ modules_prepare: prepare
 PHONY += modules_install
 modules_install: _modinst_ _modinst_post
 
+INSTALL_MOD_LN ?= ln -s
+
 PHONY += _modinst_
 _modinst_:
@rm -rf $(MODLIB)/kernel
@rm -f $(MODLIB)/source
@mkdir -p $(MODLIB)/kernel
-   @ln -s $(abspath $(srctree)) $(MODLIB)/source
+   @$(INSTALL_MOD_LN) $(abspath $(srctree)) $(MODLIB)/source
@if [ ! $(objtree) -ef  $(MODLIB)/build ]; then \
-   rm -f $(MODLIB)/build ; \
-   ln -s $(CURDIR) $(MODLIB)/build ; \
+   rm -rf $(MODLIB)/build ; \
+   $(INSTALL_MOD_LN) $(CURDIR) $(MODLIB)/build ; \
fi
@sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
@cp -f modules.builtin $(MODLIB)/
@@ -1489,6 +1492,10 @@ modules_sign:
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modsign
 endif
 
+# initramfs image containing all modules
+modules.img usr/modules.img: modules
+   $(Q)$(MAKE) $(build)=usr usr/modules.img
+
 else # CONFIG_MODULES
 
 # Modules not configured
diff --git a/usr/.gitignore b/usr/.gitignore
index 935442ed1eb2..14e2fae88d8a 100644
--- a/usr/.gitignore
+++ b/usr/.gitignore
@@ -2,3 +2,6 @@
 gen_init_cpio
 initramfs_data.cpio
 /initramfs_inc_data
+/modules
+modules.cpio
+modules.img
diff --git a/usr/Kconfig b/usr/Kconfig
index 8bbcf699fe3b..625372682b5e 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -109,18 +109,23 @@ config RD_ZSTD
  If unsure, say N.
 
 choice
-   prompt "Built-in initramfs compression mode"
-   depends on INITRAMFS_SOURCE != ""
-   help
- This option allows you to decide by which algorithm the builtin
- initramfs will be compressed.  Several compression algorithms are
- available, which differ in efficiency, compression and
- decompression speed.  Compression speed is only relevant
- when building a kernel.  Decompression speed is relevant at
- each boot. Also the memory usage during decompression may become
- relevant on memory constrained systems. This is usually based on the
- dictionary size of the algorithm with algorithms like XZ and LZMA
- featuring large dictionary sizes.
+   prompt "Preferred initramfs compression mode"
+   default INITRAMFS_COMPRESSION_XZ   if RD_XZ
+   default INITRAMFS_COMPRESSION_GZIP if RD_GZIP
+   default INITRAMFS_COMPRESSION_LZO  if RD_LZO
+   default INITRAMFS_COMPRESSION_LZ4  if RD_LZ4
+   default INITRAMFS_COMPRESSION_NONE
+   help
+ This option allows you to decide by which algorithm the
+ builtin initramfs and usr/modules.img will be compressed.
+ Several compression algorithms are available, which differ
+ in efficiency, compression and decompression speed.
+ Compression speed is only relevant when building a kernel.
+ Decompression speed is relevant at each boot. Also the
+ memory usage during decompression may become relevant on
+ memory constrained systems. This is usually based on the
+ di

[PATCH 1/3] x86/boot: Modernize genimage script; hdimage support; remove bzlilo

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin" (Intel) 

The image generation scripts in arch/x86/boot are pretty out of date,
except for the isoimage target. Update and clean up the
genimage.sh script, and make it support an arbitrary number of
initramfs files in the image.

Add a "hdimage" target, which can be booted by either BIOS or
EFI (if the kernel is compiled with the EFI stub.) For EFI to be able
to pass the command line to the kernel, we need the EFI shell, but the
firmware builtin EFI shell, if it even exists, is pretty much always
the last resort boot option, so search for OVMF or EDK2 and explicitly
include a copy of the EFI shell.

To make this all work, use bash features in the script.  Furthermore,
this version of the script makes use of some mtools features,
especially mpartition, that might not exist in very old version of
mtools, but given all the other dependencies on this script this
doesn't seem such a big deal.

Signed-off-by: H. Peter Anvin (Intel) 
---
 arch/x86/Makefile|   5 +-
 arch/x86/boot/.gitignore |   1 +
 arch/x86/boot/Makefile   |  44 +++---
 arch/x86/boot/genimage.sh| 284 +--
 arch/x86/boot/mtools.conf.in |   3 +
 5 files changed, 238 insertions(+), 99 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 9a85eae37b17..943f26a32834 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -247,7 +247,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/
 
 boot := arch/x86/boot
 
-BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage
+BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
 
 PHONY += bzImage $(BOOT_TARGETS)
 
@@ -305,8 +305,9 @@ define archhelp
   echo  '  fdimage - Create 1.4MB boot floppy image 
(arch/x86/boot/fdimage)'
   echo  '  fdimage144  - Create 1.4MB boot floppy image 
(arch/x86/boot/fdimage)'
   echo  '  fdimage288  - Create 2.8MB boot floppy image 
(arch/x86/boot/fdimage)'
+  echo  '  hdimage - Create a BIOS/EFI hard disk image 
(arch/x86/boot/hdimage)'
   echo  '  isoimage- Create a boot CD-ROM image 
(arch/x86/boot/image.iso)'
-  echo  'bzdisk/fdimage*/isoimage also accept:'
+  echo  'bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  'FDARGS="..."  arguments for the booted kernel'
   echo  'FDINITRD=file initrd for the booted kernel'
   echo  ''
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 9cc7f1357b9b..1189be057ebd 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -11,3 +11,4 @@ setup.elf
 fdimage
 mtools.conf
 image.iso
+hdimage
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index fe605205b4ce..dfbc26a8e924 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -29,7 +29,7 @@ KCOV_INSTRUMENT   := n
 SVGA_MODE  := -DSVGA_MODE=NORMAL_VGA
 
 targets:= vmlinux.bin setup.bin setup.elf bzImage
-targets+= fdimage fdimage144 fdimage288 image.iso mtools.conf
+targets+= fdimage fdimage144 fdimage288 image.iso hdimage
 subdir-:= compressed
 
 setup-y+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o 
cpucheck.o
@@ -115,47 +115,49 @@ $(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
 # Set this if you want to pass append arguments to the
-# bzdisk/fdimage/isoimage kernel
+# bzdisk/fdimage/hdimage/isoimage kernel
 FDARGS =
-# Set this if you want an initrd included with the
-# bzdisk/fdimage/isoimage kernel
+# Set this if you want one or more initrds included in the image
 FDINITRD =
 
-image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
+imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
 
 $(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
 
+targets += mtools.conf
+
+# genimage.sh requires bash, but it also has a bunch of other
+# external dependencies.
 quiet_cmd_genimage = GENIMAGE $3
-cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
-   $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD)
+cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
+   $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
 
-PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install
+PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install
 
 # This requires write access to /dev/fd0
-bzdisk: $(obj)/bzImage $(obj)/mtools.conf
+# All images require syslinux to be installed; hdimage also requires
+# EDK2/OVMF if the kernel is compiled with the EFI stub.
+bzdisk: $(imgdeps)
$(call cmd,genimage,bzdisk,/dev/fd0)
 
-# These require being root or having syslinux 2.02 or higher installed
-fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
+fdimage fd

[PATCH 0/3] x86 disk image and modules initramfs generation

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin" (Intel) 

When compiling on a different machine than the runtime target,
including but not limited to simulators, it is rather handy to be able
to produce a bootable image. The scripts for that in x86 are
relatively old, and assume a BIOS system.

This adds a build target to generate a hdimage which can be booted
either from BIOS or EFI, and modernizes the genimage.sh script
including adding the ability to add an arbitrary number of initramfs
files (limited only by the length of the command line.)

Possibly more controversial, at least from a Kbuild design perspective
(as usual I'm the guy who wants to do something with Kbuild which it
seems it was never really designed to do), is add the ability to
create an initramfs image which includes all the built modules. Some
distributions cannot be easily booted without modules in initramfs,
and this creates an image which can be added to initramfs to provide
the kernel modules, as finalized by "make modules_install".

The final patch put these two together, and allows the modules
initramfs to be included in the x86 boot image.

 Makefile |  17 ++-
 arch/x86/Makefile|   8 +-
 arch/x86/boot/.gitignore |   1 +
 arch/x86/boot/Makefile   |  55 +
 arch/x86/boot/genimage.sh| 284 +++
 arch/x86/boot/mtools.conf.in |   3 +
 usr/.gitignore   |   3 +
 usr/Kconfig  |  31 ++---
 usr/Makefile |  39 +-
 9 files changed, 318 insertions(+), 123 deletions(-)


[PATCH 3/3] x86/boot: Add option to add modules.img to {fd,hd,iso}image

2021-04-19 Thread H. Peter Anvin
From: "H. Peter Anvin" (Intel) 

Make it easy to generate a disk image which includes the all-modules
initramfs image.

Signed-off-by: H. Peter Anvin (Intel) 
---
 arch/x86/Makefile  |  3 ++-
 arch/x86/boot/Makefile | 11 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 943f26a32834..74f4e66568d7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -309,7 +309,8 @@ define archhelp
   echo  '  isoimage- Create a boot CD-ROM image 
(arch/x86/boot/image.iso)'
   echo  'bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  'FDARGS="..."  arguments for the booted kernel'
-  echo  'FDINITRD=file initrd for the booted kernel'
+  echo  'FDINITRD=file initrd for the booted kernel'
+  echo  'FDMODS=1 to include all modules as an initrd'
   echo  ''
   echo  '  kvm_guest.config- Enable Kconfig items for running this kernel 
as a KVM guest'
   echo  '  xen.config  - Enable Kconfig items for running this kernel 
as a Xen guest'
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dfbc26a8e924..601ade7adc70 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -119,9 +119,20 @@ $(obj)/compressed/vmlinux: FORCE
 FDARGS =
 # Set this if you want one or more initrds included in the image
 FDINITRD =
+# Set this to 1 if you want usr/modules.img included in the image
+FDMODS =
 
 imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
 
+ifneq ($(FDMODS),)
+imgdeps  += $(objtree)/usr/modules.img
+FDINITRD += $(objtree)/usr/modules.img
+
+$(objtree)/usr/modules.img:
+   $(Q)$(MAKE) -f $(srctree)/Makefile usr/modules.img
+KBUILD_MODULES := 1
+endif
+
 $(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
 
-- 
2.30.2



Re: [RFC PATCH 1/2] Documentation/admin-guide: README & svga: remove use of "rdev"

2020-08-31 Thread H. Peter Anvin
On 2020-08-31 22:38, Randy Dunlap wrote:
>  
> --- linux-next-20200828.orig/Documentation/admin-guide/svga.rst
> +++ linux-next-20200828/Documentation/admin-guide/svga.rst
> @@ -12,7 +12,7 @@ Intro
>  This small document describes the "Video Mode Selection" feature which
>  allows the use of various special video modes supported by the video BIOS. 
> Due
>  to usage of the BIOS, the selection is limited to boot time (before the
> -kernel decompression starts) and works only on 80X86 machines.
> +kernel decompression starts) and works only on 32-bit 80X86 machines.
>  

Incorrect. What controls if this is available is whether or not the kernel is
booted through BIOS firmware (as opposed to UEFI, kexec, etc.)

-hpa



Re: [PATCH] Documentation/x86/boot.rst: minor improvement

2020-08-31 Thread H. Peter Anvin
If you are going to fix the language...

On 2020-08-31 22:25, Cao jin wrote:
> Sorry, I mis-copied 2 addresses. make sure they are CCed.
> 
> On 9/1/20 11:41 AM, Cao jin wrote:
>> Typo fix & file name update
>>
>> Signed-off-by: Cao jin 
>> ---
>>  Documentation/x86/boot.rst | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
>> index 7fafc7ac00d7..c04afec90486 100644
>> --- a/Documentation/x86/boot.rst
>> +++ b/Documentation/x86/boot.rst
>> @@ -1379,7 +1379,7 @@ can be calculated as follows::
>>  In addition to read/modify/write the setup header of the struct
>>  boot_params as that of 16-bit boot protocol, the boot loader should
>>  also fill the additional fields of the struct boot_params as described
>> -in zero-page.txt.
>> +in zero-page.rst.
>>  
>>  After setting up the struct boot_params, the boot loader can load
>>  64-bit kernel in the same way as that of 16-bit boot protocol, but
>> @@ -1391,7 +1391,7 @@ In 64-bit boot protocol, the kernel is started by 
>> jumping to the
>>  
>>  At entry, the CPU must be in 64-bit mode with paging enabled.

(Paging enabled is redundant here.)

>>  The range with setup_header.init_size from start address of loaded
>> -kernel and zero page and command line buffer get ident mapping;
>> +kernel and zero page and command line buffer get identity mapping;

The range with setup_header.init_size from start address of the loaded kernel,
the zero page, and the command line buffer get identity-mapped, anda GDT must
be loaded with the descriptors for selectors __BOOT_CS(0x10) and
__BOOT_DS(0x18): both descriptors must be 4G flat segment with __BOOT_CS
having execute/read
permission and __BOOT_DS...

Also, it might be useful to take a look to see if other data structures, like
setup_data and the initrd also need to be in the identity map.

-hpa



Re: [PATCH] [v2] Documentation: clarify driver licensing rules

2020-08-31 Thread H. Peter Anvin
On 2020-08-14 07:56, Dave Hansen wrote:
> 
> From: Dave Hansen 
> 
> Greg has challenged some recent driver submitters on their license
> choices. He was correct to do so, as the choices in these instances
> did not always advance the aims of the submitters.
> 
> But, this left submitters (and the folks who help them pick licenses)
> a bit confused. They have read things like
> Documentation/process/license-rules.rst which says:
> 
>   individual source files can have a different license
>   which is required to be compatible with the GPL-2.0
> 
> and Documentation/process/submitting-drivers.rst:
> 
>   We don't insist on any kind of exclusive GPL licensing,
>   and if you wish ... you may well wish to release under
>   multiple licenses.
> 
> As written, these appear a _bit_ more laissez faire than we've been in
> practice lately. It sounds like we at least expect submitters to make
> a well-reasoned license choice and to explain their rationale. It does
> not appear that we blindly accept anything that is simply
> GPLv2-compatible.
> 
> Drivers appear to be the most acute source of misunderstanding, so fix
> the driver documentation first. Update it to clarify expectations.
> 

Well written! Retroactive Ack from me :)

-hpa



Re: [PATCH v2] x86: Use xorl %0,%0 in __get_user_asm

2020-08-27 Thread H. Peter Anvin
On 2020-08-27 11:09, Uros Bizjak wrote:
> xorl %0,%0 is equivalent to xorq %0,%0 as both will zero the
> entire register.  Use xorl %0,%0 for all operand sizes to avoid
> REX prefix byte when legacy registers are used and to avoid size
> prefix byte when 16bit registers are used.
> 
> Zeroing the full register is OK in this use case.  xorl %0,%0 also
> breaks register dependency chains, avoiding potential partial
> register stalls with 8 and 16bit operands.
> 
> The patch lowers the size of .fixup section by 20 bytes.
> 
> Changes since v1:
> - Rewrite commit message.
> 
> Signed-off-by: Uros Bizjak 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: Borislav Petkov 
> Cc: "H. Peter Anvin" 

Reviewed-by: H. Peter Anvin (Intel) 


Re: [PATCH] x86: Use XORL r32,r32 in __get_user_asm

2020-08-27 Thread H. Peter Anvin
On 2020-08-27 09:49, Uros Bizjak wrote:
> Use XORL r32,r32 for all operand sizes. x86_64 zero extends 32bit
> operations, so for 64bit operands, XORL r32,r32 is functionally
> equal to XORL r64,r64, but avoids a REX prefix byte when legacy
> registers are used.

Please make it clear that this refers specifically to the use case of both
registers being the same, for zeroing. This could otherwise be misread.

"xorl r64,r64" is nonsensical: you're referring to xorq.

The apparent visual mix here between Intel and gas syntax is also confusing.

I would explicily say, using gcc inline syntax:

xorl %0,%0 is equivalent to xorq %0,%0 as both will zero the entire register.


> 32bit operation also avoids 0x66 size prefix for 16bit operands
> and REX prefix when %sil, %dil and %bpl 8bit registers are used.
> 
> As a bonus, 32bit XORL breaks register dependency chains, avoiding
> potential partial register stalls with 8 and 16bit operands.

Please make it clear that zeroing the full register is OK in this use case
(which it is.)

> The patch lowers the size of .fixup section by 20 bytes.

-hpa




Re: [REGRESSION] x86/cpu fsgsbase breaks TLS in 32 bit rr tracees on a 64 bit system

2020-08-24 Thread H. Peter Anvin
On 2020-08-24 14:10, Andy Lutomirski wrote:
> 
> PTRACE_READ_SEGMENT_DESCRIPTOR to read a segment descriptor.
> 
> PTRACE_SET_FS / PTRACE_SET_GS: Sets FS or GS and updates the base accordingly.
> 
> PTRACE_READ_SEGMENT_BASE: pass in a segment selector, get a base out.
> You would use this to populate the base fields.
> 
> or perhaps a ptrace SETREGS variant that tries to preserve the old
> base semantics and magically sets the bases to match the selectors if
> the selectors are nonzero.
> 
> Do any of these choices sound preferable to any of you?
> 

My suggestion would be to export the GDT and LDT as a (readonly or mostly
readonly) regset(s) rather than adding entirely new operations. We could allow
the LDT and the per-thread GDT entries to be written, subject to the same
limitations as the corresponding system calls.

-hpa



Re: [PATCH 0/4] -ffreestanding/-fno-builtin-* patches

2020-08-20 Thread H. Peter Anvin
On 2020-08-18 13:58, Nick Desaulniers wrote:
> On Tue, Aug 18, 2020 at 1:27 PM Nick Desaulniers
>  wrote:
>>
>> On Tue, Aug 18, 2020 at 1:24 PM Arvind Sankar  wrote:
>>>
>>> On Tue, Aug 18, 2020 at 12:13:22PM -0700, Linus Torvalds wrote:
>>>> On Tue, Aug 18, 2020 at 12:03 PM H. Peter Anvin  wrote:
>>>>>
>>>>> I'm not saying "change the semantics", nor am I saying that playing
>>>>> whack-a-mole *for a limited time* is unreasonable. But I would like to go 
>>>>> back
>>>>> to the compiler authors and get them to implement such a #pragma: "this
>>>>> freestanding implementation *does* support *this specific library 
>>>>> function*,
>>>>> and you are free to call it."
>>>>
>>>> I'd much rather just see the library functions as builtins that always
>>>> do the right thing (with the fallback being "just call the standard
>>>> function").
>>>>
>>>> IOW, there's nothing wrong with -ffreestanding if you then also have
>>>> __builtin_memcpy() etc, and they do the sane compiler optimizations
>>>> for memcpy().
>>>>
>>>> What we want to avoid is the compiler making *assumptions* based on
>>>> standard names, because we may implement some of those things
>>>> differently.
>>>>
>>>
>>> -ffreestanding as it stands today does have __builtin_memcpy and
>>> friends. But you need to then use #define memcpy __builtin_memcpy etc,
>>> which is messy and also doesn't fully express what you want. #pragma, or
>>> even just allowing -fbuiltin-foo options would be useful.
> 
> I do really like the idea of -fbuiltin-foo.  For example, you'd specify:
> 
> -ffreestanding -fbuiltin-bcmp
> 
> as an example. `-ffreestanding` would opt you out of ALL libcall
> optimizations, `-fbuiltin-bcmp` would then opt you back in to
> transforms that produce bcmp.  That way you're informing the compiler
> more precisely about the environment you'd be targeting.  It feels
> symmetric to existing `-fno-` flags (clang makes -f vs -fno- pretty
> easy when there is such symmetry).  And it's already convention that
> if you specify multiple conflicting compiler flags, then the latter
> one specified "wins."  In that sense, turning back on specific
> libcalls after disabling the rest looks more ergonomic to me.
> 
> Maybe Eli or David have thoughts on why that may or may not be as
> ergonomic or possible to implement as I imagine?
> 

I would prefer this to be a #pragma for a header file, rather than
having a very long command line for everything...

-hpa



Re: [PATCH 0/4] -ffreestanding/-fno-builtin-* patches

2020-08-18 Thread H. Peter Anvin
On 2020-08-18 10:56, Nick Desaulniers wrote:
>>
>> The problem here is twofold:
>>
>> 1. The user would be expected to know what kind of the optimizations the
>> compiler can do on what function, which is private knowledge to the
>> compiler.
>>
>> 2. The only way to override -fno-builtin is by a header file with macros
>> overriding the function names with __builtin, but that doesn't tell the
>> compiler proper anything about the execution environment.
>>
>> So the Right Thing is for the compiler authors to change the way
>> -ffreestanding works.
> 
> Sir, this is an Arby's
> 
> There are things all across the compilation landscape that make we
> want to pontificate or even throw a tantrum in an Arby's.  Would I?
> Well, no, I'm just trying to flip burgers or shovel the elephant
> sh...or w/e they do at Arby's (I've never actually been; I detest
> roast beef).
> 
> Would it be interesting to have a way of opting in, as you describe,
> such that your compiler knew exactly what kind of embedded environment
> it was targeting?  Maybe, but I'd argue that opting out is just the
> other side of the same coin. Heads, I win; tails, you lose. That the
> opt in or opt out list is shorter for a given project is not
> particularly interesting.  Should we change the semantics of a fairly
> commonly used compiler flag that multiple toolchains are in agreement
> of, then fix all of the breakage in all of the code that relied on
> those semantics?  I'm afraid that ship may have already
> sailed...probably 20 or 30 years ago.
> 
>> -ffreestanding means, by definition, that there
>> are no library calls (other than libgcc or whatever else is supplied
>> with the compiler) that the compiler can call. That is currently an
>> all-or-nothing choice, or at least one choice per C standard implemented.
> 
> Yes?
> 

I'm not saying "change the semantics", nor am I saying that playing
whack-a-mole *for a limited time* is unreasonable. But I would like to go back
to the compiler authors and get them to implement such a #pragma: "this
freestanding implementation *does* support *this specific library function*,
and you are free to call it." The only way we can get what we really need from
the compilers is by speaking up and requesting it, and we have done so very
successfully recently; further back we tended to get a lot of
language-lawyering, but these days both the gcc and the clang teams have been
wonderfully responsive.

-hpa



Re: [PATCH 0/4] -ffreestanding/-fno-builtin-* patches

2020-08-17 Thread H. Peter Anvin
On 2020-08-17 15:02, Nick Desaulniers wrote:
> -ffreestanding typically inhibits "libcall optimizations" where calls to
> certain library functions can be replaced by the compiler in certain
> cases to calls to other library functions that may be more efficient.
> This can be problematic for embedded targets that don't provide full
> libc implementations.
> 
> -ffreestanding inhibits all such optimizations, which is the safe
> choice, but generally we want the optimizations that are performed. The
> Linux kernel does implement a fair amount of libc routines. Instead of
> -ffreestanding (which makes more sense in smaller images like kexec's
> purgatory image), prefer -fno-builtin-* flags to disable the compiler
> from emitting calls to functions which may not be defined.
> 
> If you see a linkage failure due to a missing symbol that's typically
> defined in a libc, and not explicitly called from the source code, then
> the compiler may have done such a transform.  You can either implement
> such a function (ie. in lib/string.c) or disable the transform outright
> via -fno-builtin-* flag (where * is the name of the library routine, ie.
> -fno-builtin-bcmp).
> 

This is arguably exactly the wrong way around.

The way this *should* be done is by opt-in, not opt-out, which by almost
definition ends up being a game of whack-a-mole, like in this case
stpcpy(). Furthermore, it is unlikely that people will remember what
options to flip when and if stpcpy() happens to be implemented in the
kernel.

The problem here is twofold:

1. The user would be expected to know what kind of the optimizations the
compiler can do on what function, which is private knowledge to the
compiler.

2. The only way to override -fno-builtin is by a header file with macros
overriding the function names with __builtin, but that doesn't tell the
compiler proper anything about the execution environment.

So the Right Thing is for the compiler authors to change the way
-ffreestanding works.  -ffreestanding means, by definition, that there
are no library calls (other than libgcc or whatever else is supplied
with the compiler) that the compiler can call. That is currently an
all-or-nothing choice, or at least one choice per C standard implemented.

Instead, a compile job with -ffreestanding should be able to provide a
list of standard C functions that the compiler may call, and thus the
compiler actually can do the right thing depending on which exact
functions it would consider calling. This list is probably most easily
supplied in the form of a header file with #pragma directives.

-hpa




Re: [PATCH 1/4] Makefile: add -fno-builtin-stpcpy

2020-08-17 Thread H. Peter Anvin
On 2020-08-17 15:02, Nick Desaulniers wrote:
> LLVM implemented a recent "libcall optimization" that lowers calls to
> `sprintf(dest, "%s", str)` where the return value is used to
> `stpcpy(dest, str) - dest`. This generally avoids the machinery involved
> in parsing format strings. This optimization was introduced into
> clang-12. Because the kernel does not provide an implementation of
> stpcpy, we observe linkage failures for almost all targets when building
> with ToT clang.
> 
> The interface is unsafe as it does not perform any bounds checking.
> Disable this "libcall optimization" via `-fno-builtin-stpcpy`.
> 
> Unlike
> commit 5f074f3e192f ("lib/string.c: implement a basic bcmp")
> which cited failures with `-fno-builtin-*` flags being retained in LLVM
> LTO, that bug seems to have been fixed by
> https://reviews.llvm.org/D71193, so the above sha can now be reverted in
> favor of `-fno-builtin-bcmp`.
> 

stpcpy() and (to a lesser degree) mempcpy() are fairly useful routines
in general. Perhaps we *should* provide them?

-hpa



Re: [PATCH] decompress_bunzip2: fix sizeof type in start_bunzip

2020-07-12 Thread H. Peter Anvin
On 2020-07-12 05:59, t...@redhat.com wrote:
> From: Tom Rix 
> 
> clang static analysis flags this error
> 
> lib/decompress_bunzip2.c:671:13: warning: Result of 'malloc' is converted
>   to a pointer of type 'unsigned int', which is incompatible with sizeof
>   operand type 'int' [unix.MallocSizeof]
> bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
>^~~~
> 
> Reviewing the bunzip_data structure, the element dbuf is type
> 
>   /* Intermediate buffer and its size (in bytes) */
>   unsigned int *dbuf, dbufSize;
> 
> So change the type in sizeof to 'unsigned int'
> 

You must be kidding.

If you want to change it, change it to sizeof(bd->dbuf) instead, but this flag
is at least in my opinion a total joke. For sizeof(int) != sizeof(unsigned
int) is beyond bizarre, no matter how stupid the platform.

-hpa



Re: [PATCH 09/16] initrd: remove the BLKFLSBUF call in handle_initrd

2020-07-02 Thread H. Peter Anvin
On 2020-06-15 05:53, Christoph Hellwig wrote:
> BLKFLSBUF used to be overloaded for the ramdisk driver to free the whole
> ramdisk, which was completely different behavior compared to all other
> drivers.  But this magic overload got removed in commit ff26956875c2
> ("brd: remove support for BLKFLSBUF"), so this call is entirely
> pointless now.
> 
> Signed-off-by: Christoph Hellwig 

Does *anyone* use initrd as opposed to initramfs anymore? It would seem
like a good candidate for deprecation/removal.

-hpa



Re: [PATCH] initrd: Remove erroneous comment

2020-06-22 Thread H. Peter Anvin
On 2020-06-22 14:01, Tom Rini wrote:
> 
> I'm picky here because, well, there's a whole lot of moving parts in the
> pre-kernel world.  In a strict sense, "UEFI" doesn't do anything with
> the kernel but based on hpa's comments I assume that at least the
> in-kernel UEFI stub does what Documentation/x86/booting.rst suggests to
> do and consumes initrd=/file just like "initrd /file" in extlinux.conf,
> etc do.  And since the EFI stub is cross-platform, it's worth noting
> this too.
> 

For what it's worth, normally boot loaders don't strip this from the
kernel command line passed to the kernel, although there might be ones
which do so. In general this is bad practice; it is better to let the
initrd show in /proc/cmdline.

-hpa



Re: [PATCH] initrd: Remove erroneous comment

2020-06-22 Thread H. Peter Anvin
On 2020-06-22 13:40, Tom Rini wrote:
> On Mon, Jun 22, 2020 at 01:02:16PM -0700, ron minnich wrote:
> 
>> The other thing you ought to consider fixing:
>> initrd is documented as follows:
>>
>> initrd= [BOOT] Specify the location of the initial ramdisk
>>
>> for bootloaders only.
>>
>> UEFI consumes initrd from the command line as well. As ARM servers
>> increasingly use UEFI, there may be situations in which the initrd
>> option doesn't make its way to the kernel? I don't know, UEFI is such
>> a black box to me. But I've seen this "initrd consumption" happen.
>>
>> Based on docs, and the growing use of bootloaders that are happy to
>> consume initrd= and not pass it to the kernel, you might be better off
>> trying to move to the new command line option anyway.
>>
>> IOW, this comment may not be what people want to see, but ... it might
>> also be right. Or possibly changed to:
>>
>> /*
>>  * The initrd keyword is in use today on ARM, PowerPC, and MIPS.
>>  * It is also reserved for use by bootloaders such as UEFI and may
>>  * be consumed by them and not passed on to the kernel.
>>  * The documentation also shows it as reserved for bootloaders.
>>  * It is advised to move to the initrdmem= option whereever possible.
>>  */
> 
> Fair warning, one of the other hats I wear is the chief custodian of the
> U-Boot project.
> 
> Note that on most architectures in modern times the device tree is used
> to pass in initrd type information and "initrd=" on the command line is
> quite legacy.
> 
> But what do you mean UEFI "consumes" initrd= ?  It's quite expected that
> when you configure grub/syslinux/systemd-boot/whatever via extlinux.conf
> or similar with "initrd /some/file" something reasonable happens to
> read that in to memory and pass along the location to Linux (which can
> vary from arch to arch, when not using device tree).  I guess looking at 
> Documentation/x86/boot.rst is where treating initrd= as a file that
> should be handled and ramdisk_image / ramdisk_size set came from.  I do
> wonder what happens in the case of ARM/ARM64 + UEFI without device tree.
> 

UEFI plus the in-kernel UEFI stub is, in some ways, a "bootloader" in
the traditional sense. It is totally fair that we should update the
documentation with this as a different case, though, because it is part
of the kernel tree and so the kernel now has partial ownership of the
namespace.

I suggest "STUB" for "in-kernel firmware stub" for this purpose; no need
to restrict it to a specific firmware for the purpose of namespace
reservation.

-hpa


Re: umip: AMD Ryzen 3900X, pagefault after emulate SLDT/SIDT instruction

2020-05-19 Thread H. Peter Anvin
On 2020-05-19 07:38, Andreas Rammhold wrote:
> Hi,
> 
> I've been running into a weird problem with UMIP on a current Ryzen
> 3900x with kernel 5.6.11 where a process receives a page fault after the
> kernel handled the SLDT (or SIDT) instruction (emulation).
> 
> The program I am running is run through WINE in 32bit mode and tries to
> figure out if it is running in a VMWare machine by comparing the results
> of SLDT against well known constants (basically as shown in the
> [example] linked below).
> 

Extremely weird. What is it expecting to happen -- or rather, what do you
*want* it to do?

-hpa



Re: [PATCH] x86: Use INVPCID mnemonic in invpcid.h

2020-05-08 Thread H. Peter Anvin
Reviewed-by: H. Peter Anvin (Intel) 

On 2020-05-08 02:22, Uros Bizjak wrote:
> Current minimum required version of binutils is 2.23,
> which supports INVPCID instruction mnemonic.
> 
> Replace the byte-wise specification of INVPCID with
> this proper mnemonic.
> 
> Signed-off-by: Uros Bizjak 
> CC: "H. Peter Anvin" 
> CC: Ingo Molnar 
> CC: Thomas Gleixner 
> ---
>  arch/x86/include/asm/invpcid.h | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
> index 989cfa86de85..23749bbca0ad 100644
> --- a/arch/x86/include/asm/invpcid.h
> +++ b/arch/x86/include/asm/invpcid.h
> @@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned 
> long addr,
>* stale TLB entries and, especially if we're flushing global
>* mappings, we don't want the compiler to reorder any subsequent
>* memory accesses before the TLB flush.
> -  *
> -  * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
> -  * invpcid (%rcx), %rax in long mode.
>*/
> - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
> -   : : "m" (desc), "a" (type), "c" () : "memory");
> + asm volatile ("invpcid %1, %0"
> +   : : "r" (type), "m" (desc) : "memory");
>  }
>  
>  #define INVPCID_TYPE_INDIV_ADDR  0
> 



Re: [PATCH] x86: bitops: fix build regression

2020-05-08 Thread H. Peter Anvin
On 2020-05-08 10:21, Nick Desaulniers wrote:
>>
>> One last suggestion.  Add the "b" modifier to the mask operand: "orb
>> %b1, %0".  That forces the compiler to use the 8-bit register name
>> instead of trying to deduce the width from the input.
> 
> Ah right: 
> https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers
> 
> Looks like that works for both compilers.  In that case, we can likely
> drop the `& 0xff`, too.  Let me play with that, then I'll hopefully
> send a v3 today.
> 

Good idea. I requested a while ago that they document these modifiers; they
chose not to document them all which in some ways is good; it shows what they
are willing to commit to indefinitely.

-hpa


Re: [tip: x86/urgent] x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h

2019-08-24 Thread H. Peter Anvin
On 8/24/19 11:19 AM, Pavel Machek wrote:
> On Fri 2019-08-23 01:10:49, tip-bot2 for Tom Lendacky wrote:
>> The following commit has been merged into the x86/urgent branch of tip:
>>
>> Commit-ID: c49a0a80137c7ca7d6ced4c812c9e07a949f6f24
>> Gitweb:
>> https://git.kernel.org/tip/c49a0a80137c7ca7d6ced4c812c9e07a949f6f24
>> Author:Tom Lendacky 
>> AuthorDate:Mon, 19 Aug 2019 15:52:35 
>> Committer: Borislav Petkov 
>> CommitterDate: Mon, 19 Aug 2019 19:42:52 +02:00
>>
>> x86/CPU/AMD: Clear RDRAND CPUID bit on AMD family 15h/16h
>>
>> There have been reports of RDRAND issues after resuming from suspend on
>> some AMD family 15h and family 16h systems. This issue stems from a BIOS
>> not performing the proper steps during resume to ensure RDRAND continues
>> to function properly.
> 
> There are quite a few unanswered questions here.
> 
> a) Is there/should there be CVE for this?
> 
> b) Can we perform proper steps in kernel, thus making RDRAND usable
> even when BIOS is buggy?
> 

The kernel should at least be able to set its internal "CPUID" bit, visible
through /proc/cpuinfo.

-hpa



Re: [PATCH 1/1] x86/boot: clear some fields explicitly

2019-07-25 Thread H. Peter Anvin
On 7/25/19 3:03 PM, Thomas Gleixner wrote:
> On Thu, 25 Jul 2019, h...@zytor.com wrote:
>> On July 25, 2019 2:48:30 PM PDT, Thomas Gleixner  wrote:
>>>
>>> But seriously I think it's not completely insane what they are doing
>>> and the table based approach is definitely more readable and maintainable
>>> than the existing stuff.
>>
>> Doing this table based does seem like a good idea.
> 
> The question is whether we use a 'toclear' table or a 'preserve' table. I'd
> argue that the 'preserve' approach is saner.
> 

I agree.

>> Sent from my Android device with K-9 Mail. Please excuse my brevity.
> 
> I surely excuse the brevity, but the formatting mess which that brevity app
> creates is not excusable.
> 

I'll try to improve it...

-hpa



Re: [PATCH 1/1] x86/boot: clear some fields explicitly

2019-07-25 Thread H. Peter Anvin
On 7/25/19 12:22 AM, Thomas Gleixner wrote:
>>
>> The problem with this is that it will break silently when changes are
>> made to this structure.
> 
> That's not really the worst problem. Changes to that struct which touch any
> of the to be cleared ranges will break anyway if not handled correctly in
> the sanitizer function.
> 

Not really... that's kind of the point (the cleared ranges are cleared
explicitly because the boot loader failed to do so, so zeroing them is what
the boot loader should have done.)

The most correct way to address this would be to have an explicit list of
members to be *preserved* even if the sentinel triggers.

The easy way would be to put in a suitable cast to clear the warning -- I
would not be surprised if an explicit cast to something like (void *) would
quiet the warning, or else (yuck) put in an explicit (well-commented) #pragma
to shut it up.

-hpa


Re: [PATCH RFC 1/2] x86/boot: Introduce the setup_header2

2019-06-06 Thread H. Peter Anvin
On 5/24/19 2:55 AM, Daniel Kiper wrote:
> Due to limited space left in the setup header it was decided to
> introduce the setup_header2. Its role is to communicate Linux kernel
> supported features to the boot loader. Starting from now this is the
> primary way to communicate things to the boot loader.
> 
> Suggested-by: H. Peter Anvin 
> Signed-off-by: Daniel Kiper 
> Reviewed-by: Ross Philipson 
> Reviewed-by: Eric Snowberg 
> ---
> I know that setup_header2 is not the best name. There were some
> alternatives proposed like setup_header_extra, setup_header_addendum,
> setup_header_more, ext_setup_header, extended_setup_header, extended_header
> and extended_setup. Sadly, I am not happy with any of them. So,
> leaving setup_header2 as is but still looking for better name.
> Probably shorter == better...

I would say kernel_info. The relationships between the headers are analogous
to the various data sections:

setup_header= .data
boot_params/setup_data  = .bss

What is missing from the above list? That's right:

kernel_info = .rodata

We have been (ab)using .data for things that could go into .rodata or .bss for
a long time, for lack of alternatives and -- especially early on -- intertia.
Also, the BIOS stub is responsible for creating boot_params, so it isn't
available to a BIOS-based loader (setup_data is, though.)

setup_header is permanently limited to 144 bytes due to the reach of the
2-byte jump field, which doubles as a length field for the structure, combined
with the size of the "hole" in struct boot_params that a protected-mode loader
or the BIOS stub has to copy it into. It is currently 119 bytes long, which
leaves us with 25 very precious bytes. This isn't something that can be fixed
without revising the boot protocol entirely, breaking backwards compatibility.

boot_params proper is limited to 4096 bytes, but can be arbitrarily extended
by adding setup_data entries. It cannot be used to communicate properties of
the kernel image, because it is .bss and has no image-provided content.

kernel_info solves this by providing an extensible place for information about
the kernel image. It is readonly, because the kernel cannot rely on a
bootloader copying its contents anywhere, but that is OK; if it becomes
necessary it can still contain data items that an enabled bootloader would be
expected to copy into a setup_data chunk.

^ The above or some variant thereof may be a good thing to put both in your
patch comments as well as in the boot protocol documentation.

While we are making a change that bumps the version number anyway, there is
another change I would like to make to the boot protocol which we might as
well do at the same time. setup_data is a bit awkward to use for extremely
large data objects, both because the setup_data header has to be adjacent to
the data object, and because it has a 32-bit length field. However, it is
important that intermediate stages of the boot process have a way to identify
which chunks of memory are occupied by kernel data.

Thus I think we should introduce a uniform way to specify such indirect data.
We define a new setup_data type we can maybe call SETUP_INDIRECT; a
SETUP_INDIRECT data item would be an array of structures of the form:

struct setup_indirect {
__u32 type;
__u32 reserved; /* Reserved, must be set to zero */
__u64 len;
__u64 addr;
};

... where type is itself simply a SETUP_* type -- although we probably don't
want to let it be SETUP_INDIRECT itself since making it a tree structure could
require a lot of stack space in something that needs to parse it, and stack
space can be limited in boot contexts.

This would be particularly useful for having SETUP_INITRAMFS, if it becomes
desirable to allow the kernel to parse a non-contiguous set of memory regions
for the initramfs.

It might be a good idea to immediately start out struct kernel_info with
either a high mark or a bitmask of SETUP_* types that the kernel supports. A
bitmask would be more flexible, but would need provisions to be grown in the
future.

Which leads me to yet another thought.

We probably want to make the contents of kernel_info a bit more structured to
allow for content that may need to be extended in the future, or is inherently
variable length (like strings.)

This would lend itself to a structure such as:

- Magic number
- Length of total structure

... followed by a list of data chunks, each prefixed by a length field. The
first data chunk would be the main (root) structure; other data structures are
pointed to from the root structure using offsets from the beginning of the
structure (the magic number field.)

As an implementation detail, strings can of course be "pooled" into a single
data chunk as long as they are zero-terminated.

I have intentionally avoided specifying a type field for each data chunk;
history s

Re: [PATCH v3 2/2] initramfs: introduce do_readxattrs()

2019-05-17 Thread H. Peter Anvin
On 5/17/19 2:02 PM, Arvind Sankar wrote:
> On Fri, May 17, 2019 at 01:18:11PM -0700, h...@zytor.com wrote:
>>
>> Ok... I just realized this does not work for a modular initramfs, composed 
>> at load time from multiple files, which is a very real problem. Should be 
>> easy enough to deal with: instead of one large file, use one companion file 
>> per source file, perhaps something like filename..xattrs (suggesting double 
>> dots to make it less likely to conflict with a "real" file.) No leading dot, 
>> as it makes it more likely that archivers will sort them before the file 
>> proper.
> This version of the patch was changed from the previous one exactly to deal 
> with this case --
> it allows for the bootloader to load multiple initramfs archives, each
> with its own .xattr-list file, and to have that work properly.
> Could you elaborate on the issue that you see?
> 

Well, for one thing, how do you define "cpio archive", each with its own
.xattr-list file? Second, that would seem to depend on the ordering, no,
in which case you depend critically on .xattr-list file following the
files, which most archivers won't do.

Either way it seems cleaner to have this per file; especially if/as it
can be done without actually mucking up the format.

I need to run, but I'll post a more detailed explanation of what I did
in a little bit.

-hpa



Re: [PATCH v3 2/2] initramfs: introduce do_readxattrs()

2019-05-17 Thread H. Peter Anvin
On 5/17/19 1:18 PM, h...@zytor.com wrote:
> 
> Ok... I just realized this does not work for a modular initramfs, composed at 
> load time from multiple files, which is a very real problem. Should be easy 
> enough to deal with: instead of one large file, use one companion file per 
> source file, perhaps something like filename..xattrs (suggesting double dots 
> to make it less likely to conflict with a "real" file.) No leading dot, as it 
> makes it more likely that archivers will sort them before the file proper.
> 
> A side benefit is that the format can be simpler as there is no need to 
> encode the filename.
> 
> A technically cleaner solution still, but which would need archiver 
> modifications, would be to encode the xattrs as an optionally nameless file 
> (just an empty string) with a new file mode value, immediately following the 
> original file. The advantage there is that the archiver itself could support 
> xattrs and other extended metadata (which has been requested elsewhere); the 
> disadvantage obviously is that that it requires new support in the archiver. 
> However, at least it ought to be simpler since it is still a higher protocol 
> level than the cpio archive itself.
> 
> There's already one special case in cpio, which is the "!!!TRAILER!!!" 
> filename; although I don't think it is part of the formal spec, to the extent 
> there is one, I would expect that in practice it is always encoded with a 
> mode of 0, which incidentally could be used to unbreak the case where such a 
> filename actually exists. So one way to support such extended metadata would 
> be to set mode to 0 and use the filename to encode the type of metadata. I 
> wonder how existing GNU or BSD cpio (the BSD one is better maintained these 
> days) would deal with reading such a file; it would at least not be a 
> regression if it just read it still, possibly with warnings. It could also be 
> possible to use bits 17:16 in the mode, which are traditionally always zero 
> (mode_t being 16 bits), but I believe are present in most or all of the cpio 
> formats for historical reasons. It might be accepted better by existing 
> implementations to use one of these high bits combined with S_IFREG, I dont 
> know.
> 

Correction: it's just !!!TRAILER!!!.

I tested with GNU cpio, BSD cpio, scpio and pax.

With a mode of 0:

- GNU cpio errors, but extracts all the other files.
- BSD cpio extracts them as regular files.
- scpio and pax abort.

With a mode of 0x18000 (bit 16 + S_IFREG), all of them happily extracted
the data as regular files.

-hpa


Re: Potentially missing "memory" clobbers in bitops.h for x86

2019-03-29 Thread H. Peter Anvin
On 3/29/19 2:09 PM, Paul E. McKenney wrote:
>>
>> Note: the atomic versions of these functions obviously need to have
>> "volatile" and the clobber anyway, as they are by definition barriers
>> and moving memory operations around them would be a very serious error.
> 
> The atomic functions that return void don't need to order anything except
> the input and output arguments.  The oddness with clear_bit() is that the
> memory changed isn't necessarily the quantity referenced by the argument,
> if the number of bits specified is large.
> 
> So (for example) atomic_inc() does not need a "memory" clobber, right?
> 

I don't believe that is true: the code calling it has a reasonable
expectation that previous memory operations have finished and later
memory operations have not started from the point of view of another
processor. You are more of an expert on memory ordering than I am, but
I'm 89% sure that there is plenty of code in the kernel which makes that
assumption.

-hpa



Re: Potentially missing "memory" clobbers in bitops.h for x86

2019-03-29 Thread H. Peter Anvin
On 3/29/19 8:54 AM, Alexander Potapenko wrote:
> 
>> Of course, this would force the compiler to actually compute the
>> offset, which would slow things down.  I have no idea whether this
>> would be better or worse than just using the "memory" clobber.
> Just adding the "memory" clobber to clear_bit() changes sizes of 5
> kernel functions (the three mentioned above, plus hub_activate() and
> native_send_call_func_ipi()) by a small margin.
> This probably means the performance impact of this clobber is
> negligible in this case.

I would agree with that.

Could you perhaps verify whether or not any of the above functions
contains a currently manifest bug?

Note: the atomic versions of these functions obviously need to have
"volatile" and the clobber anyway, as they are by definition barriers
and moving memory operations around them would be a very serious error.

-hpa





Re: [PATCH v2] tty/serial: Add a serial port simulator

2019-03-28 Thread H. Peter Anvin
Dumb question: this is basically a pty on steroids. Wouldn't this be
better done by enhancing the pty devices?

-0hpa



Re: [RFC] Provide in-kernel headers for making it easy to extend the kernel

2019-03-06 Thread H. Peter Anvin
On 3/6/19 3:37 PM, Daniel Colascione wrote:
> 
> I just don't get the opposition to Joel's work. The rest of the thread
> already goes into detail about the problems with pure-filesystem
> solutions, and you and others are just totally ignoring those
> well-thought-out rationales for the module approach and doing
> inflooping on "lol just use a tarball". That's not productive.
> 

You might think they are well thought out, but at least from what I can
tell they seem completely spurious.

-hpa



Re: [RFC] Provide in-kernel headers for making it easy to extend the kernel

2019-03-06 Thread H. Peter Anvin
On 3/6/19 3:37 PM, Daniel Colascione wrote:
> 
> I just don't get the opposition to Joel's work. The rest of the thread
> already goes into detail about the problems with pure-filesystem
> solutions, and you and others are just totally ignoring those
> well-thought-out rationales for the module approach and doing
> inflooping on "lol just use a tarball". That's not productive.
> 
> Look; here's the bottom line: without this work, doing certain kinds
> of system tracing is a nightmare, and with this patch, it Just Works.
> You're arguing that various tools should do a better job of keeping
> the filesystem in sync with the kernel. Maybe you're right. But we
> don't live in a world where they will, because if this coherence were
> going to happen, it'd work already. But this work solves the problem:
> by necessity, anything that changes a kernel image *must* update
> modules coherently, whether the kernel image and module come from the
> filesystem, network boot, some kind of SQL database, or carrier
> pigeon. There's nothing wrong with work that very cheaply makes the
> kernel self-describing (introspection is elegant) and that takes
> advantage of *existing* kernel tooling infrastructure to transparently
> do a new thing.
> 
> You don't have to use this patch if you don't want to. Please stop
> trying to block it.
> 

No, that's not how this works. It is far worse to do something the wrong
way than not doing it at all, when it affects the kernel-user space
interactions.

Experience -- and we have almost 30 years of it -- has shown that hacks
of this nature become engrained and all of a sudden is "mandatory". At
the *very least* it needs to comply with the zero-one-infinity rule
rather than being yet another ad hoc hack.

More fundamentally, we already have multiple ways to handle objects that
need to go into the filesystem: they can be installed with (or as)
modules, they can use the firmware interface, and so on.

Saying "it can be a module" is worse than a copout: even if dynamically
loaded -- and many setups lock out post-boot module loadings for
security reasons -- there is nothing to cause it to unload.

The bottom line is that in the end there is no difference between making
this an archive of some sort and a module, except that to use the module
you need privilege that you otherwise may not need. If your argument is
that someone may not be providing the whole set of items provided by
"make modules_install", what is there to say that they would include
your specific module?

Here are some better ways of implementation I can see:

1. Include an archive in "make modules_install". Most trivial; no kernel
   changes at all.
2. Generalize the initramfs code to be able to create a pre-populated
   tmpfs at any time, that can be populated from an archive provided by
   the firmware loading mechanism; like all firmware allows it to either
   be built in or fetched from the filesystem. This allows it to be
   built in to the kernel image if that becomes necessary; using tmpfs
   means that it can be pushed out to swap rather than permanently
   stored in kernel memory, and this filesystem can be unmounted freeing
   its memory.
3. Use a squashfs image instead of an archive.

-hpa


Re: [RFC] Provide in-kernel headers for making it easy to extend the kernel

2019-03-06 Thread H. Peter Anvin
On 3/6/19 3:09 PM, Pavel Machek wrote:
> On Fri 2019-01-18 17:55:43, Joel Fernandes wrote:
>> From: "Joel Fernandes (Google)" 
>>
>> Introduce in-kernel headers and other artifacts which are made available
>> as an archive through proc (/proc/kheaders.tgz file). This archive makes
>> it possible to build kernel modules, run eBPF programs, and other
>> tracing programs that need to extend the kernel for tracing purposes
>> without any dependency on the file system having headers and build
>> artifacts.
>>
>> On Android and embedded systems, it is common to switch kernels but not
>> have kernel headers available on the file system. Raw kernel headers
>> also cannot be copied into the filesystem like they can be on other
>> distros, due to licensing and other issues. There's no linux-headers
> 
> If your licensing prevents you from having headers on the
> filesystem... then I guess you should fix the licensing.
> 
> I agree with Christoph, this looks pretty horrible.
>   Pavel
> 

The argument that "it can be a module" is basically an admission of
failure - if it isn't part of the kernel image itself there is no
benefit over where the modules are stored, which will be *somewhere* in
the filesystem.

What I *do* think makes sense is to create an archive with this
information and stuff it in the same place as the modules. It reduces
the amount it is possible to muck it up.

-hpa



Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch

2019-02-20 Thread H. Peter Anvin
On 2/19/19 4:48 AM, Will Deacon wrote:
> 
> I think you'll still hate this, but could we not disable preemption during
> the uaccess-enabled region, re-enabling it on the fault path after we've
> toggled uaccess off and disable it again when we return back to the
> uaccess-enabled region? Doesn't help with tracing, but it should at least
> handle the common case.
> 

There is a worse problem with this, I still realize: this would mean blocking
preemption across what could possibly be a *very* large copy_from_user(), for
example.

Exceptions *have* to handle this; there is no way around it. Perhaps the
scheduler isn't the right place to put these kinds of asserts, either.

Now, __fentry__ is kind of a special beast; in some ways it is an "exception
implemented as a function call"; on x86 one could even consider using an INT
instruction in order to reduce the NOP footprint in the unarmed case.  Nor is
__fentry__ a C function; it has far more of an exception-like ABI.
*Regardless* of what else we do, I believe __fentry__ ought to
save/disable/restore AC, just like an exception does.

The idea of using s/a gcc plugin/objtool/ for this isn't really a bad idea.
Obviously the general problem is undecidable :) but the enforcement of some
simple, fairly draconian rules ("as tight as possible, but no tighter")
shouldn't be a huge problem.

An actual gcc plugin -- which would probably be quite complex -- could make
gcc itself aware of user space accesses and be able to rearrange them to
minimize STAC/CLAC and avoid kernel-space accesses inside those brackets.

Finally, of course, there is the option of simply outlawing this practice as a
matter of policy and require that all structures be accessed through a limited
set of APIs. As I recall, the number of places where there were
performance-critical regions which could not use the normal accessors are
fairly small (signal frames being the main one.) Doing bulk copy to/from
kernel memory and then accessing them from there would have some performance
cost, but would eliminate the need for this complexity entirely.

-hpa


Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch

2019-02-18 Thread H. Peter Anvin
On 2/18/19 6:20 PM, Andy Lutomirski wrote:
> 
> 
>> On Feb 18, 2019, at 4:24 PM, Linus Torvalds  
>> wrote:
>>
>>> On Mon, Feb 18, 2019 at 2:31 PM H. Peter Anvin  wrote:
>>>
>>> The question is what "fix it" means. I'm really concerned about AC escapes,
>>> and everyone else should be, too.
>>
>> I do think that it might be the right thing to do to add some kind of
>> WARN_ON_ONCE() for AC being set in various can-reschedule situations.
>>
>> We'd just have to abstract it sanely. I'm sure arm64 has the exact
>> same issue with PAN - maybe it saves properly, but the same "we
>> wouldn't want to go through the scheduler with PAN clear".
>>
>> On x86, we might as well check DF at the same time as AC.
>>
> 
> hpa is right, though — calling into tracing code with AC set is not really so 
> good.  And calling schedule() (via preempt_enable() or whatever) is also bad 
> because it runs all the scheduler code with AC on.  Admittedly, the scheduler 
> is not *that* interesting of an attack surface.
> 

Not just that, but the other question is just how much code we are running
with AC open. It really should only be done in some very small regions.

-hpa



Re: [PATCH] sched/x86: Save [ER]FLAGS on context switch

2019-02-18 Thread H. Peter Anvin
On 2/16/19 2:30 AM, Peter Zijlstra wrote:
> On Fri, Feb 15, 2019 at 08:06:56PM -0800, h...@zytor.com wrote:
>> This implies we invoke schedule -- a restricted operation (consider
>> may_sleep) during execution of STAC-enabled code, but *not* as an
>> exception or interrupt, since those preserve the flags.
> 
> Meet preempt_enable().

I believe this falls under "doctor, it hurts when I do that." And it hurts for
very good reasons. See below.

>> I have serious concerns about this. This is more or less saying that
>> we have left an unlimited gap and have had AC escape.
> 
> Yes; by allowing normal C in between STAC and CLAC this is so.
> 
>> Does *anyone* see a need to allow this? I got a question at LPC from
>> someone about this, and what they were trying to do once all the
>> layers had been unwound was so far down the wrong track for a root
>> problem that actually has a very simple solution.
> 
> Have you read the rest of the thread?
> 
> All it takes for this to explode is a call to a C function that has
> tracing on it in between the user_access_begin() and user_access_end()
> things. That is a _very_ easy thing to do.
> 
> Heck, GCC is allowed to generate that broken code compiling
> lib/strnlen_user.c; it doesn't _have_ to inline do_strnlen_user (esp.
> with CONFIG_OPTIMIZE_INLINING), and making that a function call would
> get us fentry hooks and ftrace and *BOOM*.
> 
> (Now, of course, its a static function with a single caller, and GCC
> isn't _THAT_ stupid, but it could, if it wanted to)
> 
> Since the bar is _that_ low for mistakes, I figure we'd better fix it.
> 

The question is what "fix it" means. I'm really concerned about AC escapes,
and everyone else should be, too.

For an issue specific to tracing, it would be more appropriate to do the
appropriate things in the tracing entry/exit than in schedule. Otherwise, we
don't want to silently paper over mistakes which could mean that we run a
large portion of the kernel without protection we thought we had.

In that sense, calling schedule() with AC set is in fact a great place to have
a WARN() or even BUG(), because such an event really could imply that the
kernel has been security compromised.

Does that make more sense?

-hpa


Re: [PATCH 17/17] module: Prevent module removal racing with text_poke()

2019-01-17 Thread H. Peter Anvin
On 1/16/19 11:54 PM, Masami Hiramatsu wrote:
> On Wed, 16 Jan 2019 16:32:59 -0800
> Rick Edgecombe  wrote:
> 
>> From: Nadav Amit 
>>
>> It seems dangerous to allow code modifications to take place
>> concurrently with module unloading. So take the text_mutex while the
>> memory of the module is freed.
> 
> At that point, since the module itself is removed from module list,
> it seems no actual harm. Or would you have any concern?
> 

The issue isn't the module list, but rather when it is safe to free the
contents, so we don't clobber anything. We absolutely need to enforce
that we can't text_poke() something that might have already been freed.

That being said, we *also* really would prefer to enforce that we can't
text_poke() memory that doesn't actually contain code; as far as I can
tell we don't currently do that check.

This, again, is a good use for a separate mm context. We can enforce
that that context will only ever contain valid page mappings for actual
code pages.

(Note: in my proposed algorithm, with a separate mm, replace INVLPG with
switching CR3 if we have to do a rollback or roll forward in the
breakpoint handler.)

-hpa


Re: [PATCH 17/17] module: Prevent module removal racing with text_poke()

2019-01-17 Thread H. Peter Anvin
On 1/17/19 10:07 AM, Nadav Amit wrote:
>> On Jan 16, 2019, at 11:54 PM, Masami Hiramatsu  wrote:
>>
>> On Wed, 16 Jan 2019 16:32:59 -0800
>> Rick Edgecombe  wrote:
>>
>>> From: Nadav Amit 
>>>
>>> It seems dangerous to allow code modifications to take place
>>> concurrently with module unloading. So take the text_mutex while the
>>> memory of the module is freed.
>>
>> At that point, since the module itself is removed from module list,
>> it seems no actual harm. Or would you have any concern?
> 
> So it appears that you are right and all the users of text_poke() and
> text_poke_bp() do install module notifiers, and remove the module from their
> internal data structure when they are done (*). As long as they prevent
> text_poke*() to be called concurrently (e.g., using jump_label_lock()),
> everything is fine.
> 
> Having said that, the question is whether you “trust” text_poke*() users to
> do so. text_poke() description does not day explicitly that you need to
> prevent modules from being removed.
> 
> What do you say?
> 

Please make it explicit.

-hpa



Re: [PATCH v3 0/6] Static calls

2019-01-14 Thread H. Peter Anvin
On 1/14/19 9:01 PM, H. Peter Anvin wrote:
> 
> This could be as simple as spinning for a limited time waiting for
> states 0 or 3 if we are not the patching CPU. It is also not necessary
> to wait for the mask to become zero for the first sync if we find
> ourselves suddenly in state 4.
> 

So this would look something like this for the #BP handler; I think this
is safe.  This uses the TLB miss on the write page intentionally to slow
down the loop a bit to reduce the risk of livelock.  Note that
"bp_write_addr" here refers to the write address for the breakpoint that
was taken.

state = atomic_read(_poke_state);
if (state == 0)
return 0;   /* No patching in progress */

recheck:
clear bit in mask

switch (state) {
case 1:
case 4:
if (smp_processor_id() != bp_patching_cpu) {
int retries = NNN;
while (retries--) {
invlpg
if (*bp_write_addr != 0xcc)
goto recheck;
state = atomic_read(_poke_state);
if (state != 1 && state != 4)
goto recheck;
}
}
state = cmpxchg(_poke_state, 1, 4);
if (state != 1 && state != 4)
goto recheck;
atomic_write(bp_write_addr, bp_old_value);
break;
case 2:
if (smp_processor_id() != bp_patching_cpu) {
invlpg
state = atomic_read(_poke_state);
if (state != 2)
goto recheck;
}
complete patch sequence
remove breakpoint
break;

case 3:
case 0:
/*
 * If we are here, the #BP will go away on its
 * own, or we will re-take it if it was a "real"
 * breakpoint.
 */
break;
}
return 1;


Re: [PATCH v3 0/6] Static calls

2019-01-14 Thread H. Peter Anvin
On 1/14/19 7:05 PM, Andy Lutomirski wrote:
> On Mon, Jan 14, 2019 at 2:55 PM H. Peter Anvin  wrote:
>>
>> I think this sequence ought to work (keep in mind we are already under a
>> mutex, so the global data is safe even if we are preempted):
> 
> I'm trying to wrap my head around this.  The states are:
> 
> 0: normal operation
> 1: writing 0xcc, can be canceled
> 2: writing final instruction.  The 0xcc was definitely synced to all CPUs.
> 3: patch is definitely installed but maybe not sync_cored.
> 

4: breakpoint has been canceled; need to redo patching.

>>
>> set up page table entries
>> invlpg
>> set up bp patching global data
>>
>> cpu = get_cpu()
>>
> So we're assuming that the state is
> 
>> bp_old_value = atomic_read(bp_write_addr)
>>
>> do {
> 
> So we're assuming that the state is 0 here.  A WARN_ON_ONCE to check
> that would be nice.

The state here can be 0 or 4.

>> atomic_write(_poke_state, 1)
>>
>> atomic_write(bp_write_addr, 0xcc)
>>
>> mask <- online_cpu_mask - self
>> send IPIs
>> wait for mask = 0
>>
>> } while (cmpxchg(_poke_state, 1, 2) != 1);
>>
>> patch sites, remove breakpoints after patching each one
> 
> Not sure what you mean by patch *sites*.  As written, this only
> supports one patch site at a time, since there's only one
> bp_write_addr, and fixing that may be complicated.  Not fixing it
> might also be a scalability problem.

Fixing it isn't all that complicated; we just need to have a list of
patch locations (which we need anyway!) and walk (or search) it instead
of checking just one; I omitted that detail for simplicity.

>> atomic_write(_poke_state, 3);
>>
>> mask <- online_cpu_mask - self
>> send IPIs
>> wait for mask = 0
>>
>> atomic_write(_poke_state, 0);
>>
>> tear down patching global data
>> tear down page table entries
>>
>>
>>
>> The #BP handler would then look like:
>>
>> state = cmpxchg(_poke_state, 1, 4);
>> switch (state) {
>> case 1:
>> case 4:
> 
> What is state 4?
> 
>> invlpg
>> cmpxchg(bp_write_addr, 0xcc, bp_old_value)

I'm 85% sure that the cmpxchg here is actually unnecessary, an
atomic_write() is sufficient.

>> break;
>> case 2:
>> invlpg
>> complete patch sequence
>> remove breakpoint
>> break;
> 
> ISTM you might as well change state to 3 here, but it's arguably unnecessary.

If and only if you have only one patch location you could, but again,
unnecessary.

>> case 3:
>> /* If we are here, the #BP will go away on its own */
>> break;
>> case 0:
>> /* No patching in progress!!! */
>> return 0;
>> }
>>
>> clear bit in mask
>> return 1;
>>
>> The IPI handler:
>>
>> clear bit in mask
>> sync_core   /* Needed if multiple IPI events are chained */
> 
> I really like that this doesn't require fixups -- text_poke_bp() just
> works.  But I'm nervous about livelocks or maybe just extreme slowness
> under nasty loads.  Suppose some perf NMI code does a static call or
> uses a static call.  Now there's a situation where, under high
> frequency perf sampling, the patch process might almost always hit the
> breakpoint while in state 1.  It'll get reversed and done again, and
> we get stuck.  It would be neat if we could get the same "no
> deadlocks" property while significantly reducing the chance of a
> rollback.

This could be as simple as spinning for a limited time waiting for
states 0 or 3 if we are not the patching CPU. It is also not necessary
to wait for the mask to become zero for the first sync if we find
ourselves suddenly in state 4.

This wouldn't reduce the livelock probability to zero, but it ought to
reduce it enough that if we really are under such heavy event load we
may end up getting stuck in any number of ways...

> This is why I proposed something where we try to guarantee forward
> progress by making sure that any NMI code that might spin and wait for
> other CPUs is guaranteed to eventually sync_core(), clear its bit, and
> possibly finish a patch.  But this is a bit gross.

Yes, this gets really grotty and who knows how many code paths it would
touch.

-hpa




Re: [PATCH v3 0/6] Static calls

2019-01-14 Thread H. Peter Anvin
I think this sequence ought to work (keep in mind we are already under a
mutex, so the global data is safe even if we are preempted):

set up page table entries
invlpg
set up bp patching global data

cpu = get_cpu()

bp_old_value = atomic_read(bp_write_addr)

do {
atomic_write(_poke_state, 1)

atomic_write(bp_write_addr, 0xcc)

mask <- online_cpu_mask - self
send IPIs
wait for mask = 0

} while (cmpxchg(_poke_state, 1, 2) != 1);

patch sites, remove breakpoints after patching each one

atomic_write(_poke_state, 3);

mask <- online_cpu_mask - self
send IPIs
wait for mask = 0

atomic_write(_poke_state, 0);

tear down patching global data
tear down page table entries



The #BP handler would then look like:

state = cmpxchg(_poke_state, 1, 4);
switch (state) {
case 1:
case 4:
invlpg
cmpxchg(bp_write_addr, 0xcc, bp_old_value)
break;
case 2:
invlpg
complete patch sequence
remove breakpoint
break;
case 3:
/* If we are here, the #BP will go away on its own */
break;
case 0:
/* No patching in progress!!! */
return 0;
}

clear bit in mask
return 1;

The IPI handler:

clear bit in mask
sync_core   /* Needed if multiple IPI events are chained */


Re: [PATCH v3 0/6] Static calls

2019-01-14 Thread H. Peter Anvin
So I was already in the middle of composing this message when Andy posted:

> I don't even think this is sufficient.  I think we also need everyone
> who clears the bit to check if all bits are clear and, if so, remove
> the breakpoint.  Otherwise we have a situation where, if you are in
> text_poke_bp() and you take an NMI (or interrupt or MCE or whatever)
> and that interrupt then hits the breakpoint, then you deadlock because
> no one removes the breakpoint.
> 
> If we do this, and if we can guarantee that all CPUs make forward
> progress, then maybe the problem is solved. Can we guarantee something
> like all NMI handlers that might wait in a spinlock or for any other
> reason will periodically check if a sync is needed while they're
> spinning?

So the really, really nasty case is when an asynchronous event on the
*patching* processor gets stuck spinning on a resource which is
unavailable due to another processor spinning on the #BP. We can disable
interrupts, but we can't stop NMIs from coming in (although we could
test in the NMI handler if we are in that condition and return
immediately; I'm not sure we want to do that, and we still have to deal
with #MC and what not.)

The fundamental problem here is that we don't see the #BP on the
patching processor, in which case we could simply complete the patching
from the #BP handler on that processor.

On 1/13/19 6:40 PM, H. Peter Anvin wrote:
> On 1/13/19 6:31 PM, H. Peter Anvin wrote:
>>
>> static cpumask_t text_poke_cpumask;
>>
>> static void text_poke_sync(void)
>> {
>>  smp_wmb();
>>  text_poke_cpumask = cpu_online_mask;
>>  smp_wmb();  /* Should be optional on x86 */
>>  cpumask_clear_cpu(_poke_cpumask, smp_processor_id());
>>  on_each_cpu_mask(_poke_cpumask, text_poke_sync_cpu, NULL, false);
>>  while (!cpumask_empty(_poke_cpumask)) {
>>  cpu_relax();
>>  smp_rmb();
>>  }
>> }
>>
>> static void text_poke_sync_cpu(void *dummy)
>> {
>>  (void)dummy;
>>
>>  smp_rmb();
>>  cpumask_clear_cpu(_bitmask, smp_processor_id());
>>  /*
>>   * We are guaranteed to return with an IRET, either from the
>>   * IPI or the #BP handler; this provides serialization.
>>   */
>> }
>>
> 
> The invariants here are:
> 
> 1. The patching routine must set each bit in the cpumask after each event
>that requires synchronization is complete.
> 2. The bit can be (atomically) cleared on the target CPU only, and only in a
>place that guarantees a synchronizing event (e.g. IRET) before it may
>reaching the poked instruction.
> 3. At a minimum the IPI handler and #BP handler needs to clear the bit. It
>*is* also possible to clear it in other places, e.g. the NMI handler, if
>necessary as long as condition 2 is satisfied.
> 

OK, so with interrupts enabled *on the processor doing the patching* we
still have a problem if it takes an interrupt which in turn takes a #BP.
 Disabling interrupts would not help, because but an NMI and #MC could
still cause problems unless we can guarantee that no path which may be
invoked by NMI/#MC can do text_poke, which seems to be a very aggressive
assumption.

Note: I am assuming preemption is disabled.

The easiest/sanest way to deal with this might be to switch the IDT (or
provide a hook in the generic exception entry code) on the patching
processor, such that if an asynchronous event comes in, we either roll
forward or revert. This is doable because the second sync we currently
do is not actually necessary per the hardware guys.

If we take that #BP during the breakpoint deployment phase -- that is,
before the first sync has completed -- restore the previous value of the
breakpoint byte. Upon return text_poke_bp() will then have to loop back
to the beginning and do it again.

If we take the #BP after that point, we can complete the patch in the
normal manner, by writing the rest of the instruction and then removing
the #BP. text_poke_bp() will complete the synchronization sequence on
return, but if another processor is spinning and sees the breakpoint
having been removed, it is good to go.

Rignt now we do completely unnecessary setup and teardown of the PDT
entries for each phase of the patching. This would have to be removed,
so that the asynchronous event handler will always be able to do the
roll forward/roll back as required.

If this is unpalatable, the solution you touched on is probably also
doable, but I need to think *really* carefully about the sequencing
constraints, because now you also have to worry about events
interrupting a patch in progress but not completed. It would however
have the advantage that an arbitrary interrupt on the patching processor
is unlikely to cause a rollback, and s

Re: [PATCH v3 0/6] Static calls

2019-01-13 Thread H. Peter Anvin
On 1/13/19 6:31 PM, H. Peter Anvin wrote:
> 
> static cpumask_t text_poke_cpumask;
> 
> static void text_poke_sync(void)
> {
>   smp_wmb();
>   text_poke_cpumask = cpu_online_mask;
>   smp_wmb();  /* Should be optional on x86 */
>   cpumask_clear_cpu(_poke_cpumask, smp_processor_id());
>   on_each_cpu_mask(_poke_cpumask, text_poke_sync_cpu, NULL, false);
>   while (!cpumask_empty(_poke_cpumask)) {
>   cpu_relax();
>   smp_rmb();
>   }
> }
> 
> static void text_poke_sync_cpu(void *dummy)
> {
>   (void)dummy;
> 
>   smp_rmb();
>   cpumask_clear_cpu(_bitmask, smp_processor_id());
>   /*
>* We are guaranteed to return with an IRET, either from the
>* IPI or the #BP handler; this provides serialization.
>*/
> }
> 

The invariants here are:

1. The patching routine must set each bit in the cpumask after each event
   that requires synchronization is complete.
2. The bit can be (atomically) cleared on the target CPU only, and only in a
   place that guarantees a synchronizing event (e.g. IRET) before it may
   reaching the poked instruction.
3. At a minimum the IPI handler and #BP handler needs to clear the bit. It
   *is* also possible to clear it in other places, e.g. the NMI handler, if
   necessary as long as condition 2 is satisfied.

-hpa


Re: [PATCH v3 0/6] Static calls

2019-01-13 Thread H. Peter Anvin
On 1/11/19 11:39 AM, Jiri Kosina wrote:
> On Fri, 11 Jan 2019, h...@zytor.com wrote:
> 
>> I still don't see why can't simply spin in the #BP handler until the 
>> patch is complete.
> 
> I think this brings us to the already discussed possible deadlock, when 
> one CPU#0 is in the middle of text_poke_bp(), CPU#1 is spinning inside 
> spin_lock_irq*() and CPU#2 hits the breakpont while holding that very 
> 'lock'.
> 
> Then we're stuck forever, because CPU#1 will never handle the pending 
> sync_core() IPI (it's not NMI).
> 
> Or have I misunderstood what you meant?
> 

OK, I was thinking about this quite a while ago, and even started hacking on
it, but apparently I managed to forget some key details.

Specifically, you do *not* want to use the acknowledgment of the IPI as the
blocking condition, so don't use a waiting IPI.

Instead, you want a CPU bitmap (or percpu variable) that the IPI handler
clears.  When you are spinning in the IPI handler, you *also* need to clear
that bit.  Doing so is safe even in the case of batched updates, because you
are guaranteed to execute an IRET before you get to patched code.

So the synchronization part of the patching routine becomes:

static cpumask_t text_poke_cpumask;

static void text_poke_sync(void)
{
smp_wmb();
text_poke_cpumask = cpu_online_mask;
smp_wmb();  /* Optional on x86 */
cpumask_clear_cpu(_poke_cpumask, smp_processor_id());
on_each_cpu_mask(_poke_cpumask, text_poke_sync_cpu, NULL, false);
while (!cpumask_empty(_poke_cpumask)) {
cpu_relax();
smp_rmb();
}
}

static void text_poke_sync_cpu(void *dummy)
{
(void)dummy;

smp_rmb();
cpumask_clear_cpu(_bitmask, smp_processor_id());
/*
 * We are guaranteed to return with an IRET, either from the
 * IPI or the #BP handler; this provides serialization.
 */
}

The spin routine then needs add a call to do something like this. By
(optionally) not comparing to a specific breakpoint address we allow for
batching, but we may end up spinning on a breakpoint that is not actually a
patching breakpoint until the patching is done.

int poke_int3_handler(struct pt_regs *regs)
{
/* In the current handler, but shouldn't be needed... */
smp_rmb();

if (likely(!atomic_read(bp_patching_in_progress)))
return 0;

if (user_mode(regs) unlikely(!user_mode(regs) &&
atomic_read(_patching_in_progress)) {
text_poke_sync();
regs->ip--;
return 1;   /* May end up retaking the trap */
} else {
return 0;
}
}

Unless I'm totally mistaken, the worst thing that will happen with this code
is that it may end up taking a harmless spurious IPI at a later time.

-hpa


Re: [PATCH v3 0/6] Static calls

2019-01-10 Thread H. Peter Anvin
On 1/10/19 9:31 AM, Linus Torvalds wrote:
> On Wed, Jan 9, 2019 at 2:59 PM Josh Poimboeuf  wrote:
>>
>> NOTE: At least experimentally, the call destination writes seem to be
>> atomic with respect to instruction fetching.  On Nehalem I can easily
>> trigger crashes when writing a call destination across cachelines while
>> reading the instruction on other CPU; but I get no such crashes when
>> respecting cacheline boundaries.
> 
> I still doubt ifetch is atomic on a cacheline boundary for the simple
> reason that the bus between the IU and the L1 I$ is narrower in older
> CPU's.
> 

As far as I understand, on P6+ (and possibly earlier, but I don't know) it is
atomic on a 16-byte fetch datum, at least for Intel CPUs.

However, single byte accesses are always going to be safe.

-hpa



Re: [PATCH] x86/boot: clear rsdp address in boot_params for broken loaders

2018-12-03 Thread H. Peter Anvin
On 12/3/18 9:32 PM, Juergen Gross wrote:
> 
> I'd like to send a followup patch doing that. And I'd like to not only
> test sentinel for being non-zero, but all padding fields as well. This
> should be 4.21 material, though.
> 

No, you can't do that.  That breaks backwards compatibility.

-hpa



Re: [PATCH] x86/boot: clear rsdp address in boot_params for broken loaders

2018-12-03 Thread H. Peter Anvin
On 12/3/18 9:32 PM, Juergen Gross wrote:
> 
> I'd like to send a followup patch doing that. And I'd like to not only
> test sentinel for being non-zero, but all padding fields as well. This
> should be 4.21 material, though.
> 

No, you can't do that.  That breaks backwards compatibility.

-hpa



Re: Sleeping in user_access section

2018-11-27 Thread H. Peter Anvin
On 11/23/18 3:57 AM, Julien Thierry wrote:
> 
> On x86, the EFLAGS.AC bit is also saved upon exception and I think it is
> cleared upon exception entry so there is implicit exit from the
> user_access mode when taking exception/interrupt.
> 

No, it is restored, not cleared.

In summary: on exceptions, user_access regions are suspended, and on
return the user_access status is resumed.

However, explicitly calling sleeping functions is not supported.

-hpa



Re: Sleeping in user_access section

2018-11-27 Thread H. Peter Anvin
On 11/23/18 3:57 AM, Julien Thierry wrote:
> 
> On x86, the EFLAGS.AC bit is also saved upon exception and I think it is
> cleared upon exception entry so there is implicit exit from the
> user_access mode when taking exception/interrupt.
> 

No, it is restored, not cleared.

In summary: on exceptions, user_access regions are suspended, and on
return the user_access status is resumed.

However, explicitly calling sleeping functions is not supported.

-hpa



Re: [PATCH] x86/fpu: XRSTOR is expected to raise #GP

2018-11-26 Thread H. Peter Anvin
On 11/26/18 9:49 AM, Sebastian Andrzej Siewior wrote:
> On 2018-11-26 18:27:06 [+0100], Jann Horn wrote:
>> commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess
>> fixups") incorrectly replaced the fixup entry for XSTATE_OP with a
>> user-#PF-only fixup. However, XRSTOR can also raise #GP when the supplied
>> address points to userspace memory. Change it back.
> 
> The #GP is raised if the xstate content is invalid. But I guess the
> details don't matter.
> 
>> Reported-by: Sebastian Andrzej Siewior 
>> Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess 
>> fixups")
>> Signed-off-by: Jann Horn 
> Acked-by: Sebastian Andrzej Siewior 
> 

It does matter -- please correct the patch description, or we might have some
serious confusion at some arbitrary point in the future with the result that
the bug gets re-introduced; it would not be the first time.

-hpa



Re: [PATCH] x86/fpu: XRSTOR is expected to raise #GP

2018-11-26 Thread H. Peter Anvin
On 11/26/18 9:49 AM, Sebastian Andrzej Siewior wrote:
> On 2018-11-26 18:27:06 [+0100], Jann Horn wrote:
>> commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess
>> fixups") incorrectly replaced the fixup entry for XSTATE_OP with a
>> user-#PF-only fixup. However, XRSTOR can also raise #GP when the supplied
>> address points to userspace memory. Change it back.
> 
> The #GP is raised if the xstate content is invalid. But I guess the
> details don't matter.
> 
>> Reported-by: Sebastian Andrzej Siewior 
>> Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess 
>> fixups")
>> Signed-off-by: Jann Horn 
> Acked-by: Sebastian Andrzej Siewior 
> 

It does matter -- please correct the patch description, or we might have some
serious confusion at some arbitrary point in the future with the result that
the bug gets re-introduced; it would not be the first time.

-hpa



Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init

2018-11-20 Thread H. Peter Anvin
On 11/20/18 10:18 AM, Peter Zijlstra wrote:
>>
>> Can't we make this test in text_poke() directly, please?
> 
> He does that in 9/10 iirc.
> 

No, in 9/10 he does that change locally for the jump_label, but there is
absolutely no reason not to do that test in text_poke() proper, and
simply use text_poke() everywhere in the kernel.

-hpa



Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init

2018-11-20 Thread H. Peter Anvin
On 11/20/18 10:18 AM, Peter Zijlstra wrote:
>>
>> Can't we make this test in text_poke() directly, please?
> 
> He does that in 9/10 iirc.
> 

No, in 9/10 he does that change locally for the jump_label, but there is
absolutely no reason not to do that test in text_poke() proper, and
simply use text_poke() everywhere in the kernel.

-hpa



Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init

2018-11-20 Thread H. Peter Anvin
On 11/13/18 5:07 AM, Nadav Amit wrote:
> There is no apparent reason not to use text_poke_early() while we are
> during early-init and we do not patch code that might be on the stack
> (i.e., we'll return to the middle of the patched code). This appears to
> be the case of jump-labels, so do so.
> 
> This is required for the next patches that would set a temporary mm for
> patching, which is initialized after some static-keys are
> enabled/disabled.
> 
> Cc: Andy Lutomirski 
> Cc: Kees Cook 
> Cc: Dave Hansen 
> Cc: Masami Hiramatsu 
> Co-Developed-by: Peter Zijlstra 
> Signed-off-by: Nadav Amit 
> ---
>  arch/x86/kernel/jump_label.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index aac0c1f7e354..ed5fe274a7d8 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -52,7 +52,12 @@ static void __ref __jump_label_transform(struct jump_entry 
> *entry,
>   jmp.offset = jump_entry_target(entry) -
>(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
>  
> - if (early_boot_irqs_disabled)
> + /*
> +  * As long as we're UP and not yet marked RO, we can use
> +  * text_poke_early; SYSTEM_BOOTING guarantees both, as we switch to
> +  * SYSTEM_SCHEDULING before going either.
> +  */
> + if (system_state == SYSTEM_BOOTING)
>   poker = text_poke_early;
>  
>   if (type == JUMP_LABEL_JMP) {
> 

Can't we make this test in text_poke() directly, please?

-hpa



Re: [PATCH v5 02/10] x86/jump_label: Use text_poke_early() during early init

2018-11-20 Thread H. Peter Anvin
On 11/13/18 5:07 AM, Nadav Amit wrote:
> There is no apparent reason not to use text_poke_early() while we are
> during early-init and we do not patch code that might be on the stack
> (i.e., we'll return to the middle of the patched code). This appears to
> be the case of jump-labels, so do so.
> 
> This is required for the next patches that would set a temporary mm for
> patching, which is initialized after some static-keys are
> enabled/disabled.
> 
> Cc: Andy Lutomirski 
> Cc: Kees Cook 
> Cc: Dave Hansen 
> Cc: Masami Hiramatsu 
> Co-Developed-by: Peter Zijlstra 
> Signed-off-by: Nadav Amit 
> ---
>  arch/x86/kernel/jump_label.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index aac0c1f7e354..ed5fe274a7d8 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -52,7 +52,12 @@ static void __ref __jump_label_transform(struct jump_entry 
> *entry,
>   jmp.offset = jump_entry_target(entry) -
>(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
>  
> - if (early_boot_irqs_disabled)
> + /*
> +  * As long as we're UP and not yet marked RO, we can use
> +  * text_poke_early; SYSTEM_BOOTING guarantees both, as we switch to
> +  * SYSTEM_SCHEDULING before going either.
> +  */
> + if (system_state == SYSTEM_BOOTING)
>   poker = text_poke_early;
>  
>   if (type == JUMP_LABEL_JMP) {
> 

Can't we make this test in text_poke() directly, please?

-hpa



Re: [PATCH] x86/TSC: Use RDTSCP

2018-11-19 Thread H. Peter Anvin
On 11/19/18 11:52 AM, Andy Lutomirski wrote:
> 
> I thought I benchmarked this on Intel at some point and found the
> LFENCE;RDTSC variant to be slightly faster.  But I believe you, so:
> 
> Acked-by: Andy Lutomirski 
> 

As long as the difference isn't significant, the simplicity would seem to be a
win.

-hpa


Re: [PATCH] x86/TSC: Use RDTSCP

2018-11-19 Thread H. Peter Anvin
On 11/19/18 11:52 AM, Andy Lutomirski wrote:
> 
> I thought I benchmarked this on Intel at some point and found the
> LFENCE;RDTSC variant to be slightly faster.  But I believe you, so:
> 
> Acked-by: Andy Lutomirski 
> 

As long as the difference isn't significant, the simplicity would seem to be a
win.

-hpa


Re: PLEASE REVERT URGENTLY: Re: [PATCH v5 2/3] x86/boot: add acpi rsdp address to setup_header

2018-11-11 Thread H. Peter Anvin
On 11/10/18 1:03 AM, Juergen Gross wrote:
> 
> How would that help? The garabge data written could have the correct
> terminal sentinel value by chance.
> 
> That's why I re-used an existing field in setup_header (the version) to
> let grub tell the kernel which part of setup_header was written by grub.
> 
> That's the only way I could find to let the kernel distinguish between
> garbage and actual data.

There is plenty of space *before* the setup_header part of struct boot_params
too -- look a the various __pad fields, especially (in your case), __pad3[16]
and __pad4[116] would suit the bill just fine.

>> It would be enormously helpful if you could find out any more details about
>> exactly what they are doing to break things.
> 
> That's easy:
> 
> The memory layout is:
> 
> 0x1f1 bytes of data, including the sentinel, the setup_header, and then
> more data.
> 
> grub did read the kernel's setup_header in the correct size into its
> buffer (which contains random garbage before that), intializes the first
> 0x1f1 including the sentinel byte, and then writes back the buffer, but
> using a too large length for that.

Are you kidding me... it really overwrites it with completely random data, and
not simply overspilling contents of the file?

In that case it might not be possible (or desirable) to use those N bytes
following the setup_heaader, or we need to a bigger sentinel than one byte
(probability being what it is, 256^n gets to be a pretty big number for any n,
very quickly drowning in the noise compared to other potential sources of boot
failures, and most likely less fatal than most.)

How big is this garbage dump?  I'm going to brave a guess it is 512 bytes.  In
that case this is hardly a big deal: the E820 map begins at 0x290, and the
setup_header maximum goes to 0x280, so it is only 15 bytes lost.  If it is
worse than that, we would risk losing __pad8[48] and __pad9[276], and
especially the latter would be painful. In those case perhaps we should use
0x281..0x290 as a 15-byte sentinel; that is going to be virtually foolproof.

I'm also thinking that it might be desirable to add a flags field (__pad2
would be ideal) to struct boot_params; it would let us recycle some of the
obsolete fields (hd0_info, hd1_info, sys_desc_table, olpc_ofw_header, ...) and
perhaps be able to add some more robustness against these sort of things. This
would be the right way to do what your version feedback mechanism would do.

The reason why the feedback mechanism is fundamentally broken is that it only
gives the boot loader a way to assert that it supports a certain version of
the protocol, but it doesn't say *which* bootloader does such an assert -- and
therefore it is still wide open to implementation error.

We do, in fact, already have a feedback mechanism: the bootloader ID and
bootloader version. One way we could deal with this problem is to bump the
bootloader version reported by Grub, and add a quirk to the kernel that if the
bootloader ID is Grub (7) and the version is less than a certain number, zero
those fields. In fact, the more I think about it, this is what we should do.

That being said, Grub really needs to be kept honest.  They keep making both
severe design (like refusing to use the BIOS and UEFI entry points provided by
the kernel by default) and implementation errors, apparently without
meaningful oversight. I really ask that the people more closely involved with
Grub try to keep a closer eye on their code as it applies to Linux.

-hpa


Re: PLEASE REVERT URGENTLY: Re: [PATCH v5 2/3] x86/boot: add acpi rsdp address to setup_header

2018-11-11 Thread H. Peter Anvin
On 11/10/18 1:03 AM, Juergen Gross wrote:
> 
> How would that help? The garabge data written could have the correct
> terminal sentinel value by chance.
> 
> That's why I re-used an existing field in setup_header (the version) to
> let grub tell the kernel which part of setup_header was written by grub.
> 
> That's the only way I could find to let the kernel distinguish between
> garbage and actual data.

There is plenty of space *before* the setup_header part of struct boot_params
too -- look a the various __pad fields, especially (in your case), __pad3[16]
and __pad4[116] would suit the bill just fine.

>> It would be enormously helpful if you could find out any more details about
>> exactly what they are doing to break things.
> 
> That's easy:
> 
> The memory layout is:
> 
> 0x1f1 bytes of data, including the sentinel, the setup_header, and then
> more data.
> 
> grub did read the kernel's setup_header in the correct size into its
> buffer (which contains random garbage before that), intializes the first
> 0x1f1 including the sentinel byte, and then writes back the buffer, but
> using a too large length for that.

Are you kidding me... it really overwrites it with completely random data, and
not simply overspilling contents of the file?

In that case it might not be possible (or desirable) to use those N bytes
following the setup_heaader, or we need to a bigger sentinel than one byte
(probability being what it is, 256^n gets to be a pretty big number for any n,
very quickly drowning in the noise compared to other potential sources of boot
failures, and most likely less fatal than most.)

How big is this garbage dump?  I'm going to brave a guess it is 512 bytes.  In
that case this is hardly a big deal: the E820 map begins at 0x290, and the
setup_header maximum goes to 0x280, so it is only 15 bytes lost.  If it is
worse than that, we would risk losing __pad8[48] and __pad9[276], and
especially the latter would be painful. In those case perhaps we should use
0x281..0x290 as a 15-byte sentinel; that is going to be virtually foolproof.

I'm also thinking that it might be desirable to add a flags field (__pad2
would be ideal) to struct boot_params; it would let us recycle some of the
obsolete fields (hd0_info, hd1_info, sys_desc_table, olpc_ofw_header, ...) and
perhaps be able to add some more robustness against these sort of things. This
would be the right way to do what your version feedback mechanism would do.

The reason why the feedback mechanism is fundamentally broken is that it only
gives the boot loader a way to assert that it supports a certain version of
the protocol, but it doesn't say *which* bootloader does such an assert -- and
therefore it is still wide open to implementation error.

We do, in fact, already have a feedback mechanism: the bootloader ID and
bootloader version. One way we could deal with this problem is to bump the
bootloader version reported by Grub, and add a quirk to the kernel that if the
bootloader ID is Grub (7) and the version is less than a certain number, zero
those fields. In fact, the more I think about it, this is what we should do.

That being said, Grub really needs to be kept honest.  They keep making both
severe design (like refusing to use the BIOS and UEFI entry points provided by
the kernel by default) and implementation errors, apparently without
meaningful oversight. I really ask that the people more closely involved with
Grub try to keep a closer eye on their code as it applies to Linux.

-hpa


Re: [PATCH 2/2] x86/ldt: Unmap PTEs for the slow before freeing LDT

2018-10-24 Thread H. Peter Anvin
On 10/23/18 9:31 AM, Kirill A. Shutemov wrote:
> 
> It shouldn't be a particularly hot path anyway.
> 

That's putting it mildly.

-hpa



Re: [PATCH 2/2] x86/ldt: Unmap PTEs for the slow before freeing LDT

2018-10-24 Thread H. Peter Anvin
On 10/23/18 9:31 AM, Kirill A. Shutemov wrote:
> 
> It shouldn't be a particularly hot path anyway.
> 

That's putting it mildly.

-hpa



Re: [PATCH stable v2 1/2] termios, tty/tty_baudrate.c: fix buffer overrun

2018-10-23 Thread H. Peter Anvin
On 10/23/18 09:02, h...@zytor.com wrote:
>>
>> As I think Al's big termios cleanups are going to be hitting Linus's
>> tree soon, do you know how these patches interact with that?
>>
>> This patch seems like it will not, so I'll be glad to queue that up
>> after my first round of patches get merged to Linus later this week,
>> but
>> the second one worries me.
>>
>> thanks,
>>
>> greg k-h
> 
> I have been working with Al; we had approached much the same problems but 
> from different directions. Mine ended up being a bit more comprehensive as a 
> result, so I think we're going to end up using my code with Al's reviews.
> 
> So bottom line is that it should be all good.
> 

[Al: Feel free to yell at me if I got that wrong.]

-hpa


Re: [PATCH stable v2 1/2] termios, tty/tty_baudrate.c: fix buffer overrun

2018-10-23 Thread H. Peter Anvin
On 10/23/18 09:02, h...@zytor.com wrote:
>>
>> As I think Al's big termios cleanups are going to be hitting Linus's
>> tree soon, do you know how these patches interact with that?
>>
>> This patch seems like it will not, so I'll be glad to queue that up
>> after my first round of patches get merged to Linus later this week,
>> but
>> the second one worries me.
>>
>> thanks,
>>
>> greg k-h
> 
> I have been working with Al; we had approached much the same problems but 
> from different directions. Mine ended up being a bit more comprehensive as a 
> result, so I think we're going to end up using my code with Al's reviews.
> 
> So bottom line is that it should be all good.
> 

[Al: Feel free to yell at me if I got that wrong.]

-hpa


Re: [PATCH RFC] x86: Don't include '-Wa,-' when building with Clang

2018-10-23 Thread H. Peter Anvin
On 10/23/18 11:40, Nick Desaulniers wrote:
> On Mon, Oct 22, 2018 at 10:11 PM Nadav Amit  wrote:
>>
>> at 5:37 PM, Nathan Chancellor  wrote:
>>
>> Commit 77b0bf55bc67 ("kbuild/Makefile: Prepare for using macros in
>> inline assembly code to work around asm() related GCC inlining bugs")
>> added this flag to KBUILD_CFLAGS, where it works perfectly fine with
>> GCC. However, when building with Clang, all of the object files compile
>> fine but the build hangs indefinitely at init/main.o, right before the
>> linking stage. Don't include this flag when building with Clang.
>>
>> The kernel builds and boots to a shell in QEMU with both GCC and Clang
>> with this patch applied.
>>
>> Link: 
>> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FClangBuiltLinux%2Flinux%2Fissues%2F213data=02%7C01%7Cnamit%40vmware.com%7C871daebc2ca44947d28d08d638811fb5%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636758524579997650sdata=shuxW81QRrO3TSqbgf462wgZYdLeAKeQEdGRxmnUX30%3Dreserved=0
>> Signed-off-by: Nathan Chancellor 
>> ---
>>
>> The reason this patch is labeled RFC is while I can verify that this
>> fixes the issue, I'm not entirely sure why the '-Wa,-' works for GCC
>> and not Clang. I looked into what the flag means and I couldn't really
>> find anything so I just assume it's taking input from stdin? The issue
>> could stem from how GCC forks gas versus how Clang does it. If this
>> isn't of concern and the maintainers are happy with this patch as is,
>> feel free to take it.
>>

Perhaps someone could actually, you know, time the build and see how
much -pipe actually matters, if at all?

-hpa



Re: [PATCH RFC] x86: Don't include '-Wa,-' when building with Clang

2018-10-23 Thread H. Peter Anvin
On 10/23/18 11:40, Nick Desaulniers wrote:
> On Mon, Oct 22, 2018 at 10:11 PM Nadav Amit  wrote:
>>
>> at 5:37 PM, Nathan Chancellor  wrote:
>>
>> Commit 77b0bf55bc67 ("kbuild/Makefile: Prepare for using macros in
>> inline assembly code to work around asm() related GCC inlining bugs")
>> added this flag to KBUILD_CFLAGS, where it works perfectly fine with
>> GCC. However, when building with Clang, all of the object files compile
>> fine but the build hangs indefinitely at init/main.o, right before the
>> linking stage. Don't include this flag when building with Clang.
>>
>> The kernel builds and boots to a shell in QEMU with both GCC and Clang
>> with this patch applied.
>>
>> Link: 
>> https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FClangBuiltLinux%2Flinux%2Fissues%2F213data=02%7C01%7Cnamit%40vmware.com%7C871daebc2ca44947d28d08d638811fb5%7Cb39138ca3cee4b4aa4d6cd83d9dd62f0%7C1%7C0%7C636758524579997650sdata=shuxW81QRrO3TSqbgf462wgZYdLeAKeQEdGRxmnUX30%3Dreserved=0
>> Signed-off-by: Nathan Chancellor 
>> ---
>>
>> The reason this patch is labeled RFC is while I can verify that this
>> fixes the issue, I'm not entirely sure why the '-Wa,-' works for GCC
>> and not Clang. I looked into what the flag means and I couldn't really
>> find anything so I just assume it's taking input from stdin? The issue
>> could stem from how GCC forks gas versus how Clang does it. If this
>> isn't of concern and the maintainers are happy with this patch as is,
>> feel free to take it.
>>

Perhaps someone could actually, you know, time the build and see how
much -pipe actually matters, if at all?

-hpa



Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits

2018-10-15 Thread H. Peter Anvin
On 10/15/18 10:23 AM, Paolo Bonzini wrote:
> 
> Even for a value from a 32-bit register?  That would be _BIT, which
> doesn't exist.
> 

Just use _BITUL(). gcc is smart enough to know that that the resulting value
is representable in 32 bits.

Or if you really care, submit a patch to create _BITU(), but I don't
personally see much of a point.

-hpa




Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits

2018-10-15 Thread H. Peter Anvin
On 10/15/18 10:23 AM, Paolo Bonzini wrote:
> 
> Even for a value from a 32-bit register?  That would be _BIT, which
> doesn't exist.
> 

Just use _BITUL(). gcc is smart enough to know that that the resulting value
is representable in 32 bits.

Or if you really care, submit a patch to create _BITU(), but I don't
personally see much of a point.

-hpa




Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits

2018-10-15 Thread H. Peter Anvin
On 10/7/18 6:04 PM, peng.h...@zte.com.cn wrote:
\>
> #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK(0xFF)
> -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1 << 31)
> +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1UL << 31)
>>>
 It is reasonable to change to unsigned, while not necessary to unsigned
 long?
>>> AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used in function avic_ldr_write.
>>> here I think it doesn't matter if you use unsigned or unsigned long. Do you 
>>> have any suggestions?
> 
>> In current case, AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used to calculate
>> the value of new_entry with type of u32. So the definition here is not
>> harmful.
> 
>> Also, I did a quick grep and found similar definition (1 << 31) is popular
>> in the whole kernel tree.
> 
>> The reason to make this change is not that strong to me. Would you
>> minding sharing more reason behind this change?
> oh, I'm just thinking logically, not more reason.

The right way to do this would be to use the _BITUL() (or _BITULL()) macro.

-hpa



Re: [PATCH] kvm/x86 : avoid shifting signed 32-bit value by 31 bits

2018-10-15 Thread H. Peter Anvin
On 10/7/18 6:04 PM, peng.h...@zte.com.cn wrote:
\>
> #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK(0xFF)
> -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1 << 31)
> +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK(1UL << 31)
>>>
 It is reasonable to change to unsigned, while not necessary to unsigned
 long?
>>> AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used in function avic_ldr_write.
>>> here I think it doesn't matter if you use unsigned or unsigned long. Do you 
>>> have any suggestions?
> 
>> In current case, AVIC_LOGICAL_ID_ENTRY_VALID_MASK is used to calculate
>> the value of new_entry with type of u32. So the definition here is not
>> harmful.
> 
>> Also, I did a quick grep and found similar definition (1 << 31) is popular
>> in the whole kernel tree.
> 
>> The reason to make this change is not that strong to me. Would you
>> minding sharing more reason behind this change?
> oh, I'm just thinking logically, not more reason.

The right way to do this would be to use the _BITUL() (or _BITULL()) macro.

-hpa



Re: Insanely high baud rates

2018-10-11 Thread H. Peter Anvin
On 10/11/18 2:40 PM, Theodore Y. Ts'o wrote:
> On Thu, Oct 11, 2018 at 07:14:30AM -0700, h...@zytor.com wrote:
>>>
>>> I mean - what is the baud rate of a pty  ?
>>
>> Whatever the master wants it to be...
> 
> I think Alan's point is that it is highly unlikely you would be able
> to push the equivalent of 4 gbps through a PTY layer.  The TTY later
> was never engineered for those speeds, and the real question is ---
> what's the point?  That's what Ethernet is for.
> 

I like to consider the far future; I think things like the Y2038 problem is a
good example. Or, as I like to put it, "numbers are cheap, running out of
numbers is expensive." Sounds like we have enough of a plan that we aren't
completely stuck should it ever become an issue.  It might never, of course.

-hpa


Re: Insanely high baud rates

2018-10-11 Thread H. Peter Anvin
On 10/11/18 2:40 PM, Theodore Y. Ts'o wrote:
> On Thu, Oct 11, 2018 at 07:14:30AM -0700, h...@zytor.com wrote:
>>>
>>> I mean - what is the baud rate of a pty  ?
>>
>> Whatever the master wants it to be...
> 
> I think Alan's point is that it is highly unlikely you would be able
> to push the equivalent of 4 gbps through a PTY layer.  The TTY later
> was never engineered for those speeds, and the real question is ---
> what's the point?  That's what Ethernet is for.
> 

I like to consider the far future; I think things like the Y2038 problem is a
good example. Or, as I like to put it, "numbers are cheap, running out of
numbers is expensive." Sounds like we have enough of a plan that we aren't
completely stuck should it ever become an issue.  It might never, of course.

-hpa


Re: Insanely high baud rates

2018-10-11 Thread H. Peter Anvin
On 10/11/18 12:36 PM, Craig Milo Rogers wrote:
> On 18.10.11, Alan Cox wrote:
>> I mean - what is the baud rate of a pty  ?
> 
>   Solaris made the distinction between B0, which means pty hangup mode,
> and any other baud rate:
> 
> https://docs.oracle.com/cd/E88353_01/html/E37851/pty-4d.html
> 
>   But... why not implement a pty bandwidth limitation layer?  You say, I
> need to justify it?  It's for, uh... protecting the system from unrestricted
> pty usage DOS attacks!  Yeah.  That's what it's for.

B0 is hangup, that's not in question.

-hpa



Re: Insanely high baud rates

2018-10-11 Thread H. Peter Anvin
On 10/11/18 12:36 PM, Craig Milo Rogers wrote:
> On 18.10.11, Alan Cox wrote:
>> I mean - what is the baud rate of a pty  ?
> 
>   Solaris made the distinction between B0, which means pty hangup mode,
> and any other baud rate:
> 
> https://docs.oracle.com/cd/E88353_01/html/E37851/pty-4d.html
> 
>   But... why not implement a pty bandwidth limitation layer?  You say, I
> need to justify it?  It's for, uh... protecting the system from unrestricted
> pty usage DOS attacks!  Yeah.  That's what it's for.

B0 is hangup, that's not in question.

-hpa



Re: Insanely high baud rates

2018-10-09 Thread H. Peter Anvin
On 10/09/18 12:51, Willy Tarreau wrote:
> On Tue, Oct 09, 2018 at 12:19:04PM -0700, H. Peter Anvin wrote:
>> [Resending to a wider audience]
>>
>> In trying to get the termios2 interface actually implemented in glibc,
>> the question came up if we will ever care about baud rates in excess of
>> 4 Gbps, even in the relatively remote future.
>>
>> If this is something we care about *at all*, I would like to suggest
>> that rather than defining yet another kernel interface, we steal some
>> bits from the MSB of the speed fields, alternatively one of the c_cc
>> bytes (all  likearchitectures seem to have c_cc[18] free) or some field,
>> if we can find them, in c_cflags, to indicate an exponent.
>>
>> With 5 bits from the top of the speed fields, the current values would
>> be identical up to 248 Gbps, and values up to ~288 Pbps would be
>> encodable ±2 ppb.
>>
>> In the short term, all we would have to do in the kernel would be
>> erroring out on baud rates higher than 0x0fff (2^28-1 due to
>> implicit one aliasing rhe first bit of a 5-bit exponent - less than 2^27
>> are functionally denorms.) However, I'd like to put the glibc
>> infrastructure for this now if this is something we may ever be
>> interested in.
>>
>> Thoughts?
> 
> Just my two cents, maybe we can conclude that for now we don't care
> thus don't implement anything, but that everything you identified as
> a possible place to steal bits should be marked "reserved for future
> use, must be sent as zero". This will leave you ample room later to
> decide how to proceed (and maybe it will not be the bps that you'll
> want to change but the number of lanes, or word size, or bit encoding,
> especially at 4 Gbps).
> 

Well, it would be nice to be able to pre-enable it in glibc as much as
possible.  What I'm thinking of doing is to use a 64-bit "baud_t" type
in glibc, and reserve the upper 4 bits of the speed field as must be
zero (which is de facto the case anyway.) In other to avoid a *huge*
user space ABI versioning mess we need to be able to encode the baud
rate inside a 32-bit speed_t in glibc, and given that I believe it would
be a Very Nice Thing if we could squeeze the information into 32 bits on
the kernel side as well.

So reserving the upper 4 bits I think is The Right Thing. I think that
is actually a null change.

I'm not sure if it would be a good idea to make the kernel -EINVAL on
currently-unused c_cc bytes or c_*flags; I can see pros and cons (the
latter being in no small part that that is not legacy behavior.)

-hpa


Re: Insanely high baud rates

2018-10-09 Thread H. Peter Anvin
On 10/09/18 12:51, Willy Tarreau wrote:
> On Tue, Oct 09, 2018 at 12:19:04PM -0700, H. Peter Anvin wrote:
>> [Resending to a wider audience]
>>
>> In trying to get the termios2 interface actually implemented in glibc,
>> the question came up if we will ever care about baud rates in excess of
>> 4 Gbps, even in the relatively remote future.
>>
>> If this is something we care about *at all*, I would like to suggest
>> that rather than defining yet another kernel interface, we steal some
>> bits from the MSB of the speed fields, alternatively one of the c_cc
>> bytes (all  likearchitectures seem to have c_cc[18] free) or some field,
>> if we can find them, in c_cflags, to indicate an exponent.
>>
>> With 5 bits from the top of the speed fields, the current values would
>> be identical up to 248 Gbps, and values up to ~288 Pbps would be
>> encodable ±2 ppb.
>>
>> In the short term, all we would have to do in the kernel would be
>> erroring out on baud rates higher than 0x0fff (2^28-1 due to
>> implicit one aliasing rhe first bit of a 5-bit exponent - less than 2^27
>> are functionally denorms.) However, I'd like to put the glibc
>> infrastructure for this now if this is something we may ever be
>> interested in.
>>
>> Thoughts?
> 
> Just my two cents, maybe we can conclude that for now we don't care
> thus don't implement anything, but that everything you identified as
> a possible place to steal bits should be marked "reserved for future
> use, must be sent as zero". This will leave you ample room later to
> decide how to proceed (and maybe it will not be the bps that you'll
> want to change but the number of lanes, or word size, or bit encoding,
> especially at 4 Gbps).
> 

Well, it would be nice to be able to pre-enable it in glibc as much as
possible.  What I'm thinking of doing is to use a 64-bit "baud_t" type
in glibc, and reserve the upper 4 bits of the speed field as must be
zero (which is de facto the case anyway.) In other to avoid a *huge*
user space ABI versioning mess we need to be able to encode the baud
rate inside a 32-bit speed_t in glibc, and given that I believe it would
be a Very Nice Thing if we could squeeze the information into 32 bits on
the kernel side as well.

So reserving the upper 4 bits I think is The Right Thing. I think that
is actually a null change.

I'm not sure if it would be a good idea to make the kernel -EINVAL on
currently-unused c_cc bytes or c_*flags; I can see pros and cons (the
latter being in no small part that that is not legacy behavior.)

-hpa


Insanely high baud rates

2018-10-09 Thread H. Peter Anvin
[Resending to a wider audience]

In trying to get the termios2 interface actually implemented in glibc,
the question came up if we will ever care about baud rates in excess of
4 Gbps, even in the relatively remote future.

If this is something we care about *at all*, I would like to suggest
that rather than defining yet another kernel interface, we steal some
bits from the MSB of the speed fields, alternatively one of the c_cc
bytes (all  likearchitectures seem to have c_cc[18] free) or some field,
if we can find them, in c_cflags, to indicate an exponent.

With 5 bits from the top of the speed fields, the current values would
be identical up to 248 Gbps, and values up to ~288 Pbps would be
encodable ±2 ppb.

In the short term, all we would have to do in the kernel would be
erroring out on baud rates higher than 0x0fff (2^28-1 due to
implicit one aliasing rhe first bit of a 5-bit exponent – less than 2^27
are functionally denorms.) However, I'd like to put the glibc
infrastructure for this now if this is something we may ever be
interested in.

Thoughts?
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


Insanely high baud rates

2018-10-09 Thread H. Peter Anvin
[Resending to a wider audience]

In trying to get the termios2 interface actually implemented in glibc,
the question came up if we will ever care about baud rates in excess of
4 Gbps, even in the relatively remote future.

If this is something we care about *at all*, I would like to suggest
that rather than defining yet another kernel interface, we steal some
bits from the MSB of the speed fields, alternatively one of the c_cc
bytes (all  likearchitectures seem to have c_cc[18] free) or some field,
if we can find them, in c_cflags, to indicate an exponent.

With 5 bits from the top of the speed fields, the current values would
be identical up to 248 Gbps, and values up to ~288 Pbps would be
encodable ±2 ppb.

In the short term, all we would have to do in the kernel would be
erroring out on baud rates higher than 0x0fff (2^28-1 due to
implicit one aliasing rhe first bit of a 5-bit exponent – less than 2^27
are functionally denorms.) However, I'd like to put the glibc
infrastructure for this now if this is something we may ever be
interested in.

Thoughts?
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


Re: [PATCH stable v2 1/2] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2

2018-10-08 Thread H. Peter Anvin
On 10/8/18 8:38 AM, Johan Hovold wrote:
> On Sun, Oct 07, 2018 at 09:06:19PM -0700, H. Peter Anvin wrote:
>> From: "H. Peter Anvin (Intel)" 
>>
>> Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags
>> using arbitrary flags. Because BOTHER is not defined, the general
>> Linux code doesn't allow setting arbitrary baud rates, and because
>> CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in
>> drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037.
>>
>> Resolve both problems by #defining BOTHER to 037 on Alpha.
>>
>> However, userspace still needs to know if setting BOTHER is actually
>> safe given legacy kernels (does anyone actually care about that on
>> Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even
>> though they use the same structure. Define struct termios2 just for
>> compatibility; it is the exact same structure as struct termios. In a
>> future patchset, this will be cleaned up so the uapi headers are
>> usable from libc.
> 
> Is this really needed? By defining BOTHER (and IBSHIFT which you forgot
> to mention here) you are enabling arbitrary rates also through TCSETS on
> alpha, right?
> 

Yes, it's needed, not because the old ioctls won't work on NEW kernels, but
because Alpha is so far behind the times, *and* the OLD kernels are severely
broken if we pass BOTHER to them, we need a new ioctl number so we can
guarantee that we won't do anything that user space doesn't intend; this is
actually made far worse because if I read the code correctly, the kernel will
still report back BOTHER and the speed field set on a legacy kernel in
response to TCGETS, but the values will be completely bogus.

This means that glibc will need a workaround for Alpha only, and the new ioctl
numbers handles support for it.  gcc should be able to fold the code together,
since it should be able to detect that multiple branches of execution are
otherwise identical.

To micro-optimize, we could define TERMIOS_OLD as (CBAUDEX ? 8 : 0) in a
future (non-stable) patch.

We don't need to worry about it on PowerPC because PowerPC implemented this so
long ago, before the current glibc support threshold.

-hpa



Re: [PATCH stable v2 1/2] arch/alpha, termios: implement BOTHER, IBSHIFT and termios2

2018-10-08 Thread H. Peter Anvin
On 10/8/18 8:38 AM, Johan Hovold wrote:
> On Sun, Oct 07, 2018 at 09:06:19PM -0700, H. Peter Anvin wrote:
>> From: "H. Peter Anvin (Intel)" 
>>
>> Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags
>> using arbitrary flags. Because BOTHER is not defined, the general
>> Linux code doesn't allow setting arbitrary baud rates, and because
>> CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in
>> drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037.
>>
>> Resolve both problems by #defining BOTHER to 037 on Alpha.
>>
>> However, userspace still needs to know if setting BOTHER is actually
>> safe given legacy kernels (does anyone actually care about that on
>> Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even
>> though they use the same structure. Define struct termios2 just for
>> compatibility; it is the exact same structure as struct termios. In a
>> future patchset, this will be cleaned up so the uapi headers are
>> usable from libc.
> 
> Is this really needed? By defining BOTHER (and IBSHIFT which you forgot
> to mention here) you are enabling arbitrary rates also through TCSETS on
> alpha, right?
> 

Yes, it's needed, not because the old ioctls won't work on NEW kernels, but
because Alpha is so far behind the times, *and* the OLD kernels are severely
broken if we pass BOTHER to them, we need a new ioctl number so we can
guarantee that we won't do anything that user space doesn't intend; this is
actually made far worse because if I read the code correctly, the kernel will
still report back BOTHER and the speed field set on a legacy kernel in
response to TCGETS, but the values will be completely bogus.

This means that glibc will need a workaround for Alpha only, and the new ioctl
numbers handles support for it.  gcc should be able to fold the code together,
since it should be able to detect that multiple branches of execution are
otherwise identical.

To micro-optimize, we could define TERMIOS_OLD as (CBAUDEX ? 8 : 0) in a
future (non-stable) patch.

We don't need to worry about it on PowerPC because PowerPC implemented this so
long ago, before the current glibc support threshold.

-hpa



Re: [PATCH v9 04/10] x86: refcount: prevent gcc distortions

2018-10-04 Thread H. Peter Anvin
On 10/04/18 13:29, Andy Lutomirski wrote:
>>
>> Wonderful.
>>
>> Here is the horrible code I mentioned yesterday.  This is about
>> implementing the immediate-patching framework that Linus and others have
>> discussed (it helps both performance and kernel hardening):
>>
> 
> I'm wondering if a production version should look more like:
> 
> patch_point:
> mov $whatever, [target]
> .pushsection ".immediate_patches"
> .quad .Lpatch_point
> .popsection
> 
> and let objtool parse the resulting assembled output and emit an entry
> in some table mapping patch_point to the actual address and size of
> the immediate to be patched.
> 

Putting the generation of the ersatz code in objtool is an interesting
idea, although I'm wondering if these macros, as awful as they are,
wouldn't have generic applicability (what they do is they allow the cold
branch of an asm to push temporaries to the stack rather than having to
have gcc clobber them.)

I'll think about it.

-hpa


Re: [PATCH v9 04/10] x86: refcount: prevent gcc distortions

2018-10-04 Thread H. Peter Anvin
On 10/04/18 13:29, Andy Lutomirski wrote:
>>
>> Wonderful.
>>
>> Here is the horrible code I mentioned yesterday.  This is about
>> implementing the immediate-patching framework that Linus and others have
>> discussed (it helps both performance and kernel hardening):
>>
> 
> I'm wondering if a production version should look more like:
> 
> patch_point:
> mov $whatever, [target]
> .pushsection ".immediate_patches"
> .quad .Lpatch_point
> .popsection
> 
> and let objtool parse the resulting assembled output and emit an entry
> in some table mapping patch_point to the actual address and size of
> the immediate to be patched.
> 

Putting the generation of the ersatz code in objtool is an interesting
idea, although I'm wondering if these macros, as awful as they are,
wouldn't have generic applicability (what they do is they allow the cold
branch of an asm to push temporaries to the stack rather than having to
have gcc clobber them.)

I'll think about it.

-hpa


Re: [PATCH 5/5] arch/xtensa, termios: use

2018-10-04 Thread H. Peter Anvin
On 10/04/18 15:35, Max Filippov wrote:
> 
> Looks good. But why not removing the header entirely and adding
> generic-y += termbits.h
> to arch/xtensa/include/uapi/asm/Kbuild?
> 

Good idea.  Should do that for others that also have the same #include.

-hpa


Re: [PATCH 5/5] arch/xtensa, termios: use

2018-10-04 Thread H. Peter Anvin
On 10/04/18 15:35, Max Filippov wrote:
> 
> Looks good. But why not removing the header entirely and adding
> generic-y += termbits.h
> to arch/xtensa/include/uapi/asm/Kbuild?
> 

Good idea.  Should do that for others that also have the same #include.

-hpa


[PATCH 3/5] arch/mips, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The MIPS definition of termbits.h is almost identical to the generic
one, so use the generic one and only redefine a handful of constants.

Move TIOCSER_TEMT to ioctls.h where it lives for all other
architectures.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Ralf Baechle 
Cc: Paul Burton 
Cc: James Hogan 
Cc: Philippe Ombredanne 
Cc: Greg Kroah-Hartman 
Cc: Kate Stewart 
Cc: Thomas Gleixner 
Cc: 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
---
 arch/mips/include/uapi/asm/ioctls.h   |   2 +
 arch/mips/include/uapi/asm/termbits.h | 213 +++---
 2 files changed, 15 insertions(+), 200 deletions(-)

diff --git a/arch/mips/include/uapi/asm/ioctls.h 
b/arch/mips/include/uapi/asm/ioctls.h
index 890245a9f0c4..a4e11e1438b5 100644
--- a/arch/mips/include/uapi/asm/ioctls.h
+++ b/arch/mips/include/uapi/asm/ioctls.h
@@ -114,4 +114,6 @@
 #define TIOCMIWAIT 0x5491 /* wait for a change on serial input line(s) */
 #define TIOCGICOUNT0x5492 /* read serial port inline interrupt counts */
 
+#define TIOCSER_TEMT   0x01/* Transmitter physically empty */
+
 #endif /* __ASM_IOCTLS_H */
diff --git a/arch/mips/include/uapi/asm/termbits.h 
b/arch/mips/include/uapi/asm/termbits.h
index dfeffba729b7..a08fa0a697f5 100644
--- a/arch/mips/include/uapi/asm/termbits.h
+++ b/arch/mips/include/uapi/asm/termbits.h
@@ -11,218 +11,31 @@
 #ifndef _ASM_TERMBITS_H
 #define _ASM_TERMBITS_H
 
-#include 
+#define NCCS 23
+#include 
 
-typedef unsigned char cc_t;
-typedef unsigned int speed_t;
-typedef unsigned int tcflag_t;
-
-/*
- * The ABI says nothing about NCC but seems to use NCCS as
- * replacement for it in struct termio
- */
-#define NCCS   23
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR   0  /* Interrupt character [ISIG].  */
-#define VQUIT   1  /* Quit character [ISIG].  */
-#define VERASE  2  /* Erase character [ICANON].  */
-#define VKILL   3  /* Kill-line character [ICANON].  */
+/* c_cc characters that differ from the generic ABI */
+#undef VMIN
 #define VMIN4  /* Minimum number of bytes read at once 
[!ICANON].  */
-#define VTIME   5  /* Time-out value (tenths of a second) 
[!ICANON].  */
+#undef VEOL2
 #define VEOL2   6  /* Second EOL character [ICANON].  */
-#define VSWTC   7  /* ??? */
-#define VSWTCH VSWTC
-#define VSTART  8  /* Start (X-ON) character [IXON, 
IXOFF].  */
-#define VSTOP   9  /* Stop (X-OFF) character [IXON, 
IXOFF].  */
-#define VSUSP  10  /* Suspend character [ISIG].  */
-#if 0
-/*
- * VDSUSP is not supported
- */
-#define VDSUSP 11  /* Delayed suspend character [ISIG].  */
-#endif
-#define VREPRINT   12  /* Reprint-line character [ICANON].  */
-#define VDISCARD   13  /* Discard character [IEXTEN].  */
-#define VWERASE14  /* Word-erase character 
[ICANON].  */
-#define VLNEXT 15  /* Literal-next character [IEXTEN].  */
+#undef VEOF
 #define VEOF   16  /* End-of-file character [ICANON].  */
+#undef VEOL
 #define VEOL   17  /* End-of-line character [ICANON].  */
 
-/* c_iflag bits */
-#define IGNBRK 001 /* Ignore break condition.  */
-#define BRKINT 002 /* Signal interrupt on break.  */
-#define IGNPAR 004 /* Ignore characters with parity errors.  */
-#define

[PATCH 3/5] arch/mips, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The MIPS definition of termbits.h is almost identical to the generic
one, so use the generic one and only redefine a handful of constants.

Move TIOCSER_TEMT to ioctls.h where it lives for all other
architectures.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Ralf Baechle 
Cc: Paul Burton 
Cc: James Hogan 
Cc: Philippe Ombredanne 
Cc: Greg Kroah-Hartman 
Cc: Kate Stewart 
Cc: Thomas Gleixner 
Cc: 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
---
 arch/mips/include/uapi/asm/ioctls.h   |   2 +
 arch/mips/include/uapi/asm/termbits.h | 213 +++---
 2 files changed, 15 insertions(+), 200 deletions(-)

diff --git a/arch/mips/include/uapi/asm/ioctls.h 
b/arch/mips/include/uapi/asm/ioctls.h
index 890245a9f0c4..a4e11e1438b5 100644
--- a/arch/mips/include/uapi/asm/ioctls.h
+++ b/arch/mips/include/uapi/asm/ioctls.h
@@ -114,4 +114,6 @@
 #define TIOCMIWAIT 0x5491 /* wait for a change on serial input line(s) */
 #define TIOCGICOUNT0x5492 /* read serial port inline interrupt counts */
 
+#define TIOCSER_TEMT   0x01/* Transmitter physically empty */
+
 #endif /* __ASM_IOCTLS_H */
diff --git a/arch/mips/include/uapi/asm/termbits.h 
b/arch/mips/include/uapi/asm/termbits.h
index dfeffba729b7..a08fa0a697f5 100644
--- a/arch/mips/include/uapi/asm/termbits.h
+++ b/arch/mips/include/uapi/asm/termbits.h
@@ -11,218 +11,31 @@
 #ifndef _ASM_TERMBITS_H
 #define _ASM_TERMBITS_H
 
-#include 
+#define NCCS 23
+#include 
 
-typedef unsigned char cc_t;
-typedef unsigned int speed_t;
-typedef unsigned int tcflag_t;
-
-/*
- * The ABI says nothing about NCC but seems to use NCCS as
- * replacement for it in struct termio
- */
-#define NCCS   23
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR   0  /* Interrupt character [ISIG].  */
-#define VQUIT   1  /* Quit character [ISIG].  */
-#define VERASE  2  /* Erase character [ICANON].  */
-#define VKILL   3  /* Kill-line character [ICANON].  */
+/* c_cc characters that differ from the generic ABI */
+#undef VMIN
 #define VMIN4  /* Minimum number of bytes read at once 
[!ICANON].  */
-#define VTIME   5  /* Time-out value (tenths of a second) 
[!ICANON].  */
+#undef VEOL2
 #define VEOL2   6  /* Second EOL character [ICANON].  */
-#define VSWTC   7  /* ??? */
-#define VSWTCH VSWTC
-#define VSTART  8  /* Start (X-ON) character [IXON, 
IXOFF].  */
-#define VSTOP   9  /* Stop (X-OFF) character [IXON, 
IXOFF].  */
-#define VSUSP  10  /* Suspend character [ISIG].  */
-#if 0
-/*
- * VDSUSP is not supported
- */
-#define VDSUSP 11  /* Delayed suspend character [ISIG].  */
-#endif
-#define VREPRINT   12  /* Reprint-line character [ICANON].  */
-#define VDISCARD   13  /* Discard character [IEXTEN].  */
-#define VWERASE14  /* Word-erase character 
[ICANON].  */
-#define VLNEXT 15  /* Literal-next character [IEXTEN].  */
+#undef VEOF
 #define VEOF   16  /* End-of-file character [ICANON].  */
+#undef VEOL
 #define VEOL   17  /* End-of-line character [ICANON].  */
 
-/* c_iflag bits */
-#define IGNBRK 001 /* Ignore break condition.  */
-#define BRKINT 002 /* Signal interrupt on break.  */
-#define IGNPAR 004 /* Ignore characters with parity errors.  */
-#define

[PATCH 5/5] arch/xtensa, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The Xtensa definition of termbits.h is identical to the generic one.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Chris Zankel 
Cc: Max Filippov 
Cc: Thomas Gleixner 
Cc: Greg Kroah-Hartman 
Cc: Philippe Ombredanne 
Cc: Kate Stewart 
Cc: 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
---
 arch/xtensa/include/uapi/asm/termbits.h | 222 +---
 1 file changed, 1 insertion(+), 221 deletions(-)

diff --git a/arch/xtensa/include/uapi/asm/termbits.h 
b/arch/xtensa/include/uapi/asm/termbits.h
index d4206a7c5138..3935b106de79 100644
--- a/arch/xtensa/include/uapi/asm/termbits.h
+++ b/arch/xtensa/include/uapi/asm/termbits.h
@@ -1,221 +1 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * include/asm-xtensa/termbits.h
- *
- * Copied from SH.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_TERMBITS_H
-#define _XTENSA_TERMBITS_H
-
-
-#include 
-
-typedef unsigned char  cc_t;
-typedef unsigned int   speed_t;
-typedef unsigned int   tcflag_t;
-
-#define NCCS 19
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-/* c_iflag bits */
-
-#define IGNBRK 001
-#define BRKINT 002
-#define IGNPAR 004
-#define PARMRK 010
-#define INPCK  020
-#define ISTRIP 040
-#define INLCR  100
-#define IGNCR  200
-#define ICRNL  400
-#define IUCLC  0001000
-#define IXON   0002000
-#define IXANY  0004000
-#define IXOFF  001
-#define IMAXBEL002
-#define IUTF8  004
-
-/* c_oflag bits */
-
-#define OPOST  001
-#define OLCUC  002
-#define ONLCR  004
-#define OCRNL  010
-#define ONOCR  020
-#define ONLRET 040
-#define OFILL  100
-#define OFDEL  200
-#define NLDLY  400
-#define   NL0  000
-#define   NL1  400
-#define CRDLY  0003000
-#define   CR0  000
-#define   CR1  0001000
-#define   CR2  0002000
-#define   CR3  0003000
-#define TABDLY 0014000
-#define   TAB0 000
-#define   TAB1 0004000
-#define   TAB2 001
-#define   TAB3 0014000
-#define   XTABS0014000
-#define BSDLY  002
-#define   BS0  000
-#define   BS1  002
-#define VTDLY  004
-#define   VT0  000
-#define   VT1  004
-#define FFDLY  010
-#define   FF0  000
-#define   FF1  010
-
-/* c_cflag bit meaning */
-
-#define CBAUD  0010017
-#define  B0000 /* hang up */
-#define  B50   001
-#define  B75   002
-#define  B110  003
-#define  B134  004
-#define  B150  005
-#define  B200  006
-#define  B300  007
-#define  B600  010
-#define  B1200 011
-#define  B1800 012
-#define  B2400 013
-#define  B4800 014
-#define  B9600 015
-#define  B19200016
-#define  B38400017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE  060
-#define   CS5  000
-#define   CS6  020
-#define   CS7  040
-#define   CS8  060
-#define CSTOPB 100
-#define CREAD  200
-#define PARENB 400
-#define PARODD 0001000
-#define HUPCL  0002000
-#define CLOCAL 0004000
-#define CBAUDEX 001
-#define   

[PATCH 0/5] termios: remove arch redundancy in

2018-10-04 Thread H. Peter Anvin (Intel)
 is one of those files which define an ABI. Some were
made different due to the desire to be compatible with legacy
architectures, others were mostly direct copies of the i386
definitions, which are now in asm-generic.

This folds the IA64, MIPS, PA-RISC, and Xtensa implementations into
the generic one.  IA64 and Xtensa are identical, MIPS and PA-RISC are
trivially different and just need a handful of constants redefined.

 has a few very minor adjustments to allow this.

 arch/ia64/include/uapi/asm/termbits.h   | 210 +-
 arch/mips/include/uapi/asm/ioctls.h |   2 +
 arch/mips/include/uapi/asm/termbits.h   | 213 ++
 arch/parisc/include/uapi/asm/termbits.h | 197 +---
 arch/xtensa/include/uapi/asm/termbits.h | 222 +---
 include/uapi/asm-generic/termbits.h |   7 +-
 6 files changed, 27 insertions(+), 824 deletions(-)

Signed-off-by: H. Peter Anvin (Intel) 
Cc: "James E.J. Bottomley" 
Cc: Arnd Bergmann 
Cc: Chris Zankel 
Cc: Fenghua Yu 
Cc: Greg Kroah-Hartman 
Cc: Helge Deller 
Cc: James Hogan 
Cc: Jiri Slaby 
Cc: Kate Stewart 
Cc: Max Filippov 
Cc: Paul Burton 
Cc: Philippe Ombredanne 
Cc: Ralf Baechle 
Cc: Thomas Gleixner 
Cc: Tony Luck 
Cc: 
Cc: 
Cc: 
Cc: 


[PATCH 1/5] asm-generic, termios: add alias constants from MIPS

2018-10-04 Thread H. Peter Anvin (Intel)
Some architectures, in this case MIPS, need a couple of legacy alias
constants for bits. There really is no reason why we can't define them
generically for all architectures.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Arnd Bergmann 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
linux-kernel@vger.kernel.org (open list)
---
 include/uapi/asm-generic/termbits.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/asm-generic/termbits.h 
b/include/uapi/asm-generic/termbits.h
index 2fbaf9ae89dd..96ae175eec5b 100644
--- a/include/uapi/asm-generic/termbits.h
+++ b/include/uapi/asm-generic/termbits.h
@@ -8,7 +8,10 @@ typedef unsigned char  cc_t;
 typedef unsigned int   speed_t;
 typedef unsigned int   tcflag_t;
 
-#define NCCS 19
+#ifndef NCCS
+# define NCCS 19
+#endif
+
 struct termios {
tcflag_t c_iflag;   /* input mode flags */
tcflag_t c_oflag;   /* output mode flags */
@@ -49,6 +52,7 @@ struct ktermios {
 #define VTIME 5
 #define VMIN 6
 #define VSWTC 7
+#define VSWTCH VSWTC
 #define VSTART 8
 #define VSTOP 9
 #define VSUSP 10
@@ -173,6 +177,7 @@ struct ktermios {
 #define ECHONL 100
 #define NOFLSH 200
 #define TOSTOP 400
+#define ITOSTOP TOSTOP
 #define ECHOCTL0001000
 #define ECHOPRT0002000
 #define ECHOKE 0004000
-- 
2.14.4



[PATCH 1/5] asm-generic, termios: add alias constants from MIPS

2018-10-04 Thread H. Peter Anvin (Intel)
Some architectures, in this case MIPS, need a couple of legacy alias
constants for bits. There really is no reason why we can't define them
generically for all architectures.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Arnd Bergmann 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
linux-kernel@vger.kernel.org (open list)
---
 include/uapi/asm-generic/termbits.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/asm-generic/termbits.h 
b/include/uapi/asm-generic/termbits.h
index 2fbaf9ae89dd..96ae175eec5b 100644
--- a/include/uapi/asm-generic/termbits.h
+++ b/include/uapi/asm-generic/termbits.h
@@ -8,7 +8,10 @@ typedef unsigned char  cc_t;
 typedef unsigned int   speed_t;
 typedef unsigned int   tcflag_t;
 
-#define NCCS 19
+#ifndef NCCS
+# define NCCS 19
+#endif
+
 struct termios {
tcflag_t c_iflag;   /* input mode flags */
tcflag_t c_oflag;   /* output mode flags */
@@ -49,6 +52,7 @@ struct ktermios {
 #define VTIME 5
 #define VMIN 6
 #define VSWTC 7
+#define VSWTCH VSWTC
 #define VSTART 8
 #define VSTOP 9
 #define VSUSP 10
@@ -173,6 +177,7 @@ struct ktermios {
 #define ECHONL 100
 #define NOFLSH 200
 #define TOSTOP 400
+#define ITOSTOP TOSTOP
 #define ECHOCTL0001000
 #define ECHOPRT0002000
 #define ECHOKE 0004000
-- 
2.14.4



[PATCH 5/5] arch/xtensa, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The Xtensa definition of termbits.h is identical to the generic one.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Chris Zankel 
Cc: Max Filippov 
Cc: Thomas Gleixner 
Cc: Greg Kroah-Hartman 
Cc: Philippe Ombredanne 
Cc: Kate Stewart 
Cc: 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
---
 arch/xtensa/include/uapi/asm/termbits.h | 222 +---
 1 file changed, 1 insertion(+), 221 deletions(-)

diff --git a/arch/xtensa/include/uapi/asm/termbits.h 
b/arch/xtensa/include/uapi/asm/termbits.h
index d4206a7c5138..3935b106de79 100644
--- a/arch/xtensa/include/uapi/asm/termbits.h
+++ b/arch/xtensa/include/uapi/asm/termbits.h
@@ -1,221 +1 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * include/asm-xtensa/termbits.h
- *
- * Copied from SH.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_TERMBITS_H
-#define _XTENSA_TERMBITS_H
-
-
-#include 
-
-typedef unsigned char  cc_t;
-typedef unsigned int   speed_t;
-typedef unsigned int   tcflag_t;
-
-#define NCCS 19
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-/* c_iflag bits */
-
-#define IGNBRK 001
-#define BRKINT 002
-#define IGNPAR 004
-#define PARMRK 010
-#define INPCK  020
-#define ISTRIP 040
-#define INLCR  100
-#define IGNCR  200
-#define ICRNL  400
-#define IUCLC  0001000
-#define IXON   0002000
-#define IXANY  0004000
-#define IXOFF  001
-#define IMAXBEL002
-#define IUTF8  004
-
-/* c_oflag bits */
-
-#define OPOST  001
-#define OLCUC  002
-#define ONLCR  004
-#define OCRNL  010
-#define ONOCR  020
-#define ONLRET 040
-#define OFILL  100
-#define OFDEL  200
-#define NLDLY  400
-#define   NL0  000
-#define   NL1  400
-#define CRDLY  0003000
-#define   CR0  000
-#define   CR1  0001000
-#define   CR2  0002000
-#define   CR3  0003000
-#define TABDLY 0014000
-#define   TAB0 000
-#define   TAB1 0004000
-#define   TAB2 001
-#define   TAB3 0014000
-#define   XTABS0014000
-#define BSDLY  002
-#define   BS0  000
-#define   BS1  002
-#define VTDLY  004
-#define   VT0  000
-#define   VT1  004
-#define FFDLY  010
-#define   FF0  000
-#define   FF1  010
-
-/* c_cflag bit meaning */
-
-#define CBAUD  0010017
-#define  B0000 /* hang up */
-#define  B50   001
-#define  B75   002
-#define  B110  003
-#define  B134  004
-#define  B150  005
-#define  B200  006
-#define  B300  007
-#define  B600  010
-#define  B1200 011
-#define  B1800 012
-#define  B2400 013
-#define  B4800 014
-#define  B9600 015
-#define  B19200016
-#define  B38400017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE  060
-#define   CS5  000
-#define   CS6  020
-#define   CS7  040
-#define   CS8  060
-#define CSTOPB 100
-#define CREAD  200
-#define PARENB 400
-#define PARODD 0001000
-#define HUPCL  0002000
-#define CLOCAL 0004000
-#define CBAUDEX 001
-#define   

[PATCH 0/5] termios: remove arch redundancy in

2018-10-04 Thread H. Peter Anvin (Intel)
 is one of those files which define an ABI. Some were
made different due to the desire to be compatible with legacy
architectures, others were mostly direct copies of the i386
definitions, which are now in asm-generic.

This folds the IA64, MIPS, PA-RISC, and Xtensa implementations into
the generic one.  IA64 and Xtensa are identical, MIPS and PA-RISC are
trivially different and just need a handful of constants redefined.

 has a few very minor adjustments to allow this.

 arch/ia64/include/uapi/asm/termbits.h   | 210 +-
 arch/mips/include/uapi/asm/ioctls.h |   2 +
 arch/mips/include/uapi/asm/termbits.h   | 213 ++
 arch/parisc/include/uapi/asm/termbits.h | 197 +---
 arch/xtensa/include/uapi/asm/termbits.h | 222 +---
 include/uapi/asm-generic/termbits.h |   7 +-
 6 files changed, 27 insertions(+), 824 deletions(-)

Signed-off-by: H. Peter Anvin (Intel) 
Cc: "James E.J. Bottomley" 
Cc: Arnd Bergmann 
Cc: Chris Zankel 
Cc: Fenghua Yu 
Cc: Greg Kroah-Hartman 
Cc: Helge Deller 
Cc: James Hogan 
Cc: Jiri Slaby 
Cc: Kate Stewart 
Cc: Max Filippov 
Cc: Paul Burton 
Cc: Philippe Ombredanne 
Cc: Ralf Baechle 
Cc: Thomas Gleixner 
Cc: Tony Luck 
Cc: 
Cc: 
Cc: 
Cc: 


[PATCH 2/5] arch/ia64, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The IA64 definition of termbits.h is identical to the generic.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Tony Luck 
Cc: Fenghua Yu 
Cc: Kate Stewart 
Cc: Philippe Ombredanne 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: 
Cc: Greg Kroah-Hartman 
CC: Jiri Slaby 
---
 arch/ia64/include/uapi/asm/termbits.h | 210 +-
 1 file changed, 1 insertion(+), 209 deletions(-)

diff --git a/arch/ia64/include/uapi/asm/termbits.h 
b/arch/ia64/include/uapi/asm/termbits.h
index 000a1a297c75..3935b106de79 100644
--- a/arch/ia64/include/uapi/asm/termbits.h
+++ b/arch/ia64/include/uapi/asm/termbits.h
@@ -1,209 +1 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_TERMBITS_H
-#define _ASM_IA64_TERMBITS_H
-
-/*
- * Based on .
- *
- * Modified 1999
- * David Mosberger-Tang , Hewlett-Packard Co
- *
- * 99/01/28Added new baudrates
- */
-
-#include 
-
-typedef unsigned char  cc_t;
-typedef unsigned int   speed_t;
-typedef unsigned int   tcflag_t;
-
-#define NCCS 19
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-/* c_iflag bits */
-#define IGNBRK 001
-#define BRKINT 002
-#define IGNPAR 004
-#define PARMRK 010
-#define INPCK  020
-#define ISTRIP 040
-#define INLCR  100
-#define IGNCR  200
-#define ICRNL  400
-#define IUCLC  0001000
-#define IXON   0002000
-#define IXANY  0004000
-#define IXOFF  001
-#define IMAXBEL002
-#define IUTF8  004
-
-/* c_oflag bits */
-#define OPOST  001
-#define OLCUC  002
-#define ONLCR  004
-#define OCRNL  010
-#define ONOCR  020
-#define ONLRET 040
-#define OFILL  100
-#define OFDEL  200
-#define NLDLY  400
-#define   NL0  000
-#define   NL1  400
-#define CRDLY  0003000
-#define   CR0  000
-#define   CR1  0001000
-#define   CR2  0002000
-#define   CR3  0003000
-#define TABDLY 0014000
-#define   TAB0 000
-#define   TAB1 0004000
-#define   TAB2 001
-#define   TAB3 0014000
-#define   XTABS0014000
-#define BSDLY  002
-#define   BS0  000
-#define   BS1  002
-#define VTDLY  004
-#define   VT0  000
-#define   VT1  004
-#define FFDLY  010
-#define   FF0  000
-#define   FF1  010
-
-/* c_cflag bit meaning */
-#define CBAUD  0010017
-#define  B0000 /* hang up */
-#define  B50   001
-#define  B75   002
-#define  B110  003
-#define  B134  004
-#define  B150  005
-#define  B200  006
-#define  B300  007
-#define  B600  010
-#define  B1200 011
-#define  B1800 012
-#define  B2400 013
-#define  B4800 014
-#define  B9600 015
-#define  B19200016
-#define  B38400017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE  060
-#define   CS5  000
-#define   CS6  020
-#define   CS7  040
-#define   CS8  060
-#define CSTOPB 100
-#define CREAD  200
-#define PARENB 400
-#define PARODD 0001000
-#define HUPCL  0002000
-#define CLOCAL 0004000
-#define CBAUDEX 001
-#defineBOTHER 001
-#defineB57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B50 0010005
-#define   B576000 0010006
-#define   B921600 0010007

[PATCH 2/5] arch/ia64, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The IA64 definition of termbits.h is identical to the generic.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: Tony Luck 
Cc: Fenghua Yu 
Cc: Kate Stewart 
Cc: Philippe Ombredanne 
Cc: Greg Kroah-Hartman 
Cc: Thomas Gleixner 
Cc: 
Cc: Greg Kroah-Hartman 
CC: Jiri Slaby 
---
 arch/ia64/include/uapi/asm/termbits.h | 210 +-
 1 file changed, 1 insertion(+), 209 deletions(-)

diff --git a/arch/ia64/include/uapi/asm/termbits.h 
b/arch/ia64/include/uapi/asm/termbits.h
index 000a1a297c75..3935b106de79 100644
--- a/arch/ia64/include/uapi/asm/termbits.h
+++ b/arch/ia64/include/uapi/asm/termbits.h
@@ -1,209 +1 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_TERMBITS_H
-#define _ASM_IA64_TERMBITS_H
-
-/*
- * Based on .
- *
- * Modified 1999
- * David Mosberger-Tang , Hewlett-Packard Co
- *
- * 99/01/28Added new baudrates
- */
-
-#include 
-
-typedef unsigned char  cc_t;
-typedef unsigned int   speed_t;
-typedef unsigned int   tcflag_t;
-
-#define NCCS 19
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-/* c_iflag bits */
-#define IGNBRK 001
-#define BRKINT 002
-#define IGNPAR 004
-#define PARMRK 010
-#define INPCK  020
-#define ISTRIP 040
-#define INLCR  100
-#define IGNCR  200
-#define ICRNL  400
-#define IUCLC  0001000
-#define IXON   0002000
-#define IXANY  0004000
-#define IXOFF  001
-#define IMAXBEL002
-#define IUTF8  004
-
-/* c_oflag bits */
-#define OPOST  001
-#define OLCUC  002
-#define ONLCR  004
-#define OCRNL  010
-#define ONOCR  020
-#define ONLRET 040
-#define OFILL  100
-#define OFDEL  200
-#define NLDLY  400
-#define   NL0  000
-#define   NL1  400
-#define CRDLY  0003000
-#define   CR0  000
-#define   CR1  0001000
-#define   CR2  0002000
-#define   CR3  0003000
-#define TABDLY 0014000
-#define   TAB0 000
-#define   TAB1 0004000
-#define   TAB2 001
-#define   TAB3 0014000
-#define   XTABS0014000
-#define BSDLY  002
-#define   BS0  000
-#define   BS1  002
-#define VTDLY  004
-#define   VT0  000
-#define   VT1  004
-#define FFDLY  010
-#define   FF0  000
-#define   FF1  010
-
-/* c_cflag bit meaning */
-#define CBAUD  0010017
-#define  B0000 /* hang up */
-#define  B50   001
-#define  B75   002
-#define  B110  003
-#define  B134  004
-#define  B150  005
-#define  B200  006
-#define  B300  007
-#define  B600  010
-#define  B1200 011
-#define  B1800 012
-#define  B2400 013
-#define  B4800 014
-#define  B9600 015
-#define  B19200016
-#define  B38400017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE  060
-#define   CS5  000
-#define   CS6  020
-#define   CS7  040
-#define   CS8  060
-#define CSTOPB 100
-#define CREAD  200
-#define PARENB 400
-#define PARODD 0001000
-#define HUPCL  0002000
-#define CLOCAL 0004000
-#define CBAUDEX 001
-#defineBOTHER 001
-#defineB57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B50 0010005
-#define   B576000 0010006
-#define   B921600 0010007

[PATCH 4/5] arch/parisc, termios: use

2018-10-04 Thread H. Peter Anvin (Intel)
The PARISC  definition of termbits.h is almost identical to the generic
one, so use the generic one and only redefine a handful of constants.

Signed-off-by: H. Peter Anvin (Intel) 
Cc: "James E.J. Bottomley" 
Cc: Helge Deller 
Cc: Kate Stewart 
Cc: Thomas Gleixner 
Cc: Philippe Ombredanne 
Cc: Greg Kroah-Hartman 
Cc: 
Cc: Greg Kroah-Hartman 
Cc: Jiri Slaby 
---
 arch/parisc/include/uapi/asm/termbits.h | 197 +---
 1 file changed, 4 insertions(+), 193 deletions(-)

diff --git a/arch/parisc/include/uapi/asm/termbits.h 
b/arch/parisc/include/uapi/asm/termbits.h
index 40e920f8d683..28ce9067f2e0 100644
--- a/arch/parisc/include/uapi/asm/termbits.h
+++ b/arch/parisc/include/uapi/asm/termbits.h
@@ -2,201 +2,12 @@
 #ifndef __ARCH_PARISC_TERMBITS_H__
 #define __ARCH_PARISC_TERMBITS_H__
 
-#include 
+#include 
 
-typedef unsigned char  cc_t;
-typedef unsigned int   speed_t;
-typedef unsigned int   tcflag_t;
-
-#define NCCS 19
-struct termios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-};
-
-struct termios2 {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-struct ktermios {
-   tcflag_t c_iflag;   /* input mode flags */
-   tcflag_t c_oflag;   /* output mode flags */
-   tcflag_t c_cflag;   /* control mode flags */
-   tcflag_t c_lflag;   /* local mode flags */
-   cc_t c_line;/* line discipline */
-   cc_t c_cc[NCCS];/* control characters */
-   speed_t c_ispeed;   /* input speed */
-   speed_t c_ospeed;   /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-
-/* c_iflag bits */
-#define IGNBRK 001
-#define BRKINT 002
-#define IGNPAR 004
-#define PARMRK 010
-#define INPCK  020
-#define ISTRIP 040
-#define INLCR  100
-#define IGNCR  200
-#define ICRNL  400
-#define IUCLC  0001000
-#define IXON   0002000
-#define IXANY  0004000
-#define IXOFF  001
+/* c_iflag bits that differ from the generic ABI */
+#undef IMAXBEL
 #define IMAXBEL004
+#undef IUTF8
 #define IUTF8  010
 
-/* c_oflag bits */
-#define OPOST  001
-#define OLCUC  002
-#define ONLCR  004
-#define OCRNL  010
-#define ONOCR  020
-#define ONLRET 040
-#define OFILL  100
-#define OFDEL  200
-#define NLDLY  400
-#define   NL0  000
-#define   NL1  400
-#define CRDLY  0003000
-#define   CR0  000
-#define   CR1  0001000
-#define   CR2  0002000
-#define   CR3  0003000
-#define TABDLY 0014000
-#define   TAB0 000
-#define   TAB1 0004000
-#define   TAB2 001
-#define   TAB3 0014000
-#define   XTABS0014000
-#define BSDLY  002
-#define   BS0  000
-#define   BS1  002
-#define VTDLY  004
-#define   VT0  000
-#define   VT1  004
-#define FFDLY  010
-#define   FF0  000
-#define   FF1  010
-
-/* c_cflag bit meaning */
-#define CBAUD   0010017
-#define  B0 000 /* hang up */
-#define  B50001
-#define  B75002
-#define  B110   003
-#define  B134   004
-#define  B150   005
-#define  B200   006
-#define  B300   007
-#define  B600   010
-#define  B1200  011
-#define  B1800  012
-#define  B2400  013
-#define  B4800  014
-#define  B9600  015
-#define  B19200 016
-#define  B38400 017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE   060
-#define   CS5   000
-#define   CS6   020
-#define   CS7   040
-#define   CS8   060
-#define CSTOPB  100
-#define CREAD   200
-#define PARENB  400
-#define PARODD  0001000
-#define HUPCL   0002000
-#define CLOCAL  0004000
-#define CBAUDEX 001
-#defineBOTHER 001
-#defineB57600 0010001
-#define   B115200 0010002
-#define   B230400 0010003
-#define   B460800 0010004
-#define   B50 0010005
-#define   B5760

  1   2   3   4   5   6   7   8   9   10   >